ref: 76e389ffd1ca957254d8d93dc572d9f8365a6b1c
parent: 0d2cb7ffece662eab2ff1988fca2e755e3f08552
author: thebard <thebard>
date: Wed Feb 23 21:39:31 EST 2000
After more compilation, there is no longer any function that takes more than 10% of the total time (see the profile graph). For fun I added threads to psy_step6, but since it's only 7 seconds, the constant rebuilding of the threads gains little improvement and will suck on non-multi-CPU boxes, so I removed it.
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@
DESTDIR =
-SOURCE=aac_qc.c aac_se_enc.c bitstream.c enc_tf.c encoder.c is.c mc_enc.c ms.c psych.c pulse.c tns.c transfo.c fastfft.c nok_ltp_enc.c nok_pitch.c rateconv.c faac.c
+SOURCE=aac_qc.c aac_se_enc.c bitstream.c enc_tf.c encoder.c is.c mc_enc.c ms.c psych.c pulse.c tns.c transfo.c fastfft.c nok_ltp_enc.c nok_pitch.c rateconv.c faac.c semaphore.c
OBJ = $(SOURCE:.c=.o)
--- a/all.h
+++ b/all.h
@@ -29,7 +29,6 @@
#include "tns.h"
#include "bitstream.h"
-
typedef struct
{
int is_present; /* right channel uses intensiy stereo */
--- a/profiling.txt
+++ b/profiling.txt
@@ -159,3 +159,37 @@
3 min, 12.13 sec.
The Outfield Bangin': track 2: Bangin' On My Heart
Bingo! Got it. -O core dumps, and -O2 to -O6 are slower than thuer non optimized counterparts, unless you add -ffast-math. then things are speedy. The assembly it produces still has room for speed. Still more work to do.
+
+------------------------------------------------------------------------------
+thebard
+CVS Wed Feb 23 20:30:01 CST 2000
+-O6 -march=i686 -finline-functions -funroll-loops -ffast-math -g -pg -a
+3 min, 44.97 sec.
+The Outfield Bangin': track 2: Bangin' On My Heart
+This is the profiled fully optimized version. As you can see, there isn't a whole lot left to optimize...
+Each sample counts as 0.01 seconds.
+ % cumulative self self total
+ time seconds seconds calls us/call us/call name
+ 9.76 20.63 20.63 32619291 0.63 0.66 output_bits
+ 9.18 40.05 19.42 20524 946.21 3941.14 tf_encode_spectrum_aac
+ 7.83 56.60 16.55 20524 806.37 1361.39 psy_step2
+ 7.72 72.93 16.33 41048 397.83 2266.16 EncTf_psycho_acoustic
+ 6.20 86.05 13.12 102792 127.64 128.13 calc_noise
+ 5.14 96.91 10.86 20524 529.14 529.14 psy_step4
+ 4.51 106.44 9.53 41048 232.17 232.17 psy_step6
+ 3.33 113.48 7.04 41048 171.51 171.51 psy_step9
+ 3.22 120.30 6.82 223693 30.49 96.05 noiseless_bit_count
+ 3.22 127.11 6.81 188315 36.16 36.16 PulseCoder
+ 3.16 133.79 6.68 41048 162.74 162.74 psy_step14
+ 3.03 140.20 6.41 203169 31.55 31.55 quantize
+ 2.62 145.75 5.55 7931 699.79 699.79 estimate_delay
+ 2.55 151.14 5.39 20524 262.62 463.36 nok_ltp_reconstruct
+ 2.50 156.42 5.28 751457 7.03 7.03 pfftw_twiddle_4
+ 2.38 161.46 5.04 41048 122.78 122.78 psy_step5
+ 2.03 165.76 4.30 223693 19.22 115.27 bit_search
+ 1.89 169.75 3.99 61089 65.31 102.13 MDCT
+ 1.62 173.18 3.43 10263 334.21 20456.01 EncTfFrame
+ 1.44 176.23 3.05 9744296 0.31 0.31 BsPutBit
+ 1.32 179.03 2.80 20524 136.43 339.21 realft2048
+ 1.28 181.74 2.71 1037456 2.61 2.61 pfftw_32
+ 1.05 183.96 2.22 28455 78.02 297.27 buffer2freq
--- a/psych.c
+++ b/psych.c
@@ -52,9 +52,9 @@
Source file:
-$Id: psych.c,v 1.48 2000/02/23 19:05:49 thebard Exp $
-$Id: psych.c,v 1.48 2000/02/23 19:05:49 thebard Exp $
-$Id: psych.c,v 1.48 2000/02/23 19:05:49 thebard Exp $
+$Id: psych.c,v 1.49 2000/02/24 02:39:31 thebard Exp $
+$Id: psych.c,v 1.49 2000/02/24 02:39:31 thebard Exp $
+$Id: psych.c,v 1.49 2000/02/24 02:39:31 thebard Exp $
**********************************************************************/
@@ -62,9 +62,12 @@
#include <stdlib.h>
#include <math.h>
#include <memory.h>
+#include <pthread.h>
#include "tf_main.h"
#include "psych.h"
#include "transfo.h"
+#include "all.h"
+#include "semaphore.h"
double sqrt2048, sqrt256;
@@ -185,6 +188,7 @@
/* variables for long block */
PSY_STATVARIABLE_SHORT psy_stvar_short[MAX_TIME_CHANNELS+2];
/* variables for short block */
+
/* added by T. Araki (1997.10.16) end */
void EncTf_psycho_acoustic_init( void )
@@ -858,90 +862,6 @@
}
/* added by T. Araki (1997.10.16) end */
}
-
-#ifndef WIN32
-void psy_step6_part1(void *inData)
-{
- int b,bb,i;
- double ecb,ct;
- double sprd;
-
- PARTITION_TABLE_LONG *part_tbl_long;
- PSY_STATVARIABLE_LONG *psy_stvar_long;
- PSY_VARIABLE_LONG *psy_var_long;
- PSY_STEP6_PART1_CONTAINER *container;
-
- container = (PSY_STEP6_PART1_CONTAINER *)inData;
- part_tbl_long = container->part_tbl_long;
- psy_stvar_long = container->psy_stvar_long;
- psy_var_long = container->psy_var_long;
-
- //part_tbl_long = *
-
- for(b = 0; b < part_tbl_long->len; b++){
- ecb = 0.0;
- ct = 0.0;
-
- for(bb = 0; bb < part_tbl_long->len; bb++){
- //sprd = sprdngf(part_tbl_long, part_tbl_short, bb, b, 0);
- sprd = part_tbl_long->dyn->spreading[bb][b];
- ecb += psy_var_long->e[bb] * sprd;
- ct += psy_var_long->c[bb] * sprd;
- }
-
- if (ecb!=0.0) {
- psy_var_long->cb[b] = ct / ecb;
- psy_stvar_long->en[b] = psy_var_long->en[b] = ecb
- * part_tbl_long->dyn->rnorm[b];
- } else {
- psy_var_long->cb[b] = 0.0;
- psy_stvar_long->en[b] = psy_var_long->en[b] = 0;
- }
- }
-}
-
-void psy_step6_part2(void *inData)
-{
- int b,bb,i;
- double ecb,ct;
- double sprd;
-
- PARTITION_TABLE_SHORT *part_tbl_short;
- PSY_STATVARIABLE_SHORT *psy_stvar_short;
- PSY_VARIABLE_SHORT *psy_var_short;
- PSY_STEP6_PART2_CONTAINER *container;
-
- container = (PSY_STEP6_PART2_CONTAINER *)inData;
- part_tbl_short = container->part_tbl_short;
- psy_stvar_short = container->psy_stvar_short;
- psy_var_short = container->psy_var_short;
-
- /* added by T. Araki (1997.10.16) */
- for(i = 0; i < MAX_SHORT_WINDOWS; i++){
- for(b = 0; b < part_tbl_short->len; b++){
- ecb = 0.0;
- ct = 0.0;
-
- for(bb = 0; bb < part_tbl_short->len; bb++){
- //sprd = sprdngf(part_tbl_long, part_tbl_short, bb, b, 1);
- sprd = part_tbl_short->dyn->spreading[bb][b];
- ecb += psy_var_short->e[i][bb] * sprd;
- ct += psy_var_short->c[i][bb] * sprd;
- }
-
- if (ecb!=0.0) {
- psy_var_short->cb[i][b] = ct / ecb;
- psy_stvar_short->en[i][b] = psy_var_short->en[i][b] = ecb
- * part_tbl_short->dyn->rnorm[b];
- } else {
- psy_var_short->cb[i][b] = 0.0;
- psy_stvar_short->en[i][b] = psy_var_short->en[i][b] = 0;
- }
- }
- }
- /* added by T. Araki (1997.10.16) end */
-}
-#endif
void psy_step6(PARTITION_TABLE_LONG *part_tbl_long,
PARTITION_TABLE_SHORT *part_tbl_short,
--- a/psych.h
+++ b/psych.h
@@ -44,7 +44,6 @@
#define M_PI 3.14159265358979323846
#endif
-
typedef struct {
long sampling_rate; /* the following entries are for this sampling rate */
int num_cb_long;
@@ -200,20 +199,6 @@
CH_PSYCH_OUTPUT_SHORT p_chpo_short[][MAX_SHORT_WINDOWS]
);
-#ifndef WIN32
-/* structs for the psy_step6 part1 and part2 threads */
-typedef struct {
- PARTITION_TABLE_LONG *part_tbl_long;
- PSY_STATVARIABLE_LONG *psy_stvar_long;
- PSY_VARIABLE_LONG *psy_var_long;
-} PSY_STEP6_PART1_CONTAINER;
-
-typedef struct {
- PARTITION_TABLE_SHORT *part_tbl_short;
- PSY_STATVARIABLE_SHORT *psy_stvar_short;
- PSY_VARIABLE_SHORT *psy_var_short;
-} PSY_STEP6_PART2_CONTAINER;
-#endif
/* added by T. Okada( 1997.07.10 ) */
/* Jul 10 */
#define psy_max(x,y) ((x) > (y) ? (x) : (y))