shithub: aacenc

Download patch

ref: 76e389ffd1ca957254d8d93dc572d9f8365a6b1c
parent: 0d2cb7ffece662eab2ff1988fca2e755e3f08552
author: thebard <thebard>
date: Wed Feb 23 21:39:31 EST 2000

After more compilation, there is no longer any function that takes more than 10% of the total time (see the profile graph). For fun I added threads to psy_step6, but since it only accounts for about 7 seconds, the constant re-creation of the threads gains little improvement and would hurt on single-CPU boxes, so I removed it.

--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@
 
 DESTDIR = 
 
-SOURCE=aac_qc.c aac_se_enc.c bitstream.c enc_tf.c encoder.c is.c mc_enc.c ms.c psych.c pulse.c tns.c transfo.c fastfft.c nok_ltp_enc.c nok_pitch.c rateconv.c faac.c
+SOURCE=aac_qc.c aac_se_enc.c bitstream.c enc_tf.c encoder.c is.c mc_enc.c ms.c psych.c pulse.c tns.c transfo.c fastfft.c nok_ltp_enc.c nok_pitch.c rateconv.c faac.c semaphore.c
 
 OBJ = $(SOURCE:.c=.o)
 
--- a/all.h
+++ b/all.h
@@ -29,7 +29,6 @@
 #include "tns.h"
 #include "bitstream.h"
 
-
 typedef struct
 {
     int is_present;	/* right channel uses intensiy stereo */
--- a/profiling.txt
+++ b/profiling.txt
@@ -159,3 +159,37 @@
 3 min, 12.13 sec.
 The Outfield Bangin': track 2: Bangin' On My Heart
 Bingo! Got it. -O core dumps, and -O2 to -O6 are slower than thuer non optimized counterparts, unless you add -ffast-math. then things are speedy. The assembly it produces still has room for speed. Still more work to do.
+
+------------------------------------------------------------------------------
+thebard
+CVS Wed Feb 23 20:30:01 CST 2000
+-O6 -march=i686 -finline-functions -funroll-loops -ffast-math -g -pg -a
+3 min, 44.97 sec.
+The Outfield Bangin': track 2: Bangin' On My Heart
+This is the profiled, fully optimized version. As you can see, there isn't a whole lot left to optimize...
+Each sample counts as 0.01 seconds.
+  %   cumulative   self              self     total           
+ time   seconds   seconds    calls  us/call  us/call  name    
+  9.76     20.63    20.63 32619291     0.63     0.66  output_bits
+  9.18     40.05    19.42    20524   946.21  3941.14  tf_encode_spectrum_aac
+  7.83     56.60    16.55    20524   806.37  1361.39  psy_step2
+  7.72     72.93    16.33    41048   397.83  2266.16  EncTf_psycho_acoustic
+  6.20     86.05    13.12   102792   127.64   128.13  calc_noise
+  5.14     96.91    10.86    20524   529.14   529.14  psy_step4
+  4.51    106.44     9.53    41048   232.17   232.17  psy_step6
+  3.33    113.48     7.04    41048   171.51   171.51  psy_step9
+  3.22    120.30     6.82   223693    30.49    96.05  noiseless_bit_count
+  3.22    127.11     6.81   188315    36.16    36.16  PulseCoder
+  3.16    133.79     6.68    41048   162.74   162.74  psy_step14
+  3.03    140.20     6.41   203169    31.55    31.55  quantize
+  2.62    145.75     5.55     7931   699.79   699.79  estimate_delay
+  2.55    151.14     5.39    20524   262.62   463.36  nok_ltp_reconstruct
+  2.50    156.42     5.28   751457     7.03     7.03  pfftw_twiddle_4
+  2.38    161.46     5.04    41048   122.78   122.78  psy_step5
+  2.03    165.76     4.30   223693    19.22   115.27  bit_search
+  1.89    169.75     3.99    61089    65.31   102.13  MDCT
+  1.62    173.18     3.43    10263   334.21 20456.01  EncTfFrame
+  1.44    176.23     3.05  9744296     0.31     0.31  BsPutBit
+  1.32    179.03     2.80    20524   136.43   339.21  realft2048
+  1.28    181.74     2.71  1037456     2.61     2.61  pfftw_32
+  1.05    183.96     2.22    28455    78.02   297.27  buffer2freq
--- a/psych.c
+++ b/psych.c
@@ -52,9 +52,9 @@
 
 Source file:
 
-$Id: psych.c,v 1.48 2000/02/23 19:05:49 thebard Exp $
-$Id: psych.c,v 1.48 2000/02/23 19:05:49 thebard Exp $
-$Id: psych.c,v 1.48 2000/02/23 19:05:49 thebard Exp $
+$Id: psych.c,v 1.49 2000/02/24 02:39:31 thebard Exp $
+$Id: psych.c,v 1.49 2000/02/24 02:39:31 thebard Exp $
+$Id: psych.c,v 1.49 2000/02/24 02:39:31 thebard Exp $
 
 **********************************************************************/
 
@@ -62,9 +62,12 @@
 #include <stdlib.h>
 #include <math.h>
 #include <memory.h>
+#include <pthread.h>
 #include "tf_main.h"
 #include "psych.h"
 #include "transfo.h"
+#include "all.h"
+#include "semaphore.h"
 
 double sqrt2048, sqrt256;
 
@@ -185,6 +188,7 @@
                                /* variables for long block */
 PSY_STATVARIABLE_SHORT    psy_stvar_short[MAX_TIME_CHANNELS+2];
                                /* variables for short block */
+
 /* added by T. Araki (1997.10.16) end */
 
 void EncTf_psycho_acoustic_init( void )
@@ -858,90 +862,6 @@
     }
 	/* added by T. Araki (1997.10.16) end */
 }
-
-#ifndef WIN32
-void psy_step6_part1(void *inData)
-{
-	int b,bb,i;
-  double ecb,ct;
-  double sprd;
-
-	PARTITION_TABLE_LONG *part_tbl_long;
-	PSY_STATVARIABLE_LONG *psy_stvar_long;
-	PSY_VARIABLE_LONG *psy_var_long;
-	PSY_STEP6_PART1_CONTAINER *container;
-
-	container = (PSY_STEP6_PART1_CONTAINER *)inData;
-	part_tbl_long = container->part_tbl_long;
-	psy_stvar_long = container->psy_stvar_long;
-	psy_var_long = container->psy_var_long;
-
-	//part_tbl_long = *
-
-	for(b = 0; b < part_tbl_long->len; b++){
-    ecb = 0.0;
-    ct = 0.0;
-
-    for(bb = 0; bb < part_tbl_long->len; bb++){
-      //sprd = sprdngf(part_tbl_long, part_tbl_short, bb, b, 0);
-      sprd = part_tbl_long->dyn->spreading[bb][b];
-      ecb += psy_var_long->e[bb] * sprd;
-      ct += psy_var_long->c[bb] * sprd;
-    }
-
-    if (ecb!=0.0) {
-      psy_var_long->cb[b] = ct / ecb;
-      psy_stvar_long->en[b] = psy_var_long->en[b] = ecb
-        * part_tbl_long->dyn->rnorm[b];
-    } else {
-      psy_var_long->cb[b] = 0.0;
-      psy_stvar_long->en[b] = psy_var_long->en[b] = 0;
-    }
-  }
-}
-
-void psy_step6_part2(void *inData)
-{
-	int b,bb,i;
-  double ecb,ct;
-  double sprd;
-
-	PARTITION_TABLE_SHORT *part_tbl_short;
-  PSY_STATVARIABLE_SHORT *psy_stvar_short;
-  PSY_VARIABLE_SHORT *psy_var_short;
-	PSY_STEP6_PART2_CONTAINER *container;
-
-	container = (PSY_STEP6_PART2_CONTAINER *)inData;
-	part_tbl_short = container->part_tbl_short;
-	psy_stvar_short = container->psy_stvar_short;
-	psy_var_short = container->psy_var_short;
-
-	/* added by T. Araki (1997.10.16) */
-  for(i = 0; i < MAX_SHORT_WINDOWS; i++){
-    for(b = 0; b < part_tbl_short->len; b++){
-      ecb = 0.0;
-      ct = 0.0;
-
-      for(bb = 0; bb < part_tbl_short->len; bb++){
-        //sprd = sprdngf(part_tbl_long, part_tbl_short, bb, b, 1);
-        sprd = part_tbl_short->dyn->spreading[bb][b];
-        ecb += psy_var_short->e[i][bb] * sprd;
-        ct += psy_var_short->c[i][bb] * sprd;
-      }
-
-      if (ecb!=0.0) {
-        psy_var_short->cb[i][b] = ct / ecb;
-        psy_stvar_short->en[i][b] = psy_var_short->en[i][b] = ecb
-          * part_tbl_short->dyn->rnorm[b];
-      } else {
-        psy_var_short->cb[i][b] = 0.0;
-        psy_stvar_short->en[i][b] = psy_var_short->en[i][b] = 0;
-      }
-    }
-  }
-  /* added by T. Araki (1997.10.16) end */
-}
-#endif
 
 void psy_step6(PARTITION_TABLE_LONG *part_tbl_long, 
 			   PARTITION_TABLE_SHORT *part_tbl_short, 
--- a/psych.h
+++ b/psych.h
@@ -44,7 +44,6 @@
 #define M_PI 3.14159265358979323846
 #endif
 
-
 typedef struct { 
   long   sampling_rate;                   /* the following entries are for this sampling rate */
   int    num_cb_long;
@@ -200,20 +199,6 @@
   CH_PSYCH_OUTPUT_SHORT p_chpo_short[][MAX_SHORT_WINDOWS]
 );
 
-#ifndef WIN32
-/* structs for the psy_step6 part1 and part2 threads */
-typedef struct {
-	PARTITION_TABLE_LONG *part_tbl_long;
-	PSY_STATVARIABLE_LONG *psy_stvar_long;
-	PSY_VARIABLE_LONG *psy_var_long;
-} PSY_STEP6_PART1_CONTAINER;
-
-typedef struct {
-	PARTITION_TABLE_SHORT *part_tbl_short;
-	PSY_STATVARIABLE_SHORT *psy_stvar_short;
-	PSY_VARIABLE_SHORT *psy_var_short;
-} PSY_STEP6_PART2_CONTAINER;
-#endif
 /* added by T. Okada( 1997.07.10 ) */
 /* Jul 10 */
 #define psy_max(x,y) ((x) > (y) ? (x) : (y))