shithub: opus-tools

Download patch

ref: 7b92abb9f17b4ec813344cad2aa931d17926c7b2
parent: a2be3388fb5905efa4652adf859ea3b7e1ceaea6
author: Mark Harris <mark.hsj@gmail.com>
date: Sat Nov 11 12:55:47 EST 2023

opusenc: Allow downmixing ambix/discrete channels

Also fix informational display of input and output channels so that it
is correct when downmixing, and includes format information.

--- a/man/opusenc.1
+++ b/man/opusenc.1
@@ -101,10 +101,15 @@
 Set expected packet loss in percent (default: 0).
 .TP
 .B --downmix-mono
-Downmix to mono.
+Downmix stereo, surround, ambisonics, or discrete audio channels to mono.
+Audio that is already mono is unchanged.
+Ambisonic downmixes include a downmix of any non-diegetic channels.
+Independent discrete channels are downmixed by weighting each channel equally.
 .TP
 .B --downmix-stereo
-Downmix multichannel speaker configurations to stereo.
+Downmix surround or ambisonics to stereo.  Mono and stereo audio is unchanged.
+Ambisonic downmixes include any non-diegetic channels.
+Independent discrete channels are downmixed to mono.
 .TP
 .B --no-phase-inv
 Disable use of phase inversion for intensity stereo.
@@ -312,13 +317,16 @@
 The length will always be ignored when it is implausible (very small or very
 large), but some stdin usage may still need this option to avoid truncation.
 .TP
-.B --channels <ambix, discrete>
+.BR --channels " " ambix | discrete
 Override the format of the input channels.
-The "ambix" option indicates that the input is ambisonics using ACN channel
+.IP
+"ambix" indicates that the input is ambisonics using ACN channel
 ordering with SN3D normalization. All channels in a full ambisonics order must
 be included. A pair of non-diegetic stereo channels can be optionally placed
-after the ambisonics channels. The option "discrete" forces uncoupled
-channels.
+after the ambisonics channels.
+.IP
+"discrete" indicates that the input channels are independent discrete channels
+with no assigned meaning or speaker position.
 .SS "Diagnostic options"
 .TP
 .BI --serial " N"
--- a/src/audio-in.c
+++ b/src/audio-in.c
@@ -112,10 +112,10 @@
 
 /* Define the supported formats here */
 input_format formats[] = {
-    {wav_id, 12, wav_open, wav_close, "wav", N_("WAV file reader")},
-    {aiff_id, 12, aiff_open, wav_close, "aiff", N_("AIFF/AIFC file reader")},
-    {flac_id,     0x10000, flac_open, flac_close, "flac", N_("FLAC file reader")},
-    {oggflac_id, 33, flac_open, flac_close, "ogg", N_("Ogg FLAC file reader")},
+    {wav_id, 12, wav_open, wav_close, "WAV", N_("WAV file reader")},
+    {aiff_id, 12, aiff_open, wav_close, "AIFF", N_("AIFF/AIFC file reader")},
+    {flac_id,     0x10000, flac_open, flac_close, "FLAC", N_("FLAC file reader")},
+    {oggflac_id, 33, flac_open, flac_close, "Ogg FLAC", N_("Ogg FLAC file reader")},
     {NULL, 0, NULL, NULL, NULL, NULL}
 };
 
@@ -923,18 +923,17 @@
     long in_samples = d->real_reader(d->real_readdata, d->bufs, samples);
     int i,j,k,in_ch,out_ch;
 
-    in_ch=d->in_channels;
-    out_ch=d->out_channels;
+    in_ch = d->in_channels;
+    out_ch = d->out_channels;
 
-    for (i=0;i<in_samples;i++) {
-      for (j=0;j<out_ch;j++) {
-        float *samp;
-        samp=&buffer[i*out_ch+j];
-        *samp=0;
-        for (k=0;k<in_ch;k++) {
-          *samp+=d->bufs[i*in_ch+k]*d->matrix[in_ch*j+k];
+    for (i=0; i<in_samples; ++i) {
+        for (j=0; j<out_ch; ++j) {
+            float *samp = &buffer[i*out_ch+j];
+            *samp = 0;
+            for (k=0; k<in_ch; ++k) {
+                *samp += d->bufs[i*in_ch+k] * d->matrix[in_ch*j+k];
+            }
         }
-      }
     }
     return in_samples;
 }
@@ -941,7 +940,7 @@
 
 int setup_downmix(oe_enc_opt *opt, int out_channels)
 {
-    static const float stupid_matrix[7][8][2] = {
+    static const float surround_downmix_matrix[7][8][2] = {
       /*2*/  {{1,0},{0,1}},
       /*3*/  {{1,0},{0.7071f,0.7071f},{0,1}},
       /*4*/  {{1,0},{0,1},{0.866f,0.5f},{0.5f,0.866f}},
@@ -950,17 +949,30 @@
       /*7*/  {{1,0},{0.7071f,0.7071f},{0,1},{0.866f,0.5f},{0.5f,0.866f},{0.6123f,0.6123f},{0.7071f,0.7071f}},
       /*8*/  {{1,0},{0.7071f,0.7071f},{0,1},{0.866f,0.5f},{0.5f,0.866f},{0.866f,0.5f},{0.5f,0.866f},{0.7071f,0.7071f}},
     };
-    float sum;
     downmix *d;
-    int i,j;
+    int i, j;
 
-    if (opt->channels<=out_channels || out_channels>2 || opt->channels<=0 || out_channels<=0) {
-        fprintf(stderr, _("Downmix must actually downmix and only knows mono/stereo out.\n"));
-        return 0;
+    if ((opt->channels_format == CHANNELS_FORMAT_DEFAULT && opt->channels <= 8)
+     || (opt->channels_format == CHANNELS_FORMAT_AMBIX)) {
+        if (out_channels != 1 && out_channels != 2) {
+            fprintf(stderr, _("Downmix must be to mono or stereo.\n"));
+            out_channels = 2;
+        }
+    } else if (out_channels != 1) {
+        fprintf(stderr, _("Discrete channels can only be downmixed to mono.\n"));
+        out_channels = 1;
     }
 
-    if (out_channels==2 && opt->channels>8) {
-        fprintf(stderr, _("Downmix only knows how to mix >8ch to mono.\n"));
+    if (opt->channels_format == CHANNELS_FORMAT_DEFAULT) {
+        if (opt->channels <= out_channels) {
+            /* nothing to do */
+            return 0;
+        }
+    }
+
+    if (opt->channels <= 1) {
+        /* metadata-only change */
+        opt->channels_format = CHANNELS_FORMAT_DEFAULT;
         return 0;
     }
 
@@ -969,27 +981,58 @@
     d->matrix = malloc(sizeof(float)*opt->channels*out_channels);
     d->real_reader = opt->read_samples;
     d->real_readdata = opt->readdata;
-    d->in_channels=opt->channels;
-    d->out_channels=out_channels;
+    d->in_channels = opt->channels;
+    d->out_channels = out_channels;
 
-    if (out_channels==1&&d->in_channels>8) {
-      for (i=0;i<d->in_channels;i++)d->matrix[i]=1.0f/d->in_channels;
-    } else if (out_channels==2) {
-      for (j=0;j<d->out_channels;j++)
-        for (i=0;i<d->in_channels;i++)d->matrix[d->in_channels*j+i]=
-          stupid_matrix[opt->channels-2][i][j];
+    if (opt->channels_format == CHANNELS_FORMAT_DEFAULT && d->in_channels <= 8) {
+        /* surround downmix */
+        float sum;
+        if (out_channels == 2) {
+            for (j = 0; j < out_channels; ++j)
+                for (i = 0; i < d->in_channels; ++i)
+                    d->matrix[d->in_channels*j+i] =
+                        surround_downmix_matrix[d->in_channels-2][i][j];
+        } else {
+            for (i = 0; i < d->in_channels; ++i)
+                d->matrix[i] =
+                    (surround_downmix_matrix[d->in_channels-2][i][0]) +
+                    (surround_downmix_matrix[d->in_channels-2][i][1]);
+        }
+        sum = 0;
+        for (i = 0; i < d->in_channels*out_channels; ++i)
+            sum += d->matrix[i];
+        sum = (float)out_channels / sum;
+        for (i = 0; i < d->in_channels*out_channels; ++i)
+            d->matrix[i] *= sum;
+    } else if (opt->channels_format == CHANNELS_FORMAT_AMBIX) {
+        /* downmix according to RFC 8486 section 4 */
+        int order_plus_one = sqrt(d->in_channels);
+        int nondiegetic_channels =
+            d->in_channels - order_plus_one * order_plus_one == 2 ? 2 : 0;
+        int use_y = out_channels == 2 && d->in_channels >= 4;
+        for (i = 1; i < d->in_channels*out_channels; ++i)
+            d->matrix[i] = 0.0f;
+        d->matrix[0] = 1.0f / (1 + use_y + nondiegetic_channels);
+        if (out_channels == 2) {
+            d->matrix[d->in_channels] = d->matrix[0];
+            if (use_y) {
+                d->matrix[1] = d->matrix[0];
+                d->matrix[d->in_channels+1] = -d->matrix[0];
+            }
+        }
+        if (nondiegetic_channels == 2) {
+            d->matrix[d->in_channels-2] =
+                d->matrix[out_channels*d->in_channels-1] =
+                    d->matrix[0] * out_channels;
+        }
     } else {
-      for (i=0;i<d->in_channels;i++)d->matrix[i]=
-        (stupid_matrix[opt->channels-2][i][0])+
-        (stupid_matrix[opt->channels-2][i][1]);
+        for (i = 0; i < d->in_channels; ++i)
+            d->matrix[i] = 1.0f / d->in_channels;
     }
-    sum=0;
-    for (i=0;i<d->in_channels*d->out_channels;i++)sum+=d->matrix[i];
-    sum=(float)out_channels/sum;
-    for (i=0;i<d->in_channels*d->out_channels;i++)d->matrix[i]*=sum;
+
     opt->read_samples = read_downmix;
     opt->readdata = d;
-
+    opt->channels_format = CHANNELS_FORMAT_DEFAULT;
     opt->channels = out_channels;
     return out_channels;
 }
--- a/src/opusenc.c
+++ b/src/opusenc.c
@@ -176,7 +176,7 @@
   printf(" --raw-chan n       Set number of channels for raw input (default: 2)\n");
   printf(" --raw-endianness n 1 for big endian, 0 for little (default: 0)\n");
   printf(" --ignorelength     Ignore the data length in Wave headers\n");
-  printf(" --channels         Override the format of the input channels (ambix, discrete)\n");
+  printf(" --channels fmt     Override the format of the input channels (ambix, discrete)\n");
   printf("\nDiagnostic options:\n");
   printf(" --serial n         Force use of a specific stream serial number\n");
   printf(" --save-range file  Save check values for every frame to a file\n");
@@ -360,9 +360,30 @@
   if(nondiegetic_chs!=0&&nondiegetic_chs!=2) fatal("Error: invalid number of ambisonics channels.\n");
 }
 
+static const char *channels_format_name(int channels_format, int channels)
+{
+  static const char *format_name[8] =
+  {
+    "mono", "stereo", "linear surround", "quadraphonic",
+    "5.0 surround", "5.1 surround", "6.1 surround", "7.1 surround"
+  };
+
+  if (channels_format == CHANNELS_FORMAT_DEFAULT) {
+    if (channels >= 1 && channels <= 8) {
+      return format_name[channels-1];
+    }
+  } else if (channels_format == CHANNELS_FORMAT_AMBIX) {
+    return "ambix";
+  }
+  return "discrete";
+}
+
 int main(int argc, char **argv)
 {
-  static const input_format raw_format = {NULL, 0, raw_open, wav_close, "raw",N_("RAW file reader")};
+  static const input_format raw_format =
+  {
+    NULL, 0, raw_open, wav_close, "Raw", N_("Raw file reader")
+  };
   struct option long_options[] =
   {
     {"quiet", no_argument, NULL, 0},
@@ -452,6 +473,8 @@
   int                serialno;
   opus_int32         lookahead=0;
   int                mapping_family;
+  int                orig_channels;
+  int                orig_channels_format;
 #ifdef WIN_UNICODE
   int argc_utf8;
   char **argv_utf8;
@@ -877,25 +900,19 @@
       "Channel count must be in the range 1 to 255.\n", inopt.channels);
   }
 
-  if (downmix>0&&inopt.channels_format==CHANNELS_FORMAT_AMBIX) {
-    /*Ambisonics channels should be downmixed to mono or stereo, and then
-      encoded using channel mapping family 0.*/
-    fatal("Error: downmixing is currently unimplemented for ambisonics input.\n");
-  }
-
-  if (downmix>0&&inopt.channels_format==CHANNELS_FORMAT_DISCRETE) {
-    /*Downmix of uncoupled channels not specified.*/
-    fatal("Error: downmixing is currently unimplemented for independent input.\n");
-  }
-
   if (inopt.channels_format==CHANNELS_FORMAT_DEFAULT) {
     if (downmix==0&&inopt.channels>2&&bitrate>0&&bitrate<(16000*inopt.channels)) {
       if (!quiet) fprintf(stderr,"Notice: Surround bitrate less than 16 kbit/s per channel, downmixing.\n");
       downmix=inopt.channels>8?1:2;
     }
+  } else if (inopt.channels_format==CHANNELS_FORMAT_AMBIX) {
+    validate_ambisonics_channel_count(inopt.channels);
   }
 
-  if (downmix>0&&downmix<inopt.channels) downmix=setup_downmix(&inopt,downmix);
+  orig_channels = inopt.channels;
+  orig_channels_format = inopt.channels_format;
+
+  if (downmix>0) downmix=setup_downmix(&inopt, downmix);
   else downmix=0;
 
   rate=inopt.rate;
@@ -906,7 +923,6 @@
       ((double)inopt.total_samples_per_channel * (48000./(double)rate));
 
   if (inopt.channels_format==CHANNELS_FORMAT_AMBIX) {
-    validate_ambisonics_channel_count(chan);
     /*Use channel mapping 3 for orders {1, 2, 3} with 4 to 18 channels
       (including the non-diegetic stereo track). For other orders with no
       demixing matrices currently available, use channel mapping 2.*/
@@ -1051,32 +1067,35 @@
 
   if (!quiet) {
     int opus_app;
-    fprintf(stderr,"Encoding using %s",opus_version);
+    fprintf(stderr, "Encoding using %s", opus_version);
     ret = ope_encoder_ctl(enc, OPUS_GET_APPLICATION(&opus_app));
     if (ret != OPE_OK) fprintf(stderr, "\n");
-    else if (opus_app==OPUS_APPLICATION_VOIP) fprintf(stderr," (VoIP)\n");
-    else if (opus_app==OPUS_APPLICATION_AUDIO) fprintf(stderr," (audio)\n");
-    else if (opus_app==OPUS_APPLICATION_RESTRICTED_LOWDELAY) fprintf(stderr," (low-delay)\n");
-    else fprintf(stderr," (unknown application)\n");
-    fprintf(stderr,"-----------------------------------------------------\n");
-    fprintf(stderr,"   Input: %0.6g kHz, %d channel%s\n",
-            rate/1000.,chan,chan<2?"":"s");
-    fprintf(stderr,"  Output: %d channel%s (",chan,chan<2?"":"s");
-    if (data.nb_coupled>0) fprintf(stderr,"%d coupled",data.nb_coupled*2);
+    else if (opus_app==OPUS_APPLICATION_VOIP) fprintf(stderr, " (VoIP)\n");
+    else if (opus_app==OPUS_APPLICATION_AUDIO) fprintf(stderr, " (audio)\n");
+    else if (opus_app==OPUS_APPLICATION_RESTRICTED_LOWDELAY) fprintf(stderr, " (low-delay)\n");
+    else fprintf(stderr, " (unknown application)\n");
+    fprintf(stderr, "-----------------------------------------------------\n");
+    fprintf(stderr, "   Input: %s, %0.6g kHz, %d channel%s, %s\n",
+            in_format->format, rate/1000.,
+            orig_channels, orig_channels==1?"":"s",
+            channels_format_name(orig_channels_format, orig_channels));
+    fprintf(stderr, "  Output: Opus, %d channel%s (", chan, chan==1?"":"s");
+    if (data.nb_coupled>0) fprintf(stderr, "%d coupled", data.nb_coupled*2);
     if (data.nb_streams-data.nb_coupled>0) fprintf(stderr,
-       "%s%d uncoupled",data.nb_coupled>0?", ":"",
+       "%s%d uncoupled", data.nb_coupled>0?", ":"",
        data.nb_streams-data.nb_coupled);
-    fprintf(stderr,")\n          %0.2gms packets, %0.6g kbit/s%s\n",
+    fprintf(stderr, "), %s\n          %0.2gms packets, %0.6g kbit/s%s\n",
+       channels_format_name(inopt.channels_format, chan),
        frame_size/(48000/1000.), bitrate/1000.,
        with_hard_cbr?" CBR":with_cvbr?" CVBR":" VBR");
-    fprintf(stderr," Preskip: %d\n",lookahead);
+    fprintf(stderr, " Preskip: %d\n", lookahead);
     if (data.frange!=NULL) {
       fprintf(stderr, "          Writing final range file %s\n", range_file);
     }
-    fprintf(stderr,"\n");
+    fprintf(stderr, "\n");
   }
 
-  if (strcmp(outFile,"-")==0) {
+  if (strcmp(outFile, "-")==0) {
 #if defined WIN32 || defined _WIN32
     _setmode(_fileno(stdout), _O_BINARY);
 #endif