shithub: opus-tools

Download patch

ref: 070cd744a91204b53441f0bfa44a22a18290c88f
parent: 7de1299afd9b54ad996394723ee86ddd471d6d6f
author: Felicia Lim <flim@google.com>
date: Mon Jul 19 07:59:36 EDT 2021

opusenc: Support for Ambisonics.

Adds the `--channels` flag to indicate that the input is in ambix
format.

Original-patch-by: Andrew Allen <bitllama@google.com>
Signed-off-by: Mark Harris <mark.hsj@gmail.com>

--- a/man/opusenc.1
+++ b/man/opusenc.1
@@ -311,6 +311,13 @@
 Ignore the data length in Wave headers.
 The length will always be ignored when it is implausible (very small or very
 large), but some stdin usage may still need this option to avoid truncation.
+.TP
+.B --channels <ambix>
+Override the format of the input channels.
+The "ambix" option indicates that the input is ambisonics using ACN channel
+ordering with SN3D normalization. All channels in a full ambisonics order must
+be included. A pair of non-diegetic stereo channels can be optionally placed
+after the ambisonics channels.
 .SS "Diagnostic options"
 .TP
 .BI --serial " N"
--- a/src/audio-in.c
+++ b/src/audio-in.c
@@ -437,7 +437,7 @@
         aiff->bigendian = bigendian;
         aiff->unsigned8bit = 0;
 
-        if (aiff->channels>3)
+        if (opt->channels_format==CHANNELS_FORMAT_DEFAULT && aiff->channels>3)
           fprintf(stderr, _("WARNING: AIFF[-C] files with more than three channels use\n"
                   "speaker locations incompatible with Vorbis surround definitions.\n"
                   "Not performing channel location mapping.\n"));
@@ -445,7 +445,7 @@
         opt->readdata = (void *)aiff;
 
         aiff->channel_permute = malloc(aiff->channels * sizeof(int));
-        if (aiff->channels <= 6)
+        if (opt->channels_format == CHANNELS_FORMAT_DEFAULT && aiff->channels <= 6)
             /* Where we know the mappings, use them. */
             memcpy(aiff->channel_permute, aiff_permute_matrix[aiff->channels-1],
                     sizeof(int) * aiff->channels);
@@ -553,40 +553,43 @@
         if (validbits < 1 || validbits > format.samplesize)
             validbits = format.samplesize;
 
-        format.mask = READ_U32_LE(buf+20);
-        /* warn the user if the format mask is not a supported/expected type */
-        switch (format.mask) {
-        case 1539: /* 4.0 using side surround instead of back */
-            fprintf(stderr, _("WARNING: WAV file uses side surround instead of rear for quadraphonic;\n"
-                "remapping side speakers to rear in encoding.\n"));
-            break;
-        case 1551: /* 5.1 using side instead of rear */
-            fprintf(stderr, _("WARNING: WAV file uses side surround instead of rear for 5.1;\n"
-                "remapping side speakers to rear in encoding.\n"));
-            break;
-        case 319:  /* 6.1 using rear instead of side */
-            fprintf(stderr, _("WARNING: WAV file uses rear surround instead of side for 6.1;\n"
-                "remapping rear speakers to side in encoding.\n"));
-            break;
-        case 255:  /* 7.1 'Widescreen' */
-            fprintf(stderr, _("WARNING: WAV file is a 7.1 'Widescreen' channel mapping;\n"
-                "remapping speakers to Vorbis 7.1 format.\n"));
-            break;
-        case 0:    /* default/undeclared */
-        case 1:    /* mono (left only) */
-        case 4:    /* mono (center only) */
-        case 3:    /* stereo */
-        case 51:   /* quad */
-        case 55:   /* 5.0 */
-        case 63:   /* 5.1 */
-        case 1807: /* 6.1 */
-        case 1599: /* 7.1 */
-            break;
-        default:
-            fprintf(stderr, _("WARNING: Unknown WAV surround channel mask: %u\n"
-                    "Blindly mapping speakers using default SMPTE/ITU ordering.\n"),
-                    format.mask);
-            break;
+        if (opt->channels_format == CHANNELS_FORMAT_DEFAULT)
+        {
+            format.mask = READ_U32_LE(buf+20);
+            /* warn the user if the format mask is not a supported/expected type */
+            switch (format.mask) {
+            case 1539: /* 4.0 using side surround instead of back */
+                fprintf(stderr, _("WARNING: WAV file uses side surround instead of rear for quadraphonic;\n"
+                    "remapping side speakers to rear in encoding.\n"));
+                break;
+            case 1551: /* 5.1 using side instead of rear */
+                fprintf(stderr, _("WARNING: WAV file uses side surround instead of rear for 5.1;\n"
+                    "remapping side speakers to rear in encoding.\n"));
+                break;
+            case 319:  /* 6.1 using rear instead of side */
+                fprintf(stderr, _("WARNING: WAV file uses rear surround instead of side for 6.1;\n"
+                    "remapping rear speakers to side in encoding.\n"));
+                break;
+            case 255:  /* 7.1 'Widescreen' */
+                fprintf(stderr, _("WARNING: WAV file is a 7.1 'Widescreen' channel mapping;\n"
+                    "remapping speakers to Vorbis 7.1 format.\n"));
+                break;
+            case 0:    /* default/undeclared */
+            case 1:    /* mono (left only) */
+            case 4:    /* mono (center only) */
+            case 3:    /* stereo */
+            case 51:   /* quad */
+            case 55:   /* 5.0 */
+            case 63:   /* 5.1 */
+            case 1807: /* 6.1 */
+            case 1599: /* 7.1 */
+                break;
+            default:
+                fprintf(stderr, _("WARNING: Unknown WAV surround channel mask: %u\n"
+                        "Blindly mapping speakers using default SMPTE/ITU ordering.\n"),
+                        format.mask);
+                break;
+            }
         }
         format.format = READ_U16_LE(buf+24);
     }
@@ -693,7 +696,7 @@
         opt->readdata = (void *)wav;
 
         wav->channel_permute = malloc(wav->channels * sizeof(int));
-        if (wav->channels <= 8)
+        if (opt->channels_format == CHANNELS_FORMAT_DEFAULT && wav->channels <= 8)
             /* Where we know the mappings, use them. */
             memcpy(wav->channel_permute, wav_permute_matrix[wav->channels-1],
                     sizeof(int) * wav->channels);
--- a/src/encoder.h
+++ b/src/encoder.h
@@ -16,6 +16,9 @@
 # define N_(X) (X)
 #endif
 
+#define CHANNELS_FORMAT_DEFAULT  0 /* Initial channels_format: the surround permutation tables and warnings are applied. */
+#define CHANNELS_FORMAT_AMBIX    1 /* Selected by "--channels ambix": the surround permutation tables are not applied. */
+
 typedef long (*audio_read_func)(void *src, float *buffer, int samples);
 
 typedef struct
@@ -26,6 +29,7 @@
     int rawmode;
     int rawmode_f;
     int channels;
+    int channels_format;
     long rate;
     int gain;
     int samplesize;
--- a/src/flac.c
+++ b/src/flac.c
@@ -386,7 +386,11 @@
       opt->readdata=flac;
       /*FLAC supports 1 to 8 channels only.*/
       /*It uses the same channel mappings as WAV.*/
-      flac->channel_permute=wav_permute_matrix[flac->channels-1];
+      if(opt->channels_format==CHANNELS_FORMAT_DEFAULT){
+        flac->channel_permute=wav_permute_matrix[flac->channels-1];
+      }else{
+        flac->channel_permute=flac_no_permute_matrix;
+      }
       return 1;
     }
   }
--- a/src/flac.h
+++ b/src/flac.h
@@ -51,3 +51,5 @@
 int oggflac_id(unsigned char *buf,int len);
 int flac_open(FILE *in,oe_enc_opt *opt,unsigned char *oldbuf,int buflen);
 void flac_close(void *client_data);
+
+static const int flac_no_permute_matrix[8] = {0,1,2,3,4,5,6,7}; /* Identity channel order; flac.c uses it instead of wav_permute_matrix when channels_format is not CHANNELS_FORMAT_DEFAULT. */
--- a/src/opusenc.c
+++ b/src/opusenc.c
@@ -176,6 +176,7 @@
   printf(" --raw-chan n       Set number of channels for raw input (default: 2)\n");
   printf(" --raw-endianness n 1 for big endian, 0 for little (default: 0)\n");
   printf(" --ignorelength     Ignore the data length in Wave headers\n");
+  printf(" --channels <ambix> Override the format of the input channels\n");
   printf("\nDiagnostic options:\n");
   printf(" --serial n         Force use of a specific stream serial number\n");
   printf(" --save-range file  Save check values for every frame to a file\n");
@@ -349,6 +350,16 @@
   return 0;
 }
 
+/*Calls fatal() unless num_channels is in 1..227 and is a perfect square plus 0 or 2 (optional non-diegetic stereo pair).*/
+static void validate_ambisonics_channel_count(int num_channels)
+{
+  int side, extra;
+  if(!(num_channels>=1&&num_channels<=227)) fatal("Error: the number of channels must not be <1 or >227.\n");
+  side=(int)sqrt((double)num_channels); /*floor(sqrt(n)), i.e. ambisonics order + 1*/
+  extra=num_channels-side*side;         /*channels left over after the full-order set*/
+  if(!(extra==0||extra==2)) fatal("Error: invalid number of ambisonics channels.\n");
+}
+
 int main(int argc, char **argv)
 {
   static const input_format raw_format = {NULL, 0, raw_open, wav_close, "raw",N_("RAW file reader")};
@@ -375,6 +386,7 @@
     {"set-ctl-int", required_argument, NULL, 0},
     {"help", no_argument, NULL, 0},
     {"help-picture", no_argument, NULL, 0},
+    {"channels", required_argument, NULL, 0},
     {"raw", no_argument, NULL, 0},
     {"raw-bits", required_argument, NULL, 0},
     {"raw-rate", required_argument, NULL, 0},
@@ -439,6 +451,7 @@
   int                comment_padding=512;
   int                serialno;
   opus_int32         lookahead=0;
+  int                mapping_family;
 #ifdef WIN_UNICODE
   int argc_utf8;
   char **argv_utf8;
@@ -460,6 +473,7 @@
   range_file=NULL;
   in_format=NULL;
   inopt.channels=chan;
+  inopt.channels_format=CHANNELS_FORMAT_DEFAULT;
   inopt.rate=rate;
   /* 0 dB gain is recommended unless you know what you're doing */
   inopt.gain=0;
@@ -620,6 +634,14 @@
               optarg);
           }
           max_ogg_delay=(int)floor(val*48.);
+        } else if (strcmp(optname, "channels")==0) {
+          if (strcmp(optarg, "ambix")==0) {
+            inopt.channels_format=CHANNELS_FORMAT_AMBIX;
+          } else {
+            fatal("Invalid input format: %s\n"
+              "--channels only supports 'ambix'\n",
+              optarg);
+          }
         } else if (strcmp(optname, "serial")==0) {
           serialno=atoi(optarg);
         } else if (strcmp(optname, "set-ctl-int")==0) {
@@ -853,11 +875,19 @@
       "Channel count must be in the range 1 to 255.\n", inopt.channels);
   }
 
-  if (downmix==0&&inopt.channels>2&&bitrate>0&&bitrate<(16000*inopt.channels)) {
-    if (!quiet) fprintf(stderr,"Notice: Surround bitrate less than 16 kbit/s per channel, downmixing.\n");
-    downmix=inopt.channels>8?1:2;
+  if (downmix>0&&inopt.channels_format==CHANNELS_FORMAT_AMBIX) {
+    /*Ambisonics channels should be downmixed to mono or stereo, and then
+      encoded using channel mapping family 0.*/
+    fatal("Error: downmixing is currently unimplemented for ambisonics input.\n");
   }
 
+  if (inopt.channels_format==CHANNELS_FORMAT_DEFAULT) {
+    if (downmix==0&&inopt.channels>2&&bitrate>0&&bitrate<(16000*inopt.channels)) {
+      if (!quiet) fprintf(stderr,"Notice: Surround bitrate less than 16 kbit/s per channel, downmixing.\n");
+      downmix=inopt.channels>8?1:2;
+    }
+  }
+
   if (downmix>0&&downmix<inopt.channels) downmix=setup_downmix(&inopt,downmix);
   else downmix=0;
 
@@ -868,9 +898,19 @@
     inopt.total_samples_per_channel = (opus_int64)
       ((double)inopt.total_samples_per_channel * (48000./(double)rate));
 
+  if (inopt.channels_format==CHANNELS_FORMAT_AMBIX) {
+    validate_ambisonics_channel_count(chan);
+    /*Use channel mapping 3 for orders {1, 2, 3} with 4 to 18 channels
+      (including the non-diegetic stereo track). For other orders with no
+      demixing matrices currently available, use channel mapping 2.*/
+    mapping_family=(chan>=4&&chan<=18)?3:2;
+  } else {
+    mapping_family=chan>8?255:chan>2;
+  }
+
   /*Initialize Opus encoder*/
   enc = ope_encoder_create_callbacks(&callbacks, &data, inopt.comments, rate,
-    chan, chan>8?255:chan>2, &ret);
+    chan, mapping_family, &ret);
   if (enc == NULL) fatal("Error: failed to create encoder: %s\n", ope_strerror(ret));
   data.enc = enc;