ref: 1dc0c8014b77a32a8636115318fdc11570754b3e
parent: c6f10472878c229161b88319780967c1ca6b168f
author: Mark Harris <mark.hsj@gmail.com>
date: Sun Apr 15 17:48:47 EDT 2018
opusenc: Add --music, --speech, --no-phase-inv --music and --speech override automatic detection and affect low bitrates. --no-phase-inv disables use of phase inversion for intensity stereo.
--- a/man/opusenc.1
+++ b/man/opusenc.1
@@ -26,6 +26,10 @@
] [
.B --hard-cbr
] [
+.B --music
+] [
+.B --speech
+] [
.B --comp
.I complexity
] [
@@ -39,6 +43,8 @@
] [
.B --downmix-stereo
] [
+.B --no-phase-inv
+] [
.B --max-delay
.I ms
] [
@@ -132,6 +138,7 @@
64 kbit/s per mono stream and 96 kbit/s per coupled pair.
.IP "--vbr"
Use variable bitrate encoding (default)
+
In VBR mode the bitrate may go up and down freely depending on the content
to achieve more consistent quality.
.IP "--cvbr"
@@ -147,6 +154,22 @@
speech codecs work. This delivers lower overall quality but is useful
where bitrate changes might leak data in encrypted channels or on
synchronous transports.
+.IP "--music"
+Override automatic detection and tune low bitrate encoding for music.
+By default, music is detected automatically and the classification
+may vary over time.
+
+Tuning impacts lower bitrates that involve tradeoffs between speech
+clarity and musical accuracy, and has no impact at bitrates typically
+used for high quality music encoding.
+.IP "--speech"
+Override automatic detection and tune low bitrate encoding for speech.
+By default, speech is detected automatically and the classification
+may vary over time.
+
+Tuning impacts lower bitrates that involve tradeoffs between speech
+clarity and musical accuracy, and has no impact at bitrates typically
+used for high quality music encoding.
.IP "--comp N"
Set encoding computational complexity (0-10, default: 10). Zero gives the
fastest encodes but lower quality, while 10 gives the highest quality
@@ -165,6 +188,10 @@
Downmix to mono
.IP "--downmix-stereo"
Downmix to stereo (if >2 channels input)
+.IP "--no-phase-inv"
+Disable use of phase inversion for intensity stereo. This trades some
+stereo quality for a higher quality mono downmix, and is useful when
+encoding stereo audio that is likely to be downmixed to mono after decoding.
.IP "--max-delay N"
Set maximum container delay in milliseconds (0-1000, default: 1000)
--- a/src/opusenc.c
+++ b/src/opusenc.c
@@ -151,6 +151,8 @@
printf(" --vbr Use variable bitrate encoding (default)\n");
printf(" --cvbr Use constrained variable bitrate encoding\n");
printf(" --hard-cbr Use hard constant bitrate encoding\n");
+ printf(" --music Tune low bitrates for music (override automatic detection)\n");
+ printf(" --speech Tune low bitrates for speech (override automatic detection)\n");
printf(" --comp n Set encoding complexity (0-10, default: 10 (slowest))\n");
printf(" --framesize n Set maximum frame size in milliseconds\n");
printf(" (2.5, 5, 10, 20, 40, 60, default: 20)\n");
@@ -157,6 +159,9 @@
printf(" --expect-loss n Set expected packet loss in percent (default: 0)\n");
printf(" --downmix-mono Downmix to mono\n");
printf(" --downmix-stereo Downmix to stereo (if >2 channels)\n");
+#ifdef OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST
+ printf(" --no-phase-inv Disable use of phase inversion for intensity stereo\n");
+#endif
printf(" --max-delay n Set maximum container delay in milliseconds\n");
printf(" (0-1000, default: 1000)\n");
printf("\nMetadata options:\n");
@@ -324,6 +329,8 @@
{"hard-cbr",no_argument,NULL, 0},
{"vbr",no_argument,NULL, 0},
{"cvbr",no_argument,NULL, 0},
+ {"music", no_argument, NULL, 0},
+ {"speech", no_argument, NULL, 0},
{"comp", required_argument, NULL, 0},
{"complexity", required_argument, NULL, 0},
{"framesize", required_argument, NULL, 0},
@@ -331,6 +338,7 @@
{"downmix-mono",no_argument,NULL, 0},
{"downmix-stereo",no_argument,NULL, 0},
{"no-downmix",no_argument,NULL, 0},
+ {"no-phase-inv", no_argument, NULL, 0},
{"max-delay", required_argument, NULL, 0},
{"serial", required_argument, NULL, 0},
{"save-range", required_argument, NULL, 0},
@@ -387,9 +395,11 @@
int chan=2;
int with_hard_cbr=0;
int with_cvbr=0;
+ int signal_type=OPUS_AUTO;
int expect_loss=0;
int complexity=10;
int downmix=0;
+ int no_phase_inv=0;
int *opt_ctls_ctlval;
int opt_ctls=0;
int max_ogg_delay=48000; /*48kHz samples*/
@@ -531,6 +541,12 @@
downmix=2;
} else if (strcmp(optname, "no-downmix")==0) {
downmix=-1;
+ } else if (strcmp(optname, "no-phase-inv")==0) {
+ no_phase_inv=1;
+ } else if (strcmp(optname, "music")==0) {
+ signal_type=OPUS_SIGNAL_MUSIC;
+ } else if (strcmp(optname, "speech")==0) {
+ signal_type=OPUS_SIGNAL_VOICE;
} else if (strcmp(optname, "expect-loss")==0) {
expect_loss=atoi(optarg);
if (expect_loss>100||expect_loss<0) {
@@ -875,6 +891,10 @@
with_cvbr, ope_strerror(ret));
}
}
+ ret = ope_encoder_ctl(enc, OPUS_SET_SIGNAL(signal_type));
+ if (ret != OPE_OK) {
+ fatal("Error: OPUS_SET_SIGNAL failed: %s\n", ope_strerror(ret));
+ }
ret = ope_encoder_ctl(enc, OPUS_SET_COMPLEXITY(complexity));
if (ret != OPE_OK) {
fatal("Error: OPUS_SET_COMPLEXITY %d failed: %s\n", complexity, ope_strerror(ret));
@@ -890,6 +910,17 @@
fprintf(stderr, "Warning: OPUS_SET_LSB_DEPTH failed: %s\n", ope_strerror(ret));
}
#endif
+ if (no_phase_inv) {
+#ifdef OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST
+ ret = ope_encoder_ctl(enc, OPUS_SET_PHASE_INVERSION_DISABLED(1));
+ if (ret != OPE_OK) {
+ fprintf(stderr, "Warning: OPUS_SET_PHASE_INVERSION_DISABLED failed: %s\n",
+ ope_strerror(ret));
+ }
+#else
+ fprintf(stderr,"Warning: Disabling phase inversion is not supported.\n");
+#endif
+ }
/*This should be the last set of SET ctls, so it can override the defaults.*/
for (i=0;i<opt_ctls;i++) {