ref: ae42b1006b61dbcf2d1ccc8f93260dd18be69782
parent: 19b619acde594e882412c25420169fabdc256120
parent: f912b8420f1ee8b5617bd155c6faa8f870375769
author: Lion <lieff@users.noreply.github.com>
date: Wed Aug 22 10:44:14 EDT 2018
Merge pull request #33 from kcat/master Add float32 output, rename original 16-bit output method
--- a/minimp3.h
+++ b/minimp3.h
@@ -26,12 +26,16 @@
#endif
void mp3dec_init(mp3dec_t *dec);
+#ifndef MINIMP3_FLOAT_OUTPUT /* macro not defined: decoder writes 16-bit (short) PCM */
int mp3dec_decode_frame(mp3dec_t *dec, const unsigned char *mp3, int mp3_bytes, short *pcm, mp3dec_frame_info_t *info);
+#else /* MINIMP3_FLOAT_OUTPUT defined: decoder writes float PCM */
+int mp3dec_decode_frame(mp3dec_t *dec, const unsigned char *mp3, int mp3_bytes, float *pcm, mp3dec_frame_info_t *info); /* same entry point, float *pcm instead of short *pcm */
+void mp3dec_f32_to_s16(const float *in, short *out, int num_samples); /* converts num_samples floats from in[] to shorts in out[]: scale by 32768, saturate to the 16-bit range; only declared in float-output builds */
+#endif /* MINIMP3_FLOAT_OUTPUT */
#ifdef __cplusplus
}
#endif
-#endif /*MINIMP3_H*/
#ifdef MINIMP3_IMPLEMENTATION
@@ -1365,19 +1369,28 @@
#endif
}
+#ifndef MINIMP3_FLOAT_OUTPUT /* 16-bit output build */
+typedef short mp3d_sample_t; /* element type of the PCM buffers the synthesis/decode functions write to */
+
static short mp3d_scale_pcm(float sample)
{
- if (sample > 32767.0) return (short) 32767;
- if (sample < -32768.0) return (short)-32768;
- int s = (int)(sample + .5f);
+ if (sample >= 32766.5) return (short) 32767; /* saturate high: sample + .5f would reach 32767 or more */
+ if (sample <= -32767.5) return (short)-32768; /* saturate low before the narrowing cast below */
+ short s = (short)(sample + .5f); /* add the .5 bias, then the cast drops the fractional part (toward zero) */
s -= (s < 0); /* away from zero, to be compliant */
- if (s > 32767) return (short) 32767;
- if (s < -32768) return (short)-32768;
- return (short)s;
+ return s; /* value was range-checked above, so no second clamp is done here */
}
+#else /* MINIMP3_FLOAT_OUTPUT: float output build */
+typedef float mp3d_sample_t; /* element type of the PCM buffers the synthesis/decode functions write to */
-static void mp3d_synth_pair(short *pcm, int nch, const float *z)
+static float mp3d_scale_pcm(float sample) /* float-output counterpart of the short version: same name, returns float */
{
+ return sample / 32768.0f; /* scale only; no rounding or clamping is applied in float mode */
+}
+#endif /* MINIMP3_FLOAT_OUTPUT */
+
+static void mp3d_synth_pair(mp3d_sample_t *pcm, int nch, const float *z)
+{
float a;
a = (z[14*64] - z[ 0]) * 29;
a += (z[ 1*64] + z[13*64]) * 213;
@@ -1401,11 +1414,11 @@
pcm[16*nch] = mp3d_scale_pcm(a);
}
-static void mp3d_synth(float *xl, short *dstl, int nch, float *lins)
+static void mp3d_synth(float *xl, mp3d_sample_t *dstl, int nch, float *lins)
{
int i;
float *xr = xl + 576*(nch - 1);
- short *dstr = dstl + (nch - 1);
+ mp3d_sample_t *dstr = dstl + (nch - 1);
static const float g_win[] = {
-1,26,-31,208,218,401,-519,2063,2000,4788,-5517,7134,5959,35640,-39336,74992,
@@ -1462,6 +1475,7 @@
V0(0) V2(1) V1(2) V2(3) V1(4) V2(5) V1(6) V2(7)
{
+#ifndef MINIMP3_FLOAT_OUTPUT
#if HAVE_SSE
static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
@@ -1490,6 +1504,32 @@
vst1_lane_s16(dstl + (47 - i)*nch, pcma, 2);
vst1_lane_s16(dstl + (49 + i)*nch, pcmb, 2);
#endif
+
+#else
+
+ static const f4 g_scale = { 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f };
+ a = VMUL(a, g_scale);
+ b = VMUL(b, g_scale);
+#if HAVE_SSE
+ _mm_store_ss(dstr + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)));
+ _mm_store_ss(dstr + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1)));
+ _mm_store_ss(dstl + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)));
+ _mm_store_ss(dstl + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 0, 0, 0)));
+ _mm_store_ss(dstr + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3)));
+ _mm_store_ss(dstr + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 3, 3)));
+ _mm_store_ss(dstl + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)));
+ _mm_store_ss(dstl + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 2, 2)));
+#else
+ vst1q_lane_f32(dstr + (15 - i)*nch, a, 1);
+ vst1q_lane_f32(dstr + (17 + i)*nch, b, 1);
+ vst1q_lane_f32(dstl + (15 - i)*nch, a, 0);
+ vst1q_lane_f32(dstl + (17 + i)*nch, b, 0);
+ vst1q_lane_f32(dstr + (47 - i)*nch, a, 3);
+ vst1q_lane_f32(dstr + (49 + i)*nch, b, 3);
+ vst1q_lane_f32(dstl + (47 - i)*nch, a, 2);
+ vst1q_lane_f32(dstl + (49 + i)*nch, b, 2);
+#endif
+#endif /* MINIMP3_FLOAT_OUTPUT */
}
} else
#endif
@@ -1527,7 +1567,7 @@
#endif
}
-static void mp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, short *pcm, float *lins)
+static void mp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, mp3d_sample_t *pcm, float *lins)
{
int i;
for (i = 0; i < nch; i++)
@@ -1611,7 +1651,7 @@
dec->header[0] = 0;
}
-int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, short *pcm, mp3dec_frame_info_t *info)
+int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info)
{
int i = 0, igr, frame_size = 0, success = 1;
const uint8_t *hdr;
@@ -1704,4 +1744,68 @@
}
return success*hdr_frame_samples(dec->header);
}
+
+#ifdef MINIMP3_FLOAT_OUTPUT
+void mp3dec_f32_to_s16(const float *in, short *out, int num_samples) /* convert num_samples floats from in[] into 16-bit samples in out[]: multiply by 32768 and saturate to [-32768, 32767] */
+{
+ if(num_samples > 0) /* zero or negative counts: nothing is read or written */
+ {
+ int i = 0;
+#if HAVE_SIMD
+ int aligned_count = num_samples & ~7; /* num_samples rounded down to a multiple of 8 */
+
+ for(;i < aligned_count;i+=8) /* SIMD path: 8 samples per iteration, as two 4-float vectors */
+ {
+ static const f4 g_scale = { 32768.0f, 32768.0f, 32768.0f, 32768.0f }; /* per-lane multiplier applied to the input */
+ f4 a = VMUL(VLD(&in[i ]), g_scale);
+ f4 b = VMUL(VLD(&in[i+4]), g_scale);
+#if HAVE_SSE
+ static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
+ static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
+ __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)), /* clamp to [g_min, g_max], convert to int32, pack both halves into 8 x int16 */
+ _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
+ out[i ] = _mm_extract_epi16(pcm8, 0); /* lanes are written one at a time with scalar stores */
+ out[i+1] = _mm_extract_epi16(pcm8, 1);
+ out[i+2] = _mm_extract_epi16(pcm8, 2);
+ out[i+3] = _mm_extract_epi16(pcm8, 3);
+ out[i+4] = _mm_extract_epi16(pcm8, 4);
+ out[i+5] = _mm_extract_epi16(pcm8, 5);
+ out[i+6] = _mm_extract_epi16(pcm8, 6);
+ out[i+7] = _mm_extract_epi16(pcm8, 7);
+#else /* !HAVE_SSE: the other SIMD path, written with NEON intrinsics */
+ int16x4_t pcma, pcmb;
+ a = VADD(a, VSET(0.5f)); /* .5 bias before the float-to-int conversion */
+ b = VADD(b, VSET(0.5f));
+ pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0))))); /* convert to int32, add the (a < 0) compare mask reinterpreted as an integer, then saturating-narrow to int16 */
+ pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
+ vst1_lane_s16(out+i , pcma, 0);
+ vst1_lane_s16(out+i+1, pcma, 1);
+ vst1_lane_s16(out+i+2, pcma, 2);
+ vst1_lane_s16(out+i+3, pcma, 3);
+ vst1_lane_s16(out+i+4, pcmb, 0);
+ vst1_lane_s16(out+i+5, pcmb, 1);
+ vst1_lane_s16(out+i+6, pcmb, 2);
+ vst1_lane_s16(out+i+7, pcmb, 3);
+#endif /* HAVE_SSE */
+ }
+#endif /* HAVE_SIMD */
+ for(;i < num_samples;i++) /* scalar path: the tail left by the SIMD loop, or every sample when HAVE_SIMD is off */
+ {
+ float sample = in[i] * 32768.0f;
+ if(sample >= 32766.5) /* saturate high before the narrowing cast */
+ out[i] = (short) 32767;
+ else if (sample <= -32767.5) /* saturate low before the narrowing cast */
+ out[i] = (short)-32768;
+ else
+ {
+ short s = (short)(sample + .5f); /* add the .5 bias, then the cast drops the fractional part (toward zero) */
+ s -= (s < 0); /* away from zero, to be compliant */
+ out[i] = s;
+ }
+ }
+ }
+}
+#endif
+
#endif /*MINIMP3_IMPLEMENTATION*/
+#endif /*MINIMP3_H*/ /* NOTE(review): this #endif now closes after the MINIMP3_IMPLEMENTATION section instead of before it; if MINIMP3_H is a once-only include guard, a later #include that defines MINIMP3_IMPLEMENTATION in the same translation unit would be skipped - confirm this is intended */