shithub: sox

Download patch

ref: fa1c063c5073beb4d675c72c522bf2f3e3115f76
parent: 6830d4ab3cefe8592fc784b18551c88b21a8ccfb
author: Mans Rullgard <mans@mansr.com>
date: Wed Aug 5 17:46:53 EDT 2020

wav: clean up header parsing sequence

Replace sprawling code with a clearer loop for walking the chunks.

--- a/src/wav.c
+++ b/src/wav.c
@@ -92,8 +92,6 @@
     gsm_signal     *gsmsample;
     int            gsmindex;
     size_t      gsmbytecount;    /* counts bytes written to data block */
-    sox_bool       isRF64;          /* True if file being read is a RF64 */
-    uint64_t       ds64_dataSize;   /* Size of data chunk from ds64 header */
 } priv_t;
 
 static char *wav_format_str(unsigned wFormatTag);
@@ -371,47 +369,9 @@
 /* General Sox WAV file code                                                */
 /****************************************************************************/
 
-static int sndfile_workaround(uint64_t *len, sox_format_t *ft) {
-    char magic[5];
-    off_t here;
-
-    here = lsx_tell(ft);
-
-    lsx_debug("Attempting work around for bad ds64 length bug");
-
-    /* Seek to last four bytes of chunk, assuming size is correct. */
-    if (lsx_seeki(ft, (off_t)(*len)-4, SEEK_CUR) != SOX_SUCCESS)
-    {
-        lsx_fail_errno(ft, SOX_EHDR, "WAV chunk appears to have invalid size %ld.", *len);
-        return SOX_EOF;
-    }
-
-    /* Get the last four bytes to see if it is an "fmt " chunk */
-    if (lsx_reads(ft, magic, (size_t)4) == SOX_EOF)
-    {
-        lsx_fail_errno(ft,SOX_EHDR, "WAV chunk appears to have invalid size %ld.", *len);
-        return SOX_EOF;
-    }
-
-    /* Seek back to where we were, which won't work if you're piping */
-    if (lsx_seeki(ft, here, SEEK_SET)!=SOX_SUCCESS)
-    {
-        lsx_fail_errno(ft,SOX_EHDR, "Cannot seek backwards to work around possible broken header.");
-        return SOX_EOF;
-    }
-    if (memcmp(magic, "fmt ", (size_t)4)==0)
-    {
-        /* If the last four bytes were "fmt ", len is almost certainly four bytes too big. */
-        lsx_debug("File had libsndfile bug, working around tell=%ld", lsx_tell(ft));
-        *len -= 4;
-    }
-    return SOX_SUCCESS;
-}
-
 static int findChunk(sox_format_t * ft, const char *Label, uint64_t *len)
 {
     char magic[5];
-    priv_t *wav = (priv_t *) ft->priv;
     uint32_t len_tmp;
 
     lsx_debug("Searching for %2x %2x %2x %2x", Label[0], Label[1], Label[2], Label[3]);
@@ -431,35 +391,8 @@
             return SOX_EOF;
         }
 
-        if (len_tmp == 0xffffffff && wav->isRF64==sox_true)
-        {
-            /* Chunk length should come from ds64 header */
-            if (memcmp(magic, "data", (size_t)4)==0)
-            {
-                *len = wav->ds64_dataSize;
-            }
-            else
-            {
-                lsx_fail_errno(ft, SOX_EHDR, "Cannot yet read block sizes of arbitary RF64 chunks, cannot find chunk '%s'", Label);
-                return SOX_EOF;
-            }
-        }
-        else {
-            *len = len_tmp;
-        }
+        *len = len_tmp;
 
-        /* Work around for a bug in libsndfile
-         * https://github.com/erikd/libsndfile/commit/7fa1c57c37844a9d44642ea35e6638238b8af19e#src/rf64.c
-           The ds64 chunk should be 0x1c bytes, not 0x20.
-         */
-        if ((*len) == 0x20 && memcmp(Label, "ds64", (size_t)4)==0)
-        {
-            int fail;
-            if ((fail = sndfile_workaround(len, ft)) != SOX_SUCCESS) {
-                return fail;
-            }
-        }
-
         if (memcmp(Label, magic, (size_t)4) == 0)
             break; /* Found the given chunk */
 
@@ -818,12 +751,31 @@
       return SOX_EOF;
     }
 
-    /* Skip anything left over from fmt chunk */
-    lsx_seeki(ft, (off_t)len, SEEK_CUR);
-
     return 0;
 }
 
+static sox_bool valid_chunk_id(const char p[4])
+{
+    int i;
+
+    for (i = 0; i < 4; i++)
+        if (p[i] < 0x20 || p[i] > 0x7f)
+            return sox_false;
+
+    return sox_true;
+}
+
+static int read_chunk_header(sox_format_t *ft, char tag[4], uint32_t *len)
+{
+    int r;
+
+    r = lsx_readbuf(ft, tag, 4);
+    if (r < 4)
+        return SOX_EOF;
+
+    return lsx_readdw(ft, len);
+}
+
 /*
  * Do anything required before you start reading samples.
  * Read file header.
@@ -834,100 +786,145 @@
 static int startread(sox_format_t * ft)
 {
     priv_t *       wav = (priv_t *) ft->priv;
-    char        magic[5];
+    char        magic[5] = { 0 };
+    uint32_t    clen;
     uint64_t    len;
     int err;
 
+    sox_bool isRF64 = sox_false;
+    uint64_t ds64_riff_size;
+    uint64_t ds64_data_size;
+    uint64_t ds64_sample_count;
+
     /* wave file characteristics */
     uint64_t      qwRiffLength;
-    uint32_t      dwRiffLength_tmp;
-    uint64_t      qwDataLength;    /* length of sound data in bytes */
+    uint64_t      qwDataLength = 0;
     char text[256];
     uint32_t      dwLoopPos;
+    sox_bool have_fmt = sox_false;
 
     ft->sox_errno = SOX_SUCCESS;
     wav->ignoreSize = ft->signal.length == SOX_IGNORE_LENGTH;
+    ft->encoding.reverse_bytes = MACHINE_IS_BIGENDIAN;
 
-    if (lsx_reads(ft, magic, (size_t)4) == SOX_EOF || (strncmp("RIFF", magic, (size_t)4) != 0 &&
-                                             strncmp("RIFX", magic, (size_t)4) != 0 && strncmp("RF64", magic, (size_t)4)!=0 ))
-    {
-        lsx_fail_errno(ft,SOX_EHDR,"WAVE: RIFF header not found");
+    if (read_chunk_header(ft, magic, &clen))
         return SOX_EOF;
-    }
 
-    /* RIFX is a Big-endian RIFF */
-    if (strncmp("RIFX", magic, (size_t)4) == 0)
-    {
+    if (!memcmp(magic, "RIFX", 4)) {
         lsx_debug("Found RIFX header");
         ft->encoding.reverse_bytes = MACHINE_IS_LITTLEENDIAN;
+    } else if (!memcmp(magic, "RF64", 4)) {
+        lsx_debug("Found RF64 header");
+        isRF64 = sox_true;
+    } else if (memcmp(magic, "RIFF", 4)) {
+        lsx_fail_errno(ft,SOX_EHDR,"WAVE: RIFF header not found");
+        return SOX_EOF;
     }
-    else ft->encoding.reverse_bytes = MACHINE_IS_BIGENDIAN;
 
-    if (strncmp("RF64", magic, (size_t)4) == 0)
-    {
-        wav->isRF64 = sox_true;
-    }
-    else
-    {
-        wav->isRF64 = sox_false;
-    }
+    qwRiffLength = clen;
 
-    lsx_readdw(ft, &dwRiffLength_tmp);
-    qwRiffLength = dwRiffLength_tmp;
-
-    if (lsx_reads(ft, magic, (size_t)4) == SOX_EOF || strncmp("WAVE", magic, (size_t)4))
-    {
+    if (lsx_readbuf(ft, magic, 4) < 4 || memcmp(magic, "WAVE", 4)) {
         lsx_fail_errno(ft,SOX_EHDR,"WAVE header not found");
         return SOX_EOF;
     }
 
-    if (wav->isRF64 && findChunk(ft, "ds64", &len) != SOX_EOF) {
-        lsx_debug("Found ds64 header");
+    while (!read_chunk_header(ft, magic, &clen)) {
+        off_t cstart = lsx_tell(ft);
+        off_t pos;
 
-        if (dwRiffLength_tmp==0xffffffff)
-        {
-            lsx_readqw(ft, &qwRiffLength);
+        if (!valid_chunk_id(magic)) {
+            lsx_fail_errno(ft, SOX_EHDR, "invalid chunk ID found");
+            return SOX_EOF;
         }
-        else
-        {
-            lsx_skipbytes(ft, (size_t)8);
+
+        lsx_debug("Found chunk '%s', size %u", magic, clen);
+
+        if (!memcmp(magic, "ds64", 4)) {
+            if (!isRF64)
+                lsx_warn("ds64 chunk in non-RF64 file");
+
+            if (clen < 28) {
+                lsx_fail_errno(ft, SOX_EHDR, "ds64 chunk too small");
+                return SOX_EOF;
+            }
+
+            if (clen == 32) {
+                lsx_warn("ds64 chunk size invalid, attempting workaround");
+                clen = 28;
+            }
+
+            lsx_readqw(ft, &ds64_riff_size);
+            lsx_readqw(ft, &ds64_data_size);
+            lsx_readqw(ft, &ds64_sample_count);
+
+            goto next;
         }
-        lsx_readqw(ft, &wav->ds64_dataSize);
-        lsx_skipbytes(ft, (size_t)len-16);
-    }
 
-    /* Now look for the format chunk */
-    if (findChunk(ft, "fmt ", &len) == SOX_EOF)
-    {
-        lsx_fail_errno(ft,SOX_EHDR,"WAVE chunk fmt not found");
-        return SOX_EOF;
+        if (!memcmp(magic, "fmt ", 4)) {
+            err = wav_read_fmt(ft, clen);
+            if (err)
+                return err;
+
+            have_fmt = sox_true;
+
+            goto next;
+        }
+
+        if (!memcmp(magic, "data", 4)) {
+            if (isRF64 && clen == UINT32_MAX)
+                clen = ds64_data_size;
+
+            qwDataLength = clen;
+            wav->dataStart = lsx_tell(ft);
+
+            if (qwDataLength == UINT32_MAX || qwDataLength == MS_UNSPEC)
+                break;
+
+            if (!ft->seekable)
+                break;
+
+            goto next;
+        }
+
+    next:
+        pos = lsx_tell(ft);
+        clen += clen & 1;
+
+        if (pos > cstart + clen) {
+            lsx_fail_errno(ft, SOX_EHDR, "malformed chunk %s", magic);
+            return SOX_EOF;
+        }
+
+        err = lsx_seeki(ft, cstart + clen - pos, SEEK_CUR);
+        if (err)
+            return SOX_EOF;
     }
 
-    err = wav_read_fmt(ft, len);
-    if (err)
-        return err;
+    if (isRF64) {
+        if (wav->numSamples == UINT32_MAX)
+            wav->numSamples = ds64_sample_count;
 
-    /* for non-PCM formats, there's a 'fact' chunk before
-     * the upcoming 'data' chunk */
+        if (qwRiffLength == UINT32_MAX)
+            qwRiffLength = ds64_riff_size;
+    }
 
-    /* Now look for the wave data chunk */
-    if (findChunk(ft, "data", &len) == SOX_EOF)
-    {
-        lsx_fail_errno(ft, SOX_EOF, "Could not find data chunk.");
+    if (!have_fmt) {
+        lsx_fail_errno(ft, SOX_EOF, "fmt chunk not found");
         return SOX_EOF;
     }
 
-    /* ds64 size will have been applied in findChunk */
-    qwDataLength = len;
-
-    if (qwDataLength == MS_UNSPEC || qwDataLength == UINT32_MAX) {
-      wav->ignoreSize = 1;
-      lsx_debug("WAV data length is magic value or UINT32_MAX, ignoring.");
+    if (!wav->dataStart) {
+        lsx_fail_errno(ft, SOX_EOF, "data chunk not found");
+        return SOX_EOF;
     }
 
+    if (ft->seekable)
+        lsx_seeki(ft, wav->dataStart, SEEK_SET);
 
-    /* Data starts here */
-    wav->dataStart = lsx_tell(ft);
+    if (qwDataLength == UINT32_MAX || qwDataLength == MS_UNSPEC) {
+        lsx_warn("WAV data length is magic value or UINT32_MAX, ignoring");
+        wav->ignoreSize = 1;
+    }
 
     switch (wav->formatTag)
     {
@@ -976,7 +973,7 @@
         /*Got this from the quake source.  I think it 32bit aligns the chunks
          * doubt any machine writing Cool Edit Chunks writes them at an odd
          * offset */
-        len = (len + 1) & ~1u;
+        len = (qwDataLength + 1) & ~1u;
         if (lsx_seeki(ft, (off_t)len, SEEK_CUR) == SOX_SUCCESS &&
             findChunk(ft, "LIST", &len) != SOX_EOF)
         {