shithub: h264bsd

Download patch

ref: 50eb34662722e6ee8d48f9f5109d4ea3e5d8cddc
parent: 2cbf64119c40de4896f033ac1fbfa63f406d1f14
author: Sam Leitch <sam.leitch@calgaryscientific.com>
date: Tue Mar 18 12:43:20 EDT 2014

Added 4:4:4 Y Cb Cr A output to hopefully speed up Flex.

binary files a/flex/lib/h264bsd_asm.swc b/flex/lib/h264bsd_asm.swc differ
--- a/flex/src/h264bsd/Decoder.as
+++ b/flex/src/h264bsd/Decoder.as
@@ -36,6 +36,7 @@
         private var _h264bsdPicHeight:int = 0;
         private var _h264bsdNextOutputPicture:int = 0;
         private var _h264bsdNextOutputPictureBGRA:int = 0;
+        private var _h264bsdNextOutputPictureYCbCrA:int = 0;
         private var _h264bsdDecode:int = 0;
         private var _h264bsdShutdown:int = 0;
         private var _h264bsdFree:int = 0;
@@ -131,6 +132,27 @@
             return bytes;
         }
         
+        public function getNextOutputPictureBytesYCbCrA():ByteArray { // next output picture, converted to YCbCrA by the native decoder
+            var picIdPtr:int = CModule.malloc(4);
+            var isIdrPicPtr:int = CModule.malloc(4);
+            var numErrMbsPtr:int = CModule.malloc(4);
+            // The three 4-byte slots above are the picId / isIdrPic / numErrMbs out-parameters of the native call; their values are never read here.
+            var bytesPtr:int = 0;
+            var args:Vector.<int> = new <int>[_storagePtr, picIdPtr, isIdrPicPtr, numErrMbsPtr];
+            bytesPtr = CModule.callI(_h264bsdNextOutputPictureYCbCrA, args);
+            // NOTE(review): bytesPtr is not checked for 0; h264bsdNextOutputPictureYCbCrA returns NULL when no picture is available.
+            var bytes:ByteArray = new ByteArray();
+            bytes.endian = Endian.LITTLE_ENDIAN;
+            CModule.readBytes(bytesPtr, outputByteLengthRGBA, bytes); // NOTE(review): reuses the RGBA byte length; presumably identical for YCbCrA (one u32 per pixel) - confirm
+            bytes.position = 0;
+            // Release the scratch out-parameter slots. NOTE(review): no try/finally, so they are not freed if anything above throws.
+            if (picIdPtr != 0) CModule.free(picIdPtr);
+            if (isIdrPicPtr != 0) CModule.free(isIdrPicPtr);
+            if (numErrMbsPtr != 0) CModule.free(numErrMbsPtr);
+            
+            return bytes;
+        }
+        
         public function getNextOutputPictureBytes():ByteArray {
             var picIdPtr:int = CModule.malloc(4);
             var isIdrPicPtr:int = CModule.malloc(4);
@@ -154,55 +176,14 @@
         
         public function drawNextOutputPicture(target:Bitmap, transform:Matrix = null):void
         {
-            var i420Bytes:ByteArray = getNextOutputPictureBytes();
+            var outputPictureBytes:ByteArray = getNextOutputPictureBytesYCbCrA();
             var cinfo:CroppingInfo = getCroppingInfo();
             
             var width:int = cinfo.uncroppedWidth;
             var height:int = cinfo.uncroppedHeight;
-            
-            var yPtr:int = 0;
-            var cbPtr:int = width * height;
-            var crPtr:int = cbPtr + width / 2 * height / 2;
-            
-            var yuvVector:Vector.<uint> = new Vector.<uint>(width * height);
-            var yuvPtr:int = 0;
-            
-            var y:int = 0;
-            var x:int = 0;
-            
-            while(y < height)
-            {
-                var yuv:uint = 0xff;
-                yuv = (yuv << 8) + i420Bytes[yPtr];
-                yuv = (yuv << 8) + i420Bytes[cbPtr];
-                yuv = (yuv << 8) + i420Bytes[crPtr];
-                
-                i420Bytes[yuvPtr] = yuv;
-                
-                ++x;
-                ++yPtr;
-                ++yuvPtr;
-                
-                if((x & 1) == 0)
-                {
-                    ++cbPtr;
-                    ++crPtr;
-                }
-                
-                if(x < width) continue;
-                
-                ++y;
-                x = 0;
-                
-                if((y & 1) > 0)
-                {
-                    cbPtr -= width / 2;
-                    crPtr -= width / 2;
-                }
-            }
-            
+                        
             var outputPicture:BitmapData = new BitmapData(width, height);
-            outputPicture.setVector(new Rectangle(0,0, width, height), yuvVector);
+            outputPicture.setPixels(new Rectangle(0,0, width, height), outputPictureBytes);
             
             var yuvFilter:ColorMatrixFilter = new ColorMatrixFilter(
                 [1.1643828125, 0, 1.59602734375, -.87078515625, 0,
@@ -264,6 +245,7 @@
             _h264bsdPicHeight = CModule.getPublicSymbol("h264bsdPicHeight");
             _h264bsdNextOutputPicture = CModule.getPublicSymbol("h264bsdNextOutputPicture");
             _h264bsdNextOutputPictureBGRA = CModule.getPublicSymbol("h264bsdNextOutputPictureBGRA");
+            _h264bsdNextOutputPictureYCbCrA = CModule.getPublicSymbol("h264bsdNextOutputPictureYCbCrA");
             _h264bsdDecode = CModule.getPublicSymbol("h264bsdDecode");
             _h264bsdShutdown = CModule.getPublicSymbol("h264bsdShutdown");
             _h264bsdFree = CModule.getPublicSymbol("h264bsdFree");
@@ -278,7 +260,8 @@
                 _h264bsdShutdown == 0 ||
                 _h264bsdFree == 0 ||
                 _h264bsdCroppingParams == 0 || 
-                _h264bsdNextOutputPictureBGRA == 0) {
+                _h264bsdNextOutputPictureBGRA == 0 ||
+                _h264bsdNextOutputPictureYCbCrA == 0) {
                 throw new Error("One or more missing entries in h264bsd function table.");
             }
         }
--- a/src/h264bsd_decoder.c
+++ b/src/h264bsd_decoder.c
@@ -707,6 +707,48 @@
 
 /*------------------------------------------------------------------------------
 
+    Function: h264bsdNextOutputPictureYCbCrA
+
+        Functional description:
+            Get next output picture in display order, converted to YCbCrA.
+            YCbCrA is a 4:4:4 format that uses u32 pixels where the MSB is alpha.
+
+        Inputs:
+            pStorage    pointer to storage data structure
+
+        Outputs:
+            picId       identifier of the picture will be stored here
+            isIdrPic    IDR flag of the picture will be stored here
+            numErrMbs   number of concealed macroblocks in the picture
+                        will be stored here
+
+        Returns:
+            pointer to the picture data (pStorage->rgbConversionBuffer, reused)
+            NULL if no picture is available or the buffer cannot be allocated
+
+------------------------------------------------------------------------------*/
+u32* h264bsdNextOutputPictureYCbCrA(storage_t *pStorage, u32 *picId, u32 *isIdrPic, u32 *numErrMbs)
+{
+    u32 width = h264bsdPicWidth(pStorage) * 16;
+    u32 height = h264bsdPicHeight(pStorage) * 16;
+    u8* data = h264bsdNextOutputPicture(pStorage, picId, isIdrPic, numErrMbs);
+    size_t rgbSize = sizeof(u32) * width * height;
+
+    if(data == NULL) return NULL;
+    /* Grow the shared conversion buffer; record the new size only if malloc succeeded. */
+    if(pStorage->rgbConversionBufferSize < rgbSize)
+    {
+        free(pStorage->rgbConversionBuffer); /* free(NULL) is a no-op */
+        pStorage->rgbConversionBuffer = (u32*)malloc(rgbSize);
+        pStorage->rgbConversionBufferSize = pStorage->rgbConversionBuffer ? rgbSize : 0;
+    }
+    if(pStorage->rgbConversionBuffer == NULL) return NULL; /* allocation failed: never convert into NULL */
+    h264bsdConvertToYCbCrA(width, height, data, pStorage->rgbConversionBuffer);
+    return pStorage->rgbConversionBuffer;
+}
+
+/*------------------------------------------------------------------------------
+
     Function: h264bsdPicWidth
 
         Functional description:
@@ -1109,7 +1151,7 @@
             data        pointer to decoded image data
 
         Outputs:
-            rgbaData     pointer to the buffer where the RGBA data will be written
+            pOutput     pointer to the buffer where the RGBA data will be written
 
         Returns:
             none
@@ -1116,7 +1158,7 @@
 
 ------------------------------------------------------------------------------*/
 
-void h264bsdConvertToRGBA(u32 width, u32 height, u8* data, u32 *rgbaData)
+void h264bsdConvertToRGBA(u32 width, u32 height, u8* data, u32 *pOutput)
 {
     const int w = (int)width;
     const int h = (int)height;
@@ -1130,7 +1172,7 @@
     u8* luma = data;
     u8* cb = data + ySize;
     u8* cr = data + ySize + uSize;
-    u32* rgba = rgbaData;
+    u32* rgba = pOutput;
 
     while(y < h)
     {
@@ -1190,7 +1232,7 @@
             data        pointer to decoded image data
 
         Outputs:
-            bgraData     pointer to the buffer where the BGRA data will be written
+            pOutput     pointer to the buffer where the BGRA data will be written
 
         Returns:
             none
@@ -1197,7 +1239,7 @@
 
 ------------------------------------------------------------------------------*/
 
-void h264bsdConvertToBGRA(u32 width, u32 height, u8* data, u32 *bgraData)
+void h264bsdConvertToBGRA(u32 width, u32 height, u8* data, u32 *pOutput)
 {
     const int w = (int)width;
     const int h = (int)height;
@@ -1211,7 +1253,7 @@
     u8* luma = data;
     u8* cb = data + ySize;
     u8* cr = data + ySize + uSize;
-    u32* bgra = bgraData;
+    u32* bgra = pOutput;
 
     while(y < h)
     {
@@ -1232,6 +1274,86 @@
 
         ++x;
         ++bgra;
+        ++luma;
+
+        if(!(x & 1))
+        {
+            ++cb;
+            ++cr;
+        }
+
+        if(x < w) continue;
+
+        x = 0;
+        ++y;
+
+        if(y & 1)
+        {
+            cb -= w/2;
+            cr -= w/2;
+        }
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdConvertToYCbCrA
+
+        Functional description:
+            Convert decoded image data to YCbCrA format.
+            YCbCrA is a 4:4:4 format that uses u32 pixels where the MSB is alpha.
+            *Note* While this function is available, it is not heavily optimized.
+            If possible, you should use decoded image data directly. 
+            This function should only be used when there is no other way to get YCbCrA data.
+
+        Inputs:
+            width       width of the image in pixels
+            height      height of the image in pixels
+            data        pointer to decoded image data
+
+        Outputs:
+            pOutput     pointer to the buffer where the YCbCrA data will be written
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdConvertToYCbCrA(u32 width, u32 height, u8* data, u32 *pOutput)
+{
+    const int w = (int)width;
+    const int h = (int)height;
+
+    int x = 0;
+    int y = 0;
+
+    size_t ySize = w * h;
+    size_t uSize = w/2 * h/2;
+
+    u8* luma = data;
+    u8* cb = data + ySize;
+    u8* cr = data + ySize + uSize;
+    u32* yCbCr = pOutput;
+
+    while(y < h)
+    {
+        int c =  - 16;
+        int d =  - 128;
+        int e = *cr - 128;
+
+        u32 r = (u32)CLIP1((298*c         + 409*e + 128) >> 8);
+        u32 g = (u32)CLIP1((298*c - 100*d - 208*e + 128) >> 8);
+        u32 b = (u32)CLIP1((298*c + 516*d         + 128) >> 8);
+
+        u32 pixel = 0xff;
+        pixel = (pixel << 8) + *cr;
+        pixel = (pixel << 8) + *cb;
+        pixel = (pixel << 8) + *luma;
+
+        *yCbCr = pixel;
+
+        ++x;
+        ++yCbCr;
         ++luma;
 
         if(!(x & 1))
--- a/src/h264bsd_decoder.h
+++ b/src/h264bsd_decoder.h
@@ -69,6 +69,7 @@
 u8* h264bsdNextOutputPicture(storage_t *pStorage, u32 *picId, u32 *isIdrPic, u32 *numErrMbs);
 u32* h264bsdNextOutputPictureRGBA(storage_t *pStorage, u32 *picId, u32 *isIdrPic, u32 *numErrMbs);
 u32* h264bsdNextOutputPictureBGRA(storage_t *pStorage, u32 *picId, u32 *isIdrPic, u32 *numErrMbs);
+u32* h264bsdNextOutputPictureYCbCrA(storage_t *pStorage, u32 *picId, u32 *isIdrPic, u32 *numErrMbs);
 
 u32 h264bsdPicWidth(storage_t *pStorage);
 u32 h264bsdPicHeight(storage_t *pStorage);
@@ -87,8 +88,9 @@
 storage_t* h264bsdAlloc();
 void h264bsdFree(storage_t *pStorage);
 
-void h264bsdConvertToRGBA(u32 width, u32 height, u8* data, u32 *rgbData);
-void h264bsdConvertToBGRA(u32 width, u32 height, u8* data, u32 *rgbData);
+void h264bsdConvertToRGBA(u32 width, u32 height, u8* data, u32 *pOutput);
+void h264bsdConvertToBGRA(u32 width, u32 height, u8* data, u32 *pOutput);
+void h264bsdConvertToYCbCrA(u32 width, u32 height, u8* data, u32 *pOutput);
 
 #endif /* #ifdef H264SWDEC_DECODER_H */