ref: 50eb34662722e6ee8d48f9f5109d4ea3e5d8cddc
parent: 2cbf64119c40de4896f033ac1fbfa63f406d1f14
author: Sam Leitch <sam.leitch@calgaryscientific.com>
date: Tue Mar 18 12:43:20 EDT 2014
Added 4:4:4 Y Cb Cr A output to hopefully speed up Flex.
binary files a/flex/lib/h264bsd_asm.swc b/flex/lib/h264bsd_asm.swc differ
--- a/flex/src/h264bsd/Decoder.as
+++ b/flex/src/h264bsd/Decoder.as
@@ -36,6 +36,7 @@
private var _h264bsdPicHeight:int = 0;
private var _h264bsdNextOutputPicture:int = 0;
private var _h264bsdNextOutputPictureBGRA:int = 0;
+ private var _h264bsdNextOutputPictureYCbCrA:int = 0;
private var _h264bsdDecode:int = 0;
private var _h264bsdShutdown:int = 0;
private var _h264bsdFree:int = 0;
@@ -131,6 +132,27 @@
return bytes;
}
+ /**
+  * Fetches the next output picture from the native decoder as YCbCrA data
+  * and copies it out of native memory.
+  *
+  * @return A little-endian ByteArray positioned at 0 that holds
+  *         outputByteLengthRGBA bytes of picture data, or null when the
+  *         native call returns a NULL pointer (no picture available).
+  */
+ public function getNextOutputPictureBytesYCbCrA():ByteArray {
+     // Scratch slots for the native out-parameters (picId, isIdrPic, numErrMbs).
+     // Their values are not used here, but the native function expects pointers.
+     var picIdPtr:int = CModule.malloc(4);
+     var isIdrPicPtr:int = CModule.malloc(4);
+     var numErrMbsPtr:int = CModule.malloc(4);
+
+     try {
+         var args:Vector.<int> = new <int>[_storagePtr, picIdPtr, isIdrPicPtr, numErrMbsPtr];
+         var bytesPtr:int = CModule.callI(_h264bsdNextOutputPictureYCbCrA, args);
+
+         // A NULL result means there is nothing to copy; reading from address 0
+         // would hand the caller unrelated memory contents.
+         if (bytesPtr == 0) return null;
+
+         var bytes:ByteArray = new ByteArray();
+         bytes.endian = Endian.LITTLE_ENDIAN;
+         // The same byte length as the RGBA output is used for the copy.
+         CModule.readBytes(bytesPtr, outputByteLengthRGBA, bytes);
+         bytes.position = 0;
+
+         return bytes;
+     } finally {
+         // Always release the scratch slots, including when the native call
+         // or the copy throws.
+         if (picIdPtr != 0) CModule.free(picIdPtr);
+         if (isIdrPicPtr != 0) CModule.free(isIdrPicPtr);
+         if (numErrMbsPtr != 0) CModule.free(numErrMbsPtr);
+     }
+ }
+
public function getNextOutputPictureBytes():ByteArray {
var picIdPtr:int = CModule.malloc(4);
var isIdrPicPtr:int = CModule.malloc(4);
@@ -154,55 +176,14 @@
public function drawNextOutputPicture(target:Bitmap, transform:Matrix = null):void
{
- var i420Bytes:ByteArray = getNextOutputPictureBytes();
+ var outputPictureBytes:ByteArray = getNextOutputPictureBytesYCbCrA();
var cinfo:CroppingInfo = getCroppingInfo();
var width:int = cinfo.uncroppedWidth;
var height:int = cinfo.uncroppedHeight;
-
- var yPtr:int = 0;
- var cbPtr:int = width * height;
- var crPtr:int = cbPtr + width / 2 * height / 2;
-
- var yuvVector:Vector.<uint> = new Vector.<uint>(width * height);
- var yuvPtr:int = 0;
-
- var y:int = 0;
- var x:int = 0;
-
- while(y < height)
- {
- var yuv:uint = 0xff;
- yuv = (yuv << 8) + i420Bytes[yPtr];
- yuv = (yuv << 8) + i420Bytes[cbPtr];
- yuv = (yuv << 8) + i420Bytes[crPtr];
-
- i420Bytes[yuvPtr] = yuv;
-
- ++x;
- ++yPtr;
- ++yuvPtr;
-
- if((x & 1) == 0)
- {
- ++cbPtr;
- ++crPtr;
- }
-
- if(x < width) continue;
-
- ++y;
- x = 0;
-
- if((y & 1) > 0)
- {
- cbPtr -= width / 2;
- crPtr -= width / 2;
- }
- }
-
+
var outputPicture:BitmapData = new BitmapData(width, height);
- outputPicture.setVector(new Rectangle(0,0, width, height), yuvVector);
+ outputPicture.setPixels(new Rectangle(0,0, width, height), outputPictureBytes);
var yuvFilter:ColorMatrixFilter = new ColorMatrixFilter(
[1.1643828125, 0, 1.59602734375, -.87078515625, 0,
@@ -264,6 +245,7 @@
_h264bsdPicHeight = CModule.getPublicSymbol("h264bsdPicHeight");
_h264bsdNextOutputPicture = CModule.getPublicSymbol("h264bsdNextOutputPicture");
_h264bsdNextOutputPictureBGRA = CModule.getPublicSymbol("h264bsdNextOutputPictureBGRA");
+ _h264bsdNextOutputPictureYCbCrA = CModule.getPublicSymbol("h264bsdNextOutputPictureYCbCrA");
_h264bsdDecode = CModule.getPublicSymbol("h264bsdDecode");
_h264bsdShutdown = CModule.getPublicSymbol("h264bsdShutdown");
_h264bsdFree = CModule.getPublicSymbol("h264bsdFree");
@@ -278,7 +260,8 @@
_h264bsdShutdown == 0 ||
_h264bsdFree == 0 ||
_h264bsdCroppingParams == 0 ||
- _h264bsdNextOutputPictureBGRA == 0) {
+ _h264bsdNextOutputPictureBGRA == 0 ||
+ _h264bsdNextOutputPictureYCbCrA == 0) {
throw new Error("One or more missing entries in h264bsd function table.");
}
}
--- a/src/h264bsd_decoder.c
+++ b/src/h264bsd_decoder.c
@@ -707,6 +707,48 @@
/*------------------------------------------------------------------------------
+ Function: h264bsdNextOutputPictureYCbCrA
+
+ Functional description:
+ Get next output picture in display order, converted to YCbCrA.
+ YCbCrA is a 4:4:4 format that uses u32 pixels where the MSB is alpha.
+
+ Inputs:
+ pStorage pointer to storage data structure
+
+ Outputs:
+ picId identifier of the picture will be stored here
+ isIdrPic IDR flag of the picture will be stored here
+ numErrMbs number of concealed macroblocks in the picture
+ will be stored here
+
+ Returns:
+ pointer to the picture data
+ NULL if no pictures available for display
+
+------------------------------------------------------------------------------*/
+u32* h264bsdNextOutputPictureYCbCrA(storage_t *pStorage, u32 *picId, u32 *isIdrPic, u32 *numErrMbs)
+{
+    u32 width = h264bsdPicWidth(pStorage) * 16;
+    u32 height = h264bsdPicHeight(pStorage) * 16;
+    u8* data = h264bsdNextOutputPicture(pStorage, picId, isIdrPic, numErrMbs);
+    /* One u32 per output pixel. */
+    size_t rgbSize = sizeof(u32) * width * height;
+
+    if(data == NULL) return NULL;
+
+    /* The conversion buffer is cached on pStorage and only reallocated when
+       it is missing or too small for the current picture. */
+    if(pStorage->rgbConversionBuffer == NULL ||
+       pStorage->rgbConversionBufferSize < rgbSize)
+    {
+        if(pStorage->rgbConversionBuffer != NULL) free(pStorage->rgbConversionBuffer);
+        pStorage->rgbConversionBuffer = (u32*)malloc(rgbSize);
+
+        if(pStorage->rgbConversionBuffer == NULL)
+        {
+            /* Allocation failed: record that no buffer is held so the next
+               call retries, and report failure instead of converting into
+               a NULL destination. */
+            pStorage->rgbConversionBufferSize = 0;
+            return NULL;
+        }
+
+        /* Only record the new capacity once the allocation has succeeded. */
+        pStorage->rgbConversionBufferSize = rgbSize;
+    }
+
+    h264bsdConvertToYCbCrA(width, height, data, pStorage->rgbConversionBuffer);
+    return pStorage->rgbConversionBuffer;
+}
+
+/*------------------------------------------------------------------------------
+
Function: h264bsdPicWidth
Functional description:
@@ -1109,7 +1151,7 @@
data pointer to decoded image data
Outputs:
- rgbaData pointer to the buffer where the RGBA data will be written
+ pOutput pointer to the buffer where the RGBA data will be written
Returns:
none
@@ -1116,7 +1158,7 @@
------------------------------------------------------------------------------*/
-void h264bsdConvertToRGBA(u32 width, u32 height, u8* data, u32 *rgbaData)
+void h264bsdConvertToRGBA(u32 width, u32 height, u8* data, u32 *pOutput)
{
const int w = (int)width;
const int h = (int)height;
@@ -1130,7 +1172,7 @@
u8* luma = data;
u8* cb = data + ySize;
u8* cr = data + ySize + uSize;
- u32* rgba = rgbaData;
+ u32* rgba = pOutput;
while(y < h)
{
@@ -1190,7 +1232,7 @@
data pointer to decoded image data
Outputs:
- bgraData pointer to the buffer where the BGRA data will be written
+ pOutput pointer to the buffer where the BGRA data will be written
Returns:
none
@@ -1197,7 +1239,7 @@
------------------------------------------------------------------------------*/
-void h264bsdConvertToBGRA(u32 width, u32 height, u8* data, u32 *bgraData)
+void h264bsdConvertToBGRA(u32 width, u32 height, u8* data, u32 *pOutput)
{
const int w = (int)width;
const int h = (int)height;
@@ -1211,7 +1253,7 @@
u8* luma = data;
u8* cb = data + ySize;
u8* cr = data + ySize + uSize;
- u32* bgra = bgraData;
+ u32* bgra = pOutput;
while(y < h)
{
@@ -1232,6 +1274,86 @@
++x;
++bgra;
+ ++luma;
+
+ if(!(x & 1))
+ {
+ ++cb;
+ ++cr;
+ }
+
+ if(x < w) continue;
+
+ x = 0;
+ ++y;
+
+ if(y & 1)
+ {
+ cb -= w/2;
+ cr -= w/2;
+ }
+ }
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdConvertToYCbCrA
+
+ Functional description:
+ Convert decoded image data to YCbCrA format.
+ YCbCrA is a 4:4:4 format that uses u32 pixels where the MSB is alpha.
+ *Note* While this function is available, it is not heavily optimized.
+ If possible, you should use decoded image data directly.
+ This function should only be used when there is no other way to get YCbCrA data.
+
+ Inputs:
+ width width of the image in pixels
+ height height of the image in pixels
+ data pointer to decoded image data
+
+ Outputs:
+ pOutput pointer to the buffer where the YCbCrA data will be written
+
+ Returns:
+ none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdConvertToYCbCrA(u32 width, u32 height, u8* data, u32 *pOutput)
+{
+ const int w = (int)width;
+ const int h = (int)height;
+
+ int x = 0;
+ int y = 0;
+
+ size_t ySize = w * h;
+ size_t uSize = w/2 * h/2;
+
+ u8* luma = data;
+ u8* cb = data + ySize;
+ u8* cr = data + ySize + uSize;
+ u32* yCbCr = pOutput;
+
+ while(y < h)
+ {
+ int c = - 16;
+ int d = - 128;
+ int e = *cr - 128;
+
+ u32 r = (u32)CLIP1((298*c + 409*e + 128) >> 8);
+ u32 g = (u32)CLIP1((298*c - 100*d - 208*e + 128) >> 8);
+ u32 b = (u32)CLIP1((298*c + 516*d + 128) >> 8);
+
+ u32 pixel = 0xff;
+ pixel = (pixel << 8) + *cr;
+ pixel = (pixel << 8) + *cb;
+ pixel = (pixel << 8) + *luma;
+
+ *yCbCr = pixel;
+
+ ++x;
+ ++yCbCr;
++luma;
if(!(x & 1))
--- a/src/h264bsd_decoder.h
+++ b/src/h264bsd_decoder.h
@@ -69,6 +69,7 @@
u8* h264bsdNextOutputPicture(storage_t *pStorage, u32 *picId, u32 *isIdrPic, u32 *numErrMbs);
u32* h264bsdNextOutputPictureRGBA(storage_t *pStorage, u32 *picId, u32 *isIdrPic, u32 *numErrMbs);
u32* h264bsdNextOutputPictureBGRA(storage_t *pStorage, u32 *picId, u32 *isIdrPic, u32 *numErrMbs);
+u32* h264bsdNextOutputPictureYCbCrA(storage_t *pStorage, u32 *picId, u32 *isIdrPic, u32 *numErrMbs);
u32 h264bsdPicWidth(storage_t *pStorage);
u32 h264bsdPicHeight(storage_t *pStorage);
@@ -87,8 +88,9 @@
storage_t* h264bsdAlloc();
void h264bsdFree(storage_t *pStorage);
-void h264bsdConvertToRGBA(u32 width, u32 height, u8* data, u32 *rgbData);
-void h264bsdConvertToBGRA(u32 width, u32 height, u8* data, u32 *rgbData);
+void h264bsdConvertToRGBA(u32 width, u32 height, u8* data, u32 *pOutput);
+void h264bsdConvertToBGRA(u32 width, u32 height, u8* data, u32 *pOutput);
+void h264bsdConvertToYCbCrA(u32 width, u32 height, u8* data, u32 *pOutput);
#endif /* #ifdef H264SWDEC_DECODER_H */