shithub: openh264

Download patch

ref: 1eb735299a4606c01b7abbb2f561e7a904e08c1c
parent: f623aa318de8d9697fc0e30cf8f4c503020ee286
parent: 96b2a8703062c1d87b7fcdd472ed0b5a846f13f9
author: sijchen <sijchen@cisco.com>
date: Mon May 16 06:59:35 EDT 2016

Merge pull request #2458 from ruil2/downsampling2

    add one new downsampling algorithms

--- a/codec/processing/src/downsample/downsample.cpp
+++ b/codec/processing/src/downsample/downsample.cpp
@@ -34,8 +34,9 @@
 #include "cpu.h"
 
 WELSVP_NAMESPACE_BEGIN
+#define MAX_SAMPLE_WIDTH 1920
+#define MAX_SAMPLE_HEIGHT 1088
 
-
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 CDownsampling::CDownsampling (int32_t iCpuFlag) {
@@ -43,11 +44,37 @@
   m_eMethod   = METHOD_DOWNSAMPLE;
   WelsMemset (&m_pfDownsample, 0, sizeof (m_pfDownsample));
   InitDownsampleFuncs (m_pfDownsample, m_iCPUFlag);
+  m_bNoSampleBuffer = AllocateSampleBuffer();
 }
 
 CDownsampling::~CDownsampling() {
+  FreeSampleBuffer();
 }
+bool CDownsampling::AllocateSampleBuffer() {
+  for (int32_t i = 0; i < 2; i++) {
+    m_pSampleBuffer[i][0] = (uint8_t*)WelsMalloc (MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT);
+    if (!m_pSampleBuffer[i][0])
+      goto FREE_RET;
+    m_pSampleBuffer[i][1] = (uint8_t*)WelsMalloc (MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT / 4);
+    if (!m_pSampleBuffer[i][1])
+      goto FREE_RET;
+    m_pSampleBuffer[i][2] = (uint8_t*)WelsMalloc (MAX_SAMPLE_WIDTH * MAX_SAMPLE_HEIGHT / 4);
+    if (!m_pSampleBuffer[i][2])
+      goto FREE_RET;
+  }
+  return false;
+FREE_RET:
+  FreeSampleBuffer();
+  return true;
 
+}
+void CDownsampling::FreeSampleBuffer() {
+  for (int32_t i = 0; i < 2; i++) {
+    WelsFree (m_pSampleBuffer[i][0]);
+    WelsFree (m_pSampleBuffer[i][1]);
+    WelsFree (m_pSampleBuffer[i][2]);
+  }
+}
 void CDownsampling::InitDownsampleFuncs (SDownsampleFuncs& sDownsampleFunc,  int32_t iCpuFlag) {
   sDownsampleFunc.pfHalfAverage[0] = DyadicBilinearDownsampler_c;
   sDownsampleFunc.pfHalfAverage[1] = DyadicBilinearDownsampler_c;
@@ -123,49 +150,132 @@
   if (iSrcWidthY <= iDstWidthY || iSrcHeightY <= iDstHeightY) {
     return RET_INVALIDPARAM;
   }
+  if (iSrcWidthY > MAX_SAMPLE_WIDTH || iSrcHeightY > MAX_SAMPLE_HEIGHT || m_bNoSampleBuffer) {
+    if ((iSrcWidthY >> 1) == iDstWidthY && (iSrcHeightY >> 1) == iDstHeightY) {
+      // use half average functions
+      uint8_t iAlignIndex = GetAlignedIndex (iSrcWidthY);
+      m_pfDownsample.pfHalfAverage[iAlignIndex] ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0],
+          (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY);
 
-  if ((iSrcWidthY >> 1) == iDstWidthY && (iSrcHeightY >> 1) == iDstHeightY) {
-    // use half average functions
-    uint8_t iAlignIndex = GetAlignedIndex (iSrcWidthY);
-    m_pfDownsample.pfHalfAverage[iAlignIndex] ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0],
-        (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY);
+      iAlignIndex = GetAlignedIndex (iSrcWidthUV);
+      m_pfDownsample.pfHalfAverage[iAlignIndex] ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1],
+          (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV);
+      m_pfDownsample.pfHalfAverage[iAlignIndex] ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2],
+          (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV);
+    } else if ((iSrcWidthY >> 2) == iDstWidthY && (iSrcHeightY >> 2) == iDstHeightY) {
 
-    iAlignIndex = GetAlignedIndex (iSrcWidthUV);
-    m_pfDownsample.pfHalfAverage[iAlignIndex] ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1],
-        (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV);
-    m_pfDownsample.pfHalfAverage[iAlignIndex] ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2],
-        (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV);
-  } else if ((iSrcWidthY >> 2) == iDstWidthY && (iSrcHeightY >> 2) == iDstHeightY) {
+      m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0],
+                                           (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY);
 
-    m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0],
-                                         (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY);
+      m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1],
+                                           (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV);
 
-    m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1],
-                                         (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV);
+      m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2],
+                                           (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV);
 
-    m_pfDownsample.pfQuarterDownsampler ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2],
-                                         (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV);
+    } else if ((iSrcWidthY / 3) == iDstWidthY && (iSrcHeightY / 3) == iDstHeightY) {
 
-  } else if ((iSrcWidthY / 3) == iDstWidthY && (iSrcHeightY / 3) == iDstHeightY) {
+      m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0],
+                                            (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iDstHeightY);
 
-    m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0],
-                                          (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iDstHeightY);
+      m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1],
+                                            (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iDstHeightUV);
 
-    m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1],
-                                          (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iDstHeightUV);
+      m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2],
+                                            (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iDstHeightUV);
 
-    m_pfDownsample.pfOneThirdDownsampler ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2],
-                                          (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iDstHeightUV);
+    } else {
+      m_pfDownsample.pfGeneralRatioLuma ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], iDstWidthY, iDstHeightY,
+                                         (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY);
 
+      m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], iDstWidthUV, iDstHeightUV,
+                                           (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV);
+
+      m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], iDstWidthUV, iDstHeightUV,
+                                           (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV);
+    }
   } else {
-    m_pfDownsample.pfGeneralRatioLuma ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], iDstWidthY, iDstHeightY,
-                                       (uint8_t*)pSrcPixMap->pPixel[0], pSrcPixMap->iStride[0], iSrcWidthY, iSrcHeightY);
 
-    m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], iDstWidthUV, iDstHeightUV,
-                                         (uint8_t*)pSrcPixMap->pPixel[1], pSrcPixMap->iStride[1], iSrcWidthUV, iSrcHeightUV);
+    int32_t iIdx = 0;
+    int32_t iHalfSrcWidth = iSrcWidthY >> 1;
+    int32_t iHalfSrcHeight = iSrcHeightY >> 1;
+    uint8_t* pSrcY = (uint8_t*)pSrcPixMap->pPixel[0];
+    uint8_t* pSrcU = (uint8_t*)pSrcPixMap->pPixel[1];
+    uint8_t* pSrcV = (uint8_t*)pSrcPixMap->pPixel[2];
+    int32_t iSrcStrideY = pSrcPixMap->iStride[0];
+    int32_t iSrcStrideU = pSrcPixMap->iStride[1];
+    int32_t iSrcStrideV = pSrcPixMap->iStride[2];
 
-    m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], iDstWidthUV, iDstHeightUV,
-                                         (uint8_t*)pSrcPixMap->pPixel[2], pSrcPixMap->iStride[2], iSrcWidthUV, iSrcHeightUV);
+    int32_t iDstStrideY = pDstPixMap->iStride[0];
+    int32_t iDstStrideU = pDstPixMap->iStride[1];
+    int32_t iDstStrideV = pDstPixMap->iStride[2];
+
+    uint8_t* pDstY = (uint8_t*)m_pSampleBuffer[iIdx][0];
+    uint8_t* pDstU = (uint8_t*)m_pSampleBuffer[iIdx][1];
+    uint8_t* pDstV = (uint8_t*)m_pSampleBuffer[iIdx][2];
+    iIdx++;
+    do {
+      if ((iHalfSrcWidth == iDstWidthY) && (iHalfSrcHeight == iDstHeightY)) { //end
+        // use half average functions
+        uint8_t iAlignIndex = GetAlignedIndex (iSrcWidthY);
+        m_pfDownsample.pfHalfAverage[iAlignIndex] ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0],
+            (uint8_t*)pSrcY, iSrcStrideY, iSrcWidthY, iSrcHeightY);
+
+        iAlignIndex = GetAlignedIndex (iSrcWidthUV);
+        m_pfDownsample.pfHalfAverage[iAlignIndex] ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1],
+            (uint8_t*)pSrcU, iSrcStrideU, iSrcWidthUV, iSrcHeightUV);
+        m_pfDownsample.pfHalfAverage[iAlignIndex] ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2],
+            (uint8_t*)pSrcV, iSrcStrideV, iSrcWidthUV, iSrcHeightUV);
+        break;
+      } else if (((iHalfSrcWidth >> 1) >= iDstWidthY) && ((iHalfSrcHeight >> 1) >= iDstHeightY)) {
+        // use half average functions
+        iDstStrideY = iHalfSrcWidth;
+        iDstStrideU = iHalfSrcWidth >> 1;
+        iDstStrideV = iHalfSrcWidth >> 1;
+        uint8_t iAlignIndex = GetAlignedIndex (iSrcWidthY);
+        m_pfDownsample.pfHalfAverage[iAlignIndex] ((uint8_t*)pDstY, iDstStrideY,
+            (uint8_t*)pSrcY, iSrcStrideY, iSrcWidthY, iSrcHeightY);
+
+        iAlignIndex = GetAlignedIndex (iSrcWidthUV);
+        m_pfDownsample.pfHalfAverage[iAlignIndex] ((uint8_t*)pDstU, iDstStrideU,
+            (uint8_t*)pSrcU, iSrcStrideU, iSrcWidthUV, iSrcHeightUV);
+        m_pfDownsample.pfHalfAverage[iAlignIndex] ((uint8_t*)pDstV, iDstStrideV,
+            (uint8_t*)pSrcV, iSrcStrideV, iSrcWidthUV, iSrcHeightUV);
+
+        pSrcY = (uint8_t*)pDstY;
+        pSrcU = (uint8_t*)pDstU;
+        pSrcV = (uint8_t*)pDstV;
+
+
+        iSrcWidthY = iHalfSrcWidth;
+        iSrcWidthUV = iHalfSrcWidth >> 1;
+        iSrcHeightY = iHalfSrcHeight;
+        iSrcHeightUV = iHalfSrcHeight >> 1;
+
+        iSrcStrideY = iSrcWidthY;
+        iSrcStrideU = iSrcWidthUV;
+        iSrcStrideV = iSrcWidthUV;
+
+        iHalfSrcWidth >>= 1;
+        iHalfSrcHeight >>= 1;
+
+        iIdx = iIdx % 2;
+        pDstY = (uint8_t*)m_pSampleBuffer[iIdx][0];
+        pDstU = (uint8_t*)m_pSampleBuffer[iIdx][1];
+        pDstV = (uint8_t*)m_pSampleBuffer[iIdx][2];
+        iIdx++;
+      } else {
+        m_pfDownsample.pfGeneralRatioLuma ((uint8_t*)pDstPixMap->pPixel[0], pDstPixMap->iStride[0], iDstWidthY, iDstHeightY,
+                                           (uint8_t*)pSrcY, iSrcStrideY, iSrcWidthY, iSrcHeightY);
+
+        m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[1], pDstPixMap->iStride[1], iDstWidthUV, iDstHeightUV,
+                                             (uint8_t*)pSrcU, iSrcStrideU,  iSrcWidthUV, iSrcHeightUV);
+
+        m_pfDownsample.pfGeneralRatioChroma ((uint8_t*)pDstPixMap->pPixel[2], pDstPixMap->iStride[2], iDstWidthUV, iDstHeightUV,
+                                             (uint8_t*)pSrcV, iSrcStrideV, iSrcWidthUV, iSrcHeightUV);
+        break;
+      }
+    } while (true);
   }
   return RET_SUCCESS;
 }
--- a/codec/processing/src/downsample/downsample.h
+++ b/codec/processing/src/downsample/downsample.h
@@ -170,10 +170,13 @@
   void InitDownsampleFuncs (SDownsampleFuncs& sDownsampleFunc, int32_t iCpuFlag);
 
   int32_t GetAlignedIndex (const int32_t kiSrcWidth);
-
+  bool AllocateSampleBuffer();
+  void FreeSampleBuffer();
  private:
   SDownsampleFuncs m_pfDownsample;
   int32_t  m_iCPUFlag;
+  uint8_t  *m_pSampleBuffer[2][3];
+  bool     m_bNoSampleBuffer;
 };
 
 WELSVP_NAMESPACE_END