shithub: openh264

Download patch

ref: c5f04cfbd4246dd98978db924fb596b2f73dcd9a
parent: 84ff16c0156e71de20af1cf149f26e3433213174
parent: 00a724076be885682ed7f5ae4e9009d834c0ad3b
author: volvet <qizh@cisco.com>
date: Fri Apr 25 15:05:12 EDT 2014

Merge pull request #750 from mstorsjo/deblocking-neon-cpu-features

Check for WELS_CPU_NEON before calling DeblockingBSCalcEnc_neon

--- a/codec/encoder/core/inc/wels_func_ptr_def.h
+++ b/codec/encoder/core/inc/wels_func_ptr_def.h
@@ -94,6 +94,7 @@
     int32_t iBeta, int8_t* iTc);
 typedef void (*PChromaDeblockingEQ4Func) (uint8_t* iSampleCb, uint8_t* iSampleCr, int32_t iStride, int32_t iAlpha,
     int32_t iBeta);
+typedef void (*PDeblockingBSCalc) (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType, int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag); // signature shared by DeblockingBSCalc_c and DeblockingBSCalc_neon; stored in DeblockingFunc::pfDeblockingBSCalc
 
 typedef struct tagDeblockingFunc {
   PLumaDeblockingLT4Func    pfLumaDeblockingLT4Ver;
@@ -105,6 +106,8 @@
   PChromaDeblockingEQ4Func  pfChromaDeblockingEQ4Ver;
   PChromaDeblockingLT4Func  pfChromaDeblockingLT4Hor;
   PChromaDeblockingEQ4Func  pfChromaDeblockingEQ4Hor;
+
+  PDeblockingBSCalc         pfDeblockingBSCalc;
 } DeblockingFunc;
 
 typedef  void (*PSetNoneZeroCountZeroFunc) (int8_t* pNonZeroCount);
--- a/codec/encoder/core/src/deblocking.cpp
+++ b/codec/encoder/core/src/deblocking.cpp
@@ -583,6 +583,56 @@
   FilteringEdgeChromaHV (pfDeblocking, pCurMb, pFilter);
 }
 
+#if defined(HAVE_NEON) && defined(SINGLE_REF_FRAME)
+void DeblockingBSCalc_neon (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType,
+                            int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag) { // NEON variant of the BS calculation; pFunc and uiCurMbType are not read in this body
+  DeblockingBSCalcEnc_neon (pCurMb->pNonZeroCount, pCurMb->sMv, pCurMb->uiNeighborAvail, iMbStride, uiBS); // presumably fills uiBS from nzc, MVs and neighbor availability (asm routine not visible here -- confirm)
+  if (iLeftFlag) {
+    if (IS_INTRA ((pCurMb - 1)->uiMbType)) { // MB immediately before pCurMb (left neighbor) is intra
+      * (uint32_t*)uiBS[0][0] = 0x04040404; // overwrite all four bytes of uiBS[0][0] with 4
+    }
+  } else {
+    * (uint32_t*)uiBS[0][0] = 0; // iLeftFlag clear: zero all four bytes of uiBS[0][0]
+  }
+  if (iTopFlag) {
+    if (IS_INTRA ((pCurMb - iMbStride)->uiMbType)) { // MB iMbStride entries before pCurMb (top neighbor) is intra
+      * (uint32_t*)uiBS[1][0] = 0x04040404; // overwrite all four bytes of uiBS[1][0] with 4
+    }
+  } else {
+    * (uint32_t*)uiBS[1][0] = 0; // iTopFlag clear: zero all four bytes of uiBS[1][0]
+  }
+}
+#endif
+
+void DeblockingBSCalc_c (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType,
+                         int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag) { // plain C variant of the BS calculation: fills uiBS[0][*] and uiBS[1][*] for pCurMb
+  if (iLeftFlag) {
+    * (uint32_t*)uiBS[0][0] = IS_INTRA ((pCurMb - 1)->uiMbType) ? 0x04040404 : DeblockingBSMarginalMBAvcbase (pCurMb,
+                              pCurMb - 1, 0); // left neighbor intra -> four bytes of 4; otherwise value computed against the left MB (last arg 0)
+  } else {
+    * (uint32_t*)uiBS[0][0] = 0; // iLeftFlag clear: zero all four bytes of uiBS[0][0]
+  }
+  if (iTopFlag) {
+    * (uint32_t*)uiBS[1][0] = IS_INTRA ((pCurMb - iMbStride)->uiMbType) ? 0x04040404 : DeblockingBSMarginalMBAvcbase (
+                                pCurMb, (pCurMb - iMbStride), 1); // top neighbor intra -> four bytes of 4; otherwise value computed against the top MB (last arg 1)
+  } else {
+    * (uint32_t*)uiBS[1][0] = 0; // iTopFlag clear: zero all four bytes of uiBS[1][0]
+  }
+  //SKIP MB_16x16 or others
+  if (uiCurMbType != MB_TYPE_SKIP) {
+    pFunc->pfSetNZCZero (pCurMb->pNonZeroCount); // per original note: sets every non-zero nzc to 1; deblocking could be optimized here
+
+    if (uiCurMbType == MB_TYPE_16x16) {
+      DeblockingBSInsideMBAvsbase (pCurMb->pNonZeroCount, uiBS, 1); // 16x16 MB: inner-edge BS helper that takes only the nzc array
+    } else {
+      DeblockingBSInsideMBNormal (pCurMb, uiBS, pCurMb->pNonZeroCount); // other non-skip types: inner-edge BS helper that also takes the MB
+    }
+  } else {
+    * (uint32_t*)uiBS[0][1] = * (uint32_t*)uiBS[0][2] = * (uint32_t*)uiBS[0][3] =
+                                * (uint32_t*)uiBS[1][1] = * (uint32_t*)uiBS[1][2] = * (uint32_t*)uiBS[1][3] = 0; // skip MB: zero uiBS[0][1..3] and uiBS[1][1..3]
+  }
+}
+
 void DeblockingMbAvcbase (SWelsFuncPtrList* pFunc, SMB* pCurMb, SDeblockingFilter* pFilter) {
   uint8_t uiBS[2][4][4] = {{{ 0 }}};
 
@@ -605,49 +655,7 @@
     DeblockingIntraMb (&pFunc->pfDeblocking, pCurMb, pFilter);
     break;
   default:
-#if (defined(HAVE_NEON) && defined(SINGLE_REF_FRAME))
-    DeblockingBSCalcEnc_neon(pCurMb->pNonZeroCount, pCurMb->sMv, pCurMb->uiNeighborAvail, iMbStride, uiBS);
-    if (iLeftFlag){
-      if (IS_INTRA((pCurMb-1)->uiMbType)) {
-        *(uint32_t*)uiBS[0][0] = 0x04040404;
-      }
-    } else {
-      *(uint32_t*)uiBS[0][0] = 0;
-    }
-    if (iTopFlag) {
-      if (IS_INTRA((pCurMb-iMbStride)->uiMbType)) {
-        *(uint32_t*)uiBS[1][0] = 0x04040404;
-      }
-    } else {
-      *(uint32_t*)uiBS[1][0] = 0;
-    }
-#else
-    if (iLeftFlag) {
-      * (uint32_t*)uiBS[0][0] = IS_INTRA ((pCurMb - 1)->uiMbType) ? 0x04040404 : DeblockingBSMarginalMBAvcbase (pCurMb,
-                                pCurMb - 1, 0);
-    } else {
-      * (uint32_t*)uiBS[0][0] = 0;
-    }
-    if (iTopFlag) {
-      * (uint32_t*)uiBS[1][0] = IS_INTRA ((pCurMb - iMbStride)->uiMbType) ? 0x04040404 : DeblockingBSMarginalMBAvcbase (
-                                  pCurMb, (pCurMb - iMbStride), 1);
-    } else {
-      * (uint32_t*)uiBS[1][0] = 0;
-    }
-    //SKIP MB_16x16 or others
-    if (uiCurMbType != MB_TYPE_SKIP) {
-      pFunc->pfSetNZCZero (pCurMb->pNonZeroCount); // set all none-zero nzc to 1; dbk can be opti!
-
-      if (uiCurMbType == MB_TYPE_16x16) {
-        DeblockingBSInsideMBAvsbase (pCurMb->pNonZeroCount, uiBS, 1);
-      } else {
-        DeblockingBSInsideMBNormal (pCurMb, uiBS, pCurMb->pNonZeroCount);
-      }
-    } else {
-      * (uint32_t*)uiBS[0][1] = * (uint32_t*)uiBS[0][2] = * (uint32_t*)uiBS[0][3] =
-                                  * (uint32_t*)uiBS[1][1] = * (uint32_t*)uiBS[1][2] = * (uint32_t*)uiBS[1][3] = 0;
-    }
-#endif
+    pFunc->pfDeblocking.pfDeblockingBSCalc (pFunc, pCurMb, uiBS, uiCurMbType, iMbStride, iLeftFlag, iTopFlag);
     DeblockingInterMb (&pFunc->pfDeblocking, pCurMb, pFilter, uiBS);
     break;
   }
@@ -803,7 +811,9 @@
   pFunc->pfChromaDeblockingLT4Hor	= DeblockChromaLt4H_c;
   pFunc->pfChromaDeblockingEQ4Hor	= DeblockChromaEq4H_c;
 
+  pFunc->pfDeblockingBSCalc             = DeblockingBSCalc_c;
 
+
 #ifdef X86_ASM
   if (iCpu & WELS_CPU_SSSE3) {
     pFunc->pfLumaDeblockingLT4Ver	= DeblockLumaLt4V_ssse3;
@@ -828,6 +838,10 @@
     pFunc->pfChromaDeblockingEQ4Ver     = DeblockChromaEq4V_neon;
     pFunc->pfChromaDeblockingLT4Hor     = DeblockChromaLt4H_neon;
     pFunc->pfChromaDeblockingEQ4Hor     = DeblockChromaEq4H_neon;
+
+#if defined(SINGLE_REF_FRAME)
+    pFunc->pfDeblockingBSCalc           = DeblockingBSCalc_neon;
+#endif
   }
 #endif
 }