shithub: openh264

Download patch

ref: 49ca1f76a6f8e4f747d5e52fba7737b70d5815f6
parent: 3f28643f3ce422d98f1e1b80afc88415cde79005
author: xiaotiansf <xiaotianshimail@gmail.com>
date: Wed Dec 11 06:45:27 EST 2019

Thread decoding: update deblocking by setting the pNzc array for neighboring MBs.

--- a/codec/decoder/core/inc/deblocking.h
+++ b/codec/decoder/core/inc/deblocking.h
@@ -105,6 +105,13 @@
 
 void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter  pFilter, int32_t iBoundryFlag);
 
+inline int8_t* GetPNzc (PDqLayer pCurDqLayer, int32_t iMbXy) { // returns the pNzc entry indexed by iMbXy
+  if (pCurDqLayer->pDec != NULL && pCurDqLayer->pDec->pNzc != NULL) { // use pDec->pNzc only when pDec and its pNzc are both non-NULL
+    return pCurDqLayer->pDec->pNzc[iMbXy];
+  }
+  return pCurDqLayer->pNzc[iMbXy]; // otherwise fall back to the pNzc array held directly by pCurDqLayer
+}
+
 } // namespace WelsDec
 
 #endif //WELS_DEBLOCKING_H__
--- a/codec/decoder/core/inc/picture.h
+++ b/codec/decoder/core/inc/picture.h
@@ -95,6 +95,7 @@
   int32_t iMbNum;
 
   bool*    pMbCorrectlyDecodedFlag;
+  int8_t (*pNzc)[24];
   uint32_t*  pMbType; // mb type used for direct mode
   int16_t (*pMv[LIST_A])[MB_BLOCK4x4_NUM][MV_A]; // used for direct mode
   int8_t (*pRefIndex[LIST_A])[MB_BLOCK4x4_NUM]; //used for direct mode
--- a/codec/decoder/core/src/deblocking.cpp
+++ b/codec/decoder/core/src/deblocking.cpp
@@ -576,7 +576,7 @@
     for (i = 0; i < 2; i++) {
       uint8_t uiNzc = 0;
       for (j = 0; uiNzc == 0 && j < 4; j++) {
-        uiNzc |= (pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)] | pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)]);
+        uiNzc |= (GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)] | GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)]);
       }
       if (uiNzc) {
         pBS[i << 1] = pBS[1 + (i << 1)] = 2;
@@ -595,10 +595,10 @@
     for (i = 0; i < 2; i++) {
       uint8_t uiNzc = 0;
       for (j = 0; uiNzc == 0 && j < 4; j++) {
-        uiNzc |= pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)];
+        uiNzc |= GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)];
       }
       for (j = 0; j < 2; j++) {
-        if (uiNzc | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
+        if (uiNzc | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
           pBS[j + (i << 1)] = 2;
         } else {
           PPicture ref0, ref1;
@@ -616,10 +616,10 @@
     for (i = 0; i < 2; i++) {
       uint8_t uiNzc = 0;
       for (j = 0; uiNzc == 0 && j < 4; j++) {
-        uiNzc |= pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)];
+        uiNzc |= GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)];
       }
       for (j = 0; j < 2; j++) {
-        if (uiNzc | pCurDqLayer->pNzc[iMbXy][*pBIdx]) {
+        if (uiNzc | GetPNzc (pCurDqLayer, iMbXy)[*pBIdx]) {
           pBS[j + (i << 1)] = 2;
         } else {
           PPicture ref0, ref1;
@@ -637,7 +637,7 @@
   } else {
     // only 4x4 transform
     for (i = 0; i < 4; i++) {
-      if (pCurDqLayer->pNzc[iMbXy][*pBIdx] | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
+      if (GetPNzc (pCurDqLayer, iMbXy)[*pBIdx] | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
         pBS[i] = 2;
       } else {
         PPicture ref0, ref1;
@@ -668,7 +668,7 @@
     for (i = 0; i < 2; i++) {
       uint8_t uiNzc = 0;
       for (j = 0; uiNzc == 0 && j < 4; j++) {
-        uiNzc |= (pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)] | pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)]);
+        uiNzc |= (GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)] | GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)]);
       }
       if (uiNzc) {
         pBS[i << 1] = pBS[1 + (i << 1)] = 2;
@@ -693,10 +693,10 @@
     for (i = 0; i < 2; i++) {
       uint8_t uiNzc = 0;
       for (j = 0; uiNzc == 0 && j < 4; j++) {
-        uiNzc |= pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)];
+        uiNzc |= GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)];
       }
       for (j = 0; j < 2; j++) {
-        if (uiNzc | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
+        if (uiNzc | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
           pBS[j + (i << 1)] = 2;
         } else {
           pBS[j + (i << 1)] = 1;
@@ -720,10 +720,10 @@
     for (i = 0; i < 2; i++) {
       uint8_t uiNzc = 0;
       for (j = 0; uiNzc == 0 && j < 4; j++) {
-        uiNzc |= pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)];
+        uiNzc |= GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)];
       }
       for (j = 0; j < 2; j++) {
-        if (uiNzc | pCurDqLayer->pNzc[iMbXy][*pBIdx]) {
+        if (uiNzc | GetPNzc (pCurDqLayer, iMbXy)[*pBIdx]) {
           pBS[j + (i << 1)] = 2;
         } else {
           pBS[j + (i << 1)] = 1;
@@ -745,7 +745,7 @@
   } else {
     // only 4x4 transform
     for (i = 0; i < 4; i++) {
-      if (pCurDqLayer->pNzc[iMbXy][*pBIdx] | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
+      if (GetPNzc (pCurDqLayer, iMbXy)[*pBIdx] | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
         pBS[i] = 2;
       } else {
         pBS[i] = 1;
@@ -1286,16 +1286,16 @@
     } else {
       if (IS_INTER_16x16 (iCurMbType)) {
         if (!pCurDqLayer->pTransformSize8x8Flag[pCurDqLayer->iMbXyIndex]) {
-          DeblockingBSInsideMBAvsbase (pCurDqLayer->pNzc[iMbXyIndex], nBS, 1);
+          DeblockingBSInsideMBAvsbase (GetPNzc (pCurDqLayer, iMbXyIndex), nBS, 1);
         } else {
-          DeblockingBSInsideMBAvsbase8x8 (pCurDqLayer->pNzc[iMbXyIndex], nBS, 1);
+          DeblockingBSInsideMBAvsbase8x8 (GetPNzc (pCurDqLayer, iMbXyIndex), nBS, 1);
         }
       } else {
 
         if (bBSlice) {
-          DeblockingBSliceBSInsideMBNormal (pFilter, pCurDqLayer, nBS, pCurDqLayer->pNzc[iMbXyIndex], iMbXyIndex);
+          DeblockingBSliceBSInsideMBNormal (pFilter, pCurDqLayer, nBS, GetPNzc (pCurDqLayer, iMbXyIndex), iMbXyIndex);
         } else {
-          DeblockingBSInsideMBNormal (pFilter, pCurDqLayer, nBS, pCurDqLayer->pNzc[iMbXyIndex], iMbXyIndex);
+          DeblockingBSInsideMBNormal (pFilter, pCurDqLayer, nBS, GetPNzc (pCurDqLayer, iMbXyIndex), iMbXyIndex);
         }
       }
     }
--- a/codec/decoder/core/src/decode_slice.cpp
+++ b/codec/decoder/core/src/decode_slice.cpp
@@ -1719,16 +1719,12 @@
 
       return ERR_INFO_MB_RECON_FAIL;
     }
-    int8_t pNzc[24];
+    memcpy (pCtx->pDec->pNzc[pCurDqLayer->iMbXyIndex], pCurDqLayer->pNzc[pCurDqLayer->iMbXyIndex], 24);
     if (pCtx->eSliceType != I_SLICE) {
-      memcpy (pNzc, pCurDqLayer->pNzc[pCurDqLayer->iMbXyIndex], 24);
       pCtx->sBlockFunc.pWelsSetNonZeroCountFunc (
-        pCurDqLayer->pNzc[pCurDqLayer->iMbXyIndex]); // set all none-zero nzc to 1; dbk can be opti!
+        pCtx->pDec->pNzc[pCurDqLayer->iMbXyIndex]); // set all none-zero nzc to 1; dbk can be opti!
     }
     WelsDeblockingFilterMB (pCurDqLayer, pFilter, iFilterIdc, pDeblockMb);
-    if (pCtx->eSliceType != I_SLICE) {
-      memcpy (pCurDqLayer->pNzc[pCurDqLayer->iMbXyIndex], pNzc, 24);
-    }
     if (pCtx->uiNalRefIdc > 0) {
       if (pCurDqLayer->iMbX == 0 || pCurDqLayer->iMbX == pCurDqLayer->iMbWidth - 1 || pCurDqLayer->iMbY == 0
           || pCurDqLayer->iMbY == pCurDqLayer->iMbHeight - 1) {
--- a/codec/decoder/core/src/pic_queue.cpp
+++ b/codec/decoder/core/src/pic_queue.cpp
@@ -113,7 +113,7 @@
   uint32_t uiMbCount = uiMbWidth * uiMbHeight;
 
   pPic->pMbCorrectlyDecodedFlag = (bool*)pMa->WelsMallocz (uiMbCount * sizeof (bool), "pPic->pMbCorrectlyDecodedFlag");
-
+  pPic->pNzc = GetThreadCount (pCtx) > 1 ? (int8_t (*)[24])pMa->WelsMallocz (uiMbCount * 24, "pPic->pNzc") : NULL;
   pPic->pMbType = (uint32_t*)pMa->WelsMallocz (uiMbCount * sizeof (uint32_t), "pPic->pMbType");
   pPic->pMv[LIST_0] = (int16_t (*)[16][2])pMa->WelsMallocz (uiMbCount * sizeof (
                         int16_t) * MV_A * MB_BLOCK4x4_NUM, "pPic->pMv[]");
@@ -145,6 +145,11 @@
     if (pPic->pMbCorrectlyDecodedFlag) {
       pMa->WelsFree (pPic->pMbCorrectlyDecodedFlag, "pPic->pMbCorrectlyDecodedFlag");
       pPic->pMbCorrectlyDecodedFlag = NULL;
+    }
+
+    if (pPic->pNzc) {
+      pMa->WelsFree (pPic->pNzc, "pPic->pNzc");
+      pPic->pNzc = NULL;
     }
 
     if (pPic->pMbType) {
--- a/test/api/thread_decoder_test.cpp
+++ b/test/api/thread_decoder_test.cpp
@@ -83,10 +83,10 @@
   unsigned char digest[SHA_DIGEST_LENGTH];
   SHA1Result (&ctx_, digest);
   if (!HasFatalFailure()) {
-    std::string p_hashStr(p.hashStr);
-    std::stringstream ss(p_hashStr);
+    std::string p_hashStr (p.hashStr);
+    std::stringstream ss (p_hashStr);
     std::string buf[4];
-    const char * hashStr[4];
+    const char* hashStr[4];
     int i = 0;
     while (i < 4 && ss >> buf[i]) {
       hashStr[i] = buf[i].c_str();
@@ -97,7 +97,7 @@
 }
 static const FileParam kFileParamArray[] = {
   {"res/Adobe_PDF_sample_a_1024x768_50Frms.264", "041434a5819d1d903d49c0eda884b345e9f83596"},
-  {"res/BA1_FT_C.264", "48d65bf8c731f29efc72f6222dd85b8ef7f636a7 463284ff3f5a0b1a0c829d47e9b73dcfe617c926"},
+  {"res/BA1_FT_C.264", "07490b43e8da6d9ef2fab066a0ac7491c7027297 3e7a012a01904cdc78c63ae20235665441b4e0a7"},
   {"res/BA1_Sony_D.jsv", "37c9a951a0348d6abe1880b59e2b5a4d7d18c94c"},
   {"res/BAMQ1_JVC_C.264", "6720462624f632f5475716ef32a7bbd12b3b428a 477b1e45e30661a138ff0b43c1ed3e00ded13d9c"},
   {"res/BAMQ2_JVC_C.264", "5f0fbb0dab7961e782224f6887c83d4866fc1af8 e3dfdc770fa5fee8b92f896a92214886c109a688"},
@@ -104,9 +104,9 @@
   {"res/BA_MW_D.264", "ace02cdce720bdb0698b40dc749a0e61fe0f590b"},
   {"res/BANM_MW_D.264", "c51f1d2fa63dba4f5787f1b726c056d1c01d6ab9"},
   {"res/BASQP1_Sony_C.jsv", "68e604b77e3f57f8ef1c2e450fcef03f5d2aee90 d5e1f122e8bf8d58bc6775d69b837db0d9ea3454"},
-  {"res/CI1_FT_B.264", "96042b70e212c8b253a786e865131ac89c133ca1 53d2a2f276a81b6ef869791373f7bc3928cc9ca3"},
+  {"res/CI1_FT_B.264", "2e4d613dddd7c15d0daaaa60ffc038001dfad017 05cb35998476202eb4a3e67d4314c0cca5d743f6"},
   {"res/CI_MW_D.264", "49a8916edd3e571efad328f2784fbe6aec5570d7"},
-  {"res/CVFC1_Sony_C.jsv", "109dfc8357a98b16aa74469a5506e362e563aa85 f29da8955c6f01d972dfe10b22c4f879aab05412"},
+  {"res/CVFC1_Sony_C.jsv", "109dfc8357a98b16aa74469a5506e362e563aa85 7d2a1c2e863baaaefff70ca4e6a62cb16f8792b2"},
   {"res/CVPCMNL1_SVA_C.264", "c2b0d964de727c64b9fccb58f63b567c82bda95a"},
   //{"res/LS_SVA_D.264", "72118f4d1674cf14e58bed7e67cb3aeed3df62b9"}, //DPB buffer is too small
   {"res/MIDR_MW_D.264", "aeded2be7b97484cbf25f367ec34208f2220a8ab"},
@@ -122,13 +122,13 @@
   {"res/QCIF_2P_I_allIPCM.264", "9879ce127d3263cfbaf5211ab6657dbf0ccabea8"},
   { "res/SVA_BA1_B.264", "4cb45a99ae44a0a98b174efd66245daa1fbaeb47 e9127875b268f9e7da4c495799b9972b8e72cf7b"},
   {"res/SVA_BA2_D.264", "ac9e960015b96f83279840802f6637c61ee1c5b8 719fe839fa68b915b614fbbbae15edf492cc2133"},
-  {"res/SVA_Base_B.264", "6015682a8f957bd499242150315cfd2fdf23e728 cd8c8ad018fe2fd7b3893f8175265b0eb7bcd342"},
+  {"res/SVA_Base_B.264", "a66d05c38b8f6e4e55e18237cac70b0c211a3b6e d8f923b278e6e9cbf51b495b29c2debe53526518"},
   {"res/SVA_CL1_E.264", "4fe09ab6cdc965ea10a20f1d6dd38aca954412bb"},
-  {"res/SVA_FM1_E.264", "164a9a2db58e7200ae60db8079d7201ac431887a fb33868ae38b3642edd309ad3005de4214fcc63f"},
+  {"res/SVA_FM1_E.264", "9aea4ea84c75adbdf884dcab3705ed5a96406e85 dc22699d39caf9eb1d32ecd4966869578d24cd86"},
   {"res/SVA_NL1_B.264", "6d63f72a0c0d833b1db0ba438afff3b4180fb3e6"},
   {"res/SVA_NL2_E.264", "70453ef8097c94dd190d6d2d1d5cb83c67e66238"},
   //{"res/SarVui.264", "1843d19d8e13588ef5de2d647804ae141e55cf72 719fe839fa68b915b614fbbbae15edf492cc2133"}, //same as "res/SVA_BA1_B.264"
-  {"res/Static.264", "d865faee7df56a8f532b7baeacb814483b8be148 52af285a888b8c9e04dc9f38fd61105e805ada3a"},
+  {"res/Static.264", "d865faee7df56a8f532b7baeacb814483b8be148 52af285a888b8c9e04dc9f38fd61105e805ada3a 1b6313262bff9c329aaf7dd3582525bd609c3974"},
   {"res/Zhling_1280x720.264", "10f9c803e80b51786f7833255afc3ef75c5c1339"},
   {"res/sps_subsetsps_bothVUI.264", "d65a34075c452196401340c554e83225c9454397"},
   //{"res/test_cif_I_CABAC_PCM.264", "dfe2f87ac76bdb58e227267907a2eeccf04715ad 02ac993be06b5d88118beb96ee5dfd0995b7cb00 95fdf21470d3bbcf95505abb2164042063a79d98 c2b42f489ca9c2ebc43c0ab2238551a0c958a692"},
@@ -138,8 +138,8 @@
   {"res/test_scalinglist_jm.264", "b36efd05c8b17faa23f1c071b92aa5d55a5a826f"},
   {"res/test_vd_1d.264", "15d8beaf991f9e5d56a854cdafc0a7abdd5bec69"},
   {"res/test_vd_rc.264", "cd6ef57fc884e5ecd9867591b01e35e3f091b8d0"},
-  {"res/Cisco_Men_whisper_640x320_CABAC_Bframe_9.264", "7df59855104a319b44a7611dd6c37b1670bf74c9"},
-  {"res/Cisco_Men_whisper_640x320_CAVLC_Bframe_9.264", "0d77e3c53f46d8962cd95b975e76d0f32613da0f"},
+  {"res/Cisco_Men_whisper_640x320_CABAC_Bframe_9.264", "7df59855104a319b44a7611dd6c37b1670bf74c9 f5593d374e8f68b1c882d407d961d80cf10ba737"},
+  {"res/Cisco_Men_whisper_640x320_CAVLC_Bframe_9.264", "0d77e3c53f46d8962cd95b975e76d0f32613da0f 67eec8abb0b22ff0f00d06c769b8a2e44cec33cf"},
   {"res/Cisco_Adobe_PDF_sample_a_1024x768_CAVLC_Bframe_9.264", "6cac61a6b58bba59b8e9944b18aba2df20efeca2"},
   {"res/VID_1280x544_cabac_temporal_direct.264", "e8ee8dd56ec5df1338f3c21ed8690d074c7ec03f"},
   {"res/VID_1280x720_cabac_temporal_direct.264", "1efa6aec8c5f953c53d713c31999420fdbd10b22"},