shithub: openh264

Download patch

ref: 38a3fada2456ab8480731f85fbeb01e01c6cb575
parent: 1b9aae843437385bed9efd96131fdba509107589
parent: 636df2bebbace0663684f6830db360190d9791b9
author: volvet <qizh@cisco.com>
date: Fri Mar 7 11:47:38 EST 2014

Merge pull request #435 from mstorsjo/threadlib-wait-single-unix

Make WelsMultipleEventsWaitSingleBlocking usable on unix as well

--- a/codec/common/WelsThreadLib.cpp
+++ b/codec/common/WelsThreadLib.cpp
@@ -121,12 +121,14 @@
 }
 
 WELS_THREAD_ERROR_CODE    WelsMultipleEventsWaitSingleBlocking (uint32_t nCount,
-    WELS_EVENT* event_list,
-    uint32_t dwMilliseconds) {
-  return WaitForMultipleObjects (nCount, event_list, FALSE, dwMilliseconds);
+    WELS_EVENT* event_list, WELS_EVENT* master_event) {
+  // Don't need/use the master event for anything, since windows has got WaitForMultipleObjects
+  return WaitForMultipleObjects (nCount, event_list, FALSE, INFINITE);
 }
 
-WELS_THREAD_ERROR_CODE    WelsMultipleEventsWaitAllBlocking (uint32_t nCount, WELS_EVENT* event_list) {
+WELS_THREAD_ERROR_CODE    WelsMultipleEventsWaitAllBlocking (uint32_t nCount,
+    WELS_EVENT* event_list, WELS_EVENT* master_event) {
+  // Don't need/use the master event for anything, since windows has got WaitForMultipleObjects
   return WaitForMultipleObjects (nCount, event_list, TRUE, INFINITE);
 }
 
@@ -342,14 +344,25 @@
 }
 
 WELS_THREAD_ERROR_CODE    WelsMultipleEventsWaitSingleBlocking (uint32_t nCount,
-    WELS_EVENT* event_list,
-    uint32_t dwMilliseconds) {
+    WELS_EVENT* event_list, WELS_EVENT* master_event) {
   uint32_t nIdx = 0;
-  const uint32_t kuiAccessTime = 2;	// 2 us once
+  uint32_t uiAccessTime = 2;	// 2 us once
 
   if (nCount == 0)
     return WELS_THREAD_ERROR_WAIT_FAILED;
 
+  if (master_event != NULL) {
+    // This design relies on the events actually being semaphores;
+    // if multiple events in the list have been signalled, the master
+    // event should have a similar count (events in windows can't keep
+    // track of the actual count, but the master event isn't needed there
+    // since it uses WaitForMultipleObjects).
+    int32_t err = sem_wait (*master_event);
+    if (err != WELS_THREAD_ERROR_OK)
+      return err;
+    uiAccessTime = 0; // no blocking, just quickly loop through all to find the one that was signalled
+  }
+
   while (1) {
     nIdx = 0;	// access each event by order
     while (nIdx < nCount) {
@@ -364,9 +377,9 @@
         err = sem_trywait (event_list[nIdx]);
         if (WELS_THREAD_ERROR_OK == err)
           return WELS_THREAD_ERROR_WAIT_OBJECT_0 + nIdx;
-        else if (wait_count > 0)
+        else if (wait_count > 0 || uiAccessTime == 0)
           break;
-        usleep (kuiAccessTime);
+        usleep (uiAccessTime);
         ++ wait_count;
       } while (1);
       // we do need access next event next time
@@ -373,12 +386,21 @@
       ++ nIdx;
     }
     usleep (1);	// switch to working threads
+    if (master_event != NULL) {
+      // A master event was used and was signalled, but none of the events in the
+      // list was found to be signalled, thus wait a little more when rechecking
+      // the list to avoid busylooping here.
+      // If we ever hit this codepath it's mostly a bug in the code that signals
+      // the events.
+      uiAccessTime = 2;
+    }
   }
 
   return WELS_THREAD_ERROR_WAIT_FAILED;
 }
 
-WELS_THREAD_ERROR_CODE    WelsMultipleEventsWaitAllBlocking (uint32_t nCount, WELS_EVENT* event_list) {
+WELS_THREAD_ERROR_CODE    WelsMultipleEventsWaitAllBlocking (uint32_t nCount,
+    WELS_EVENT* event_list, WELS_EVENT* master_event) {
   uint32_t nIdx = 0;
   uint32_t uiCountSignals = 0;
   uint32_t uiSignalFlag	= 0;	// UGLY: suppose maximal event number up to 32
@@ -394,7 +416,21 @@
       if ((uiSignalFlag & kuiBitwiseFlag) != kuiBitwiseFlag) { // non-blocking mode
         int32_t err = 0;
 //				fprintf( stderr, "sem_wait(): start to wait event %d..\n", nIdx );
-        err = sem_wait (event_list[nIdx]);
+        if (master_event == NULL) {
+          err = sem_wait (event_list[nIdx]);
+        } else {
+          err = sem_wait (*master_event);
+          if (err == WELS_THREAD_ERROR_OK) {
+            err = sem_wait (event_list[nIdx]);
+            if (err != WELS_THREAD_ERROR_OK) {
+              // We successfully waited for the master event,
+              // but waiting for the individual event failed (e.g. EINTR?).
+              // Increase the master event count so that the next retry will
+              // work as intended.
+              sem_post (*master_event);
+            }
+          }
+        }
 //				fprintf( stderr, "sem_wait(): wait event %d result %d errno %d..\n", nIdx, err, errno );
         if (WELS_THREAD_ERROR_OK == err) {
 //					int32_t val = 0;
--- a/codec/common/WelsThreadLib.h
+++ b/codec/common/WelsThreadLib.h
@@ -111,8 +111,9 @@
 WELS_THREAD_ERROR_CODE    WelsEventWait (WELS_EVENT* event);
 WELS_THREAD_ERROR_CODE    WelsEventWaitWithTimeOut (WELS_EVENT* event, uint32_t dwMilliseconds);
 WELS_THREAD_ERROR_CODE    WelsMultipleEventsWaitSingleBlocking (uint32_t nCount, WELS_EVENT* event_list,
-    uint32_t dwMilliseconds);
-WELS_THREAD_ERROR_CODE    WelsMultipleEventsWaitAllBlocking (uint32_t nCount, WELS_EVENT* event_list);
+    WELS_EVENT* master_event = NULL);
+WELS_THREAD_ERROR_CODE    WelsMultipleEventsWaitAllBlocking (uint32_t nCount, WELS_EVENT* event_list,
+    WELS_EVENT* master_event = NULL);
 
 WELS_THREAD_ERROR_CODE    WelsThreadCreate (WELS_THREAD_HANDLE* thread,  LPWELS_THREAD_ROUTINE  routine,
     void* arg, WELS_THREAD_ATTR attr);
--- a/codec/encoder/core/inc/mt_defs.h
+++ b/codec/encoder/core/inc/mt_defs.h
@@ -94,15 +94,14 @@
 char eventNamespace[100];
 WELS_THREAD_HANDLE			pThreadHandles[MAX_THREADS_NUM];// thread handles, [iThreadIdx]
 WELS_EVENT					pSliceCodedEvent[MAX_THREADS_NUM];// events for slice coded state, [iThreadIdx]
+WELS_EVENT					pSliceCodedMasterEvent;	// events for signalling that some event in pSliceCodedEvent has been signalled
 WELS_EVENT					pReadySliceCodingEvent[MAX_THREADS_NUM];	// events for slice coding ready, [iThreadIdx]
 WELS_EVENT					pUpdateMbListEvent[MAX_THREADS_NUM];		// signal to update mb list neighbor for various slices
 WELS_EVENT					pFinUpdateMbListEvent[MAX_THREADS_NUM];	// signal to indicate finish updating mb list
+WELS_EVENT					pExitEncodeEvent[MAX_THREADS_NUM];			// event for exit encoding event
+WELS_EVENT					pThreadMasterEvent[MAX_THREADS_NUM];	// event for indicating that some event has been signalled to the thread
 #ifdef _WIN32
 WELS_EVENT					pFinSliceCodingEvent[MAX_THREADS_NUM];	// notify slice coding thread is done
-WELS_EVENT					pExitEncodeEvent[MAX_THREADS_NUM];			// event for exit encoding event
-#else
-
-WELS_THREAD_HANDLE			pUpdateMbListThrdHandles[MAX_THREADS_NUM];	// thread handles for update mb list thread, [iThreadIdx]
 #endif//_WIN32
 
 WELS_MUTEX					mutexSliceNumUpdate;	// for dynamic slicing mode MT
--- a/codec/encoder/core/inc/slice_multi_threading.h
+++ b/codec/encoder/core/inc/slice_multi_threading.h
@@ -83,7 +83,7 @@
 
 int32_t CreateSliceThreads (sWelsEncCtx* pCtx);
 
-int32_t FiredSliceThreads (SSliceThreadPrivateData* pPriData, WELS_EVENT* pEventsList, SLayerBSInfo* pLayerBsInfo,
+int32_t FiredSliceThreads (SSliceThreadPrivateData* pPriData, WELS_EVENT* pEventsList, WELS_EVENT* pMasterEventsList, SLayerBSInfo* pLayerBsInfo,
                            const uint32_t kuiNumThreads/*, int32_t *iLayerNum*/, SSliceCtx* pSliceCtx, const bool kbIsDynamicSlicingMode);
 
 int32_t DynamicDetectCpuCores();
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -2157,6 +2157,7 @@
       do {
         if ((*ppCtx)->pSliceThreading->pThreadHandles[iThreadIdx] != NULL)	// iThreadIdx is already created successfully
           WelsEventSignal (& (*ppCtx)->pSliceThreading->pExitEncodeEvent[iThreadIdx]);
+          WelsEventSignal (& (*ppCtx)->pSliceThreading->pThreadMasterEvent[iThreadIdx]);
         ++ iThreadIdx;
       } while (iThreadIdx < iThreadCount);
 
@@ -2175,15 +2176,6 @@
                  res);
         (*ppCtx)->pSliceThreading->pThreadHandles[iThreadIdx] = 0;
       }
-      if ((*ppCtx)->pSliceThreading->pUpdateMbListThrdHandles[iThreadIdx]) {
-        res = WelsThreadCancel ((*ppCtx)->pSliceThreading->pUpdateMbListThrdHandles[iThreadIdx]);
-        WelsLog (*ppCtx, WELS_LOG_INFO, "WelsUninitEncoderExt(), WelsThreadCancel(pUpdateMbListThrdHandles%d) return %d..\n",
-                 iThreadIdx, res);
-        res = WelsThreadJoin ((*ppCtx)->pSliceThreading->pUpdateMbListThrdHandles[iThreadIdx]);	// waiting thread exit
-        WelsLog (*ppCtx, WELS_LOG_INFO, "WelsUninitEncoderExt(), pthread_join(pUpdateMbListThrdHandles%d) return %d..\n",
-                 iThreadIdx, res);
-        (*ppCtx)->pSliceThreading->pUpdateMbListThrdHandles[iThreadIdx] = 0;
-      }
       ++ iThreadIdx;
     }
 #endif//WIN32
@@ -3217,6 +3209,7 @@
           pCtx->iActiveThreadsNum	= iSliceCount;
           // to fire slice coding threads
           err = FiredSliceThreads (&pCtx->pSliceThreading->pThreadPEncCtx[0], &pCtx->pSliceThreading->pReadySliceCodingEvent[0],
+                                   &pCtx->pSliceThreading->pThreadMasterEvent[0],
                                    pLayerBsInfo, iSliceCount, pCtx->pCurDqLayer->pSliceEncCtx, false);
           if (err) {
             WelsLog (pCtx, WELS_LOG_ERROR,
@@ -3225,7 +3218,7 @@
             return ENC_RETURN_UNEXPECTED;
           }
 
-          WelsMultipleEventsWaitAllBlocking (iSliceCount, &pCtx->pSliceThreading->pSliceCodedEvent[0]);
+          WelsMultipleEventsWaitAllBlocking (iSliceCount, &pCtx->pSliceThreading->pSliceCodedEvent[0], &pCtx->pSliceThreading->pSliceCodedMasterEvent);
 
 
           // all slices are finished coding here
@@ -3254,6 +3247,7 @@
           iNumThreadsRunning		= iNumThreadsScheduled;
           // to fire slice coding threads
           err = FiredSliceThreads (&pCtx->pSliceThreading->pThreadPEncCtx[0], &pCtx->pSliceThreading->pReadySliceCodingEvent[0],
+                                   &pCtx->pSliceThreading->pThreadMasterEvent[0],
                                    pLayerBsInfo, iNumThreadsRunning, pCtx->pCurDqLayer->pSliceEncCtx, false);
           if (err) {
             WelsLog (pCtx, WELS_LOG_ERROR,
@@ -3266,13 +3260,12 @@
           while (1) {
             if (iIndexOfSliceToBeCoded >= iSliceCount && iNumThreadsRunning <= 0)
               break;
-#ifdef _WIN32
             WELS_THREAD_ERROR_CODE lwait	= 0;
             int32_t iEventId				= -1;
 
             lwait = WelsMultipleEventsWaitSingleBlocking (iNumThreadsScheduled,
                     &pCtx->pSliceThreading->pSliceCodedEvent[0],
-                    (uint32_t) -1);
+                    &pCtx->pSliceThreading->pSliceCodedMasterEvent);
             iEventId = (int32_t) (lwait - WELS_THREAD_ERROR_WAIT_OBJECT_0);
             if (iEventId >= 0 && iEventId < iNumThreadsScheduled) {
               if (iIndexOfSliceToBeCoded < iSliceCount) {
@@ -3280,6 +3273,7 @@
                 // thread_id equal to iEventId per implementation here
                 pCtx->pSliceThreading->pThreadPEncCtx[iEventId].iSliceIndex	= iIndexOfSliceToBeCoded;
                 WelsEventSignal (&pCtx->pSliceThreading->pReadySliceCodingEvent[iEventId]);
+                WelsEventSignal (&pCtx->pSliceThreading->pThreadMasterEvent[iEventId]);
 
                 ++ iIndexOfSliceToBeCoded;
               } else {	// no other slices left for coding
@@ -3286,29 +3280,6 @@
                 -- iNumThreadsRunning;
               }
             }
-#else
-            // TODO for pthread platforms
-            // alternate implementation using blocking due non-blocking with timeout mode not support at wels thread lib, tune back if available
-            WelsMultipleEventsWaitAllBlocking (iNumThreadsRunning, &pCtx->pSliceThreading->pSliceCodedEvent[0]);
-            WELS_VERIFY_RETURN_IFNEQ(pCtx->iEncoderError, ENC_RETURN_SUCCESS)
-            if (iIndexOfSliceToBeCoded < iSliceCount) {
-              int32_t iThreadIdx = 0;
-              // pick up succeeding slices for threading if left
-              while (iThreadIdx < iNumThreadsScheduled) {
-                if (iIndexOfSliceToBeCoded >= iSliceCount)
-                  break;
-                pCtx->pSliceThreading->pThreadPEncCtx[iThreadIdx].iSliceIndex = iIndexOfSliceToBeCoded;
-                WelsEventSignal (&pCtx->pSliceThreading->pReadySliceCodingEvent[iThreadIdx]);
-
-                ++ iIndexOfSliceToBeCoded;
-                ++ iThreadIdx;
-              }
-              // update iNumThreadsRunning
-              iNumThreadsRunning		= iThreadIdx;
-            } else {
-              iNumThreadsRunning = 0;
-            }
-#endif//_WIN32
           }//while(1)
 
           // all slices are finished coding here
@@ -3322,6 +3293,7 @@
 
         // to fire slice coding threads
         err = FiredSliceThreads (&pCtx->pSliceThreading->pThreadPEncCtx[0], &pCtx->pSliceThreading->pReadySliceCodingEvent[0],
+                                 &pCtx->pSliceThreading->pThreadMasterEvent[0],
                                  pLayerBsInfo, kiPartitionCnt, pCtx->pCurDqLayer->pSliceEncCtx, true);
         if (err) {
           WelsLog (pCtx, WELS_LOG_ERROR,
@@ -3330,7 +3302,7 @@
           return ENC_RETURN_UNEXPECTED;
         }
 
-        WelsMultipleEventsWaitAllBlocking (kiPartitionCnt, &pCtx->pSliceThreading->pSliceCodedEvent[0]);
+        WelsMultipleEventsWaitAllBlocking (kiPartitionCnt, &pCtx->pSliceThreading->pSliceCodedEvent[0], &pCtx->pSliceThreading->pSliceCodedMasterEvent);
         WELS_VERIFY_RETURN_IFNEQ(pCtx->iEncoderError, ENC_RETURN_SUCCESS)
 
         iLayerSize = AppendSliceToFrameBs (pCtx, pLayerBsInfo, kiPartitionCnt);
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -275,6 +275,7 @@
     int32_t iThreadIdx			= 0;
     do {
       WelsEventSignal (&pCtx->pSliceThreading->pUpdateMbListEvent[iThreadIdx]);
+      WelsEventSignal (&pCtx->pSliceThreading->pThreadMasterEvent[iThreadIdx]);
       ++ iThreadIdx;
     } while (iThreadIdx < kiThreadNum);
 
@@ -351,10 +352,11 @@
 
   MT_TRACE_LOG ((*ppCtx), WELS_LOG_INFO, "encpEncCtx= 0x%p\n", (void*) (*ppCtx));
 
+  char name[SEM_NAME_MAX] = {0};
+  WELS_THREAD_ERROR_CODE err = 0;
+
   iIdx = 0;
   while (iIdx < iThreadNum) {
-    char name[SEM_NAME_MAX] = {0};
-    WELS_THREAD_ERROR_CODE err = 0;
     pSmt->pThreadPEncCtx[iIdx].pWelsPEncCtx	= (void*) (*ppCtx);
     pSmt->pThreadPEncCtx[iIdx].iSliceIndex	= iIdx;
     pSmt->pThreadPEncCtx[iIdx].iThreadIndex	= iIdx;
@@ -364,10 +366,13 @@
     WelsSnprintf (name, SEM_NAME_MAX, "fs%d%s", iIdx, pSmt->eventNamespace);
     err = WelsEventOpen (&pSmt->pFinSliceCodingEvent[iIdx], name);
     MT_TRACE_LOG ((*ppCtx), WELS_LOG_INFO, "[MT] Open pFinSliceCodingEvent%d named(%s) ret%d err%d\n", iIdx, name, err, errno);
+#endif//_WIN32
     WelsSnprintf (name, SEM_NAME_MAX, "ee%d%s", iIdx, pSmt->eventNamespace);
     err = WelsEventOpen (&pSmt->pExitEncodeEvent[iIdx], name);
     MT_TRACE_LOG ((*ppCtx), WELS_LOG_INFO, "[MT] Open pExitEncodeEvent%d named(%s) ret%d err%d\n", iIdx, name, err, errno);
-#endif//_WIN32
+    WelsSnprintf (name, SEM_NAME_MAX, "tm%d%s", iIdx, pSmt->eventNamespace);
+    err = WelsEventOpen (&pSmt->pThreadMasterEvent[iIdx], name);
+    MT_TRACE_LOG ((*ppCtx), WELS_LOG_INFO, "[MT] Open pThreadMasterEvent%d named(%s) ret%d err%d\n", iIdx, name, err, errno);
     // length of semaphore name should be system constrained at least on mac 10.7
     WelsSnprintf (name, SEM_NAME_MAX, "ud%d%s", iIdx, pSmt->eventNamespace);
     err = WelsEventOpen (&pSmt->pUpdateMbListEvent[iIdx], name);
@@ -386,6 +391,10 @@
     ++ iIdx;
   }
 
+  WelsSnprintf (name, SEM_NAME_MAX, "scm%s", pSmt->eventNamespace);
+  err = WelsEventOpen (&pSmt->pSliceCodedMasterEvent, name);
+  MT_TRACE_LOG ((*ppCtx), WELS_LOG_INFO, "[MT] Open pSliceCodedMasterEvent named(%s) ret%d err%d\n", name, err, errno);
+
   (*ppCtx)->pSliceBs	= (SWelsSliceBs*)pMa->WelsMalloc (sizeof (SWelsSliceBs) * iMaxSliceNum, "pSliceBs");
   WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pSliceBs), FreeMemorySvc (ppCtx))
 
@@ -444,8 +453,8 @@
   if (NULL == pSmt)
     return;
 
+  char ename[SEM_NAME_MAX] = {0};
   while (iIdx < iThreadNum) {
-    char ename[SEM_NAME_MAX] = {0};
     // length of semaphore name should be system constrained at least on mac 10.7
 #ifdef _WIN32
     if (pSmt->pThreadHandles != NULL && pSmt->pThreadHandles[iIdx] != NULL)
@@ -453,9 +462,11 @@
 
     WelsSnprintf (ename, SEM_NAME_MAX, "fs%d%s", iIdx, pSmt->eventNamespace);
     WelsEventClose (&pSmt->pFinSliceCodingEvent[iIdx], ename);
+#endif//_WIN32
     WelsSnprintf (ename, SEM_NAME_MAX, "ee%d%s", iIdx, pSmt->eventNamespace);
     WelsEventClose (&pSmt->pExitEncodeEvent[iIdx], ename);
-#endif//_WIN32
+    WelsSnprintf (ename, SEM_NAME_MAX, "tm%d%s", iIdx, pSmt->eventNamespace);
+    WelsEventClose (&pSmt->pThreadMasterEvent[iIdx], ename);
     WelsSnprintf (ename, SEM_NAME_MAX, "sc%d%s", iIdx, pSmt->eventNamespace);
     WelsEventClose (&pSmt->pSliceCodedEvent[iIdx], ename);
     WelsSnprintf (ename, SEM_NAME_MAX, "rc%d%s", iIdx, pSmt->eventNamespace);
@@ -467,6 +478,8 @@
 
     ++ iIdx;
   }
+  WelsSnprintf (ename, SEM_NAME_MAX, "scm%s", pSmt->eventNamespace);
+  WelsEventClose (&pSmt->pSliceCodedMasterEvent, ename);
 
   WelsMutexDestroy (&pSmt->mutexSliceNumUpdate);
   WelsMutexDestroy (&((*ppCtx)->mutexEncoderError));
@@ -669,45 +682,6 @@
   return iReturn;
 }
 
-#if !defined(_WIN32)
-WELS_THREAD_ROUTINE_TYPE UpdateMbListThreadProc (void* arg) {
-  SSliceThreadPrivateData* pPrivateData	= (SSliceThreadPrivateData*)arg;
-  sWelsEncCtx* pEncPEncCtx			= NULL;
-  SDqLayer* pCurDq							= NULL;
-  int32_t iSliceIdx							= -1;
-  int32_t iEventIdx							= -1;
-  WELS_THREAD_ERROR_CODE iWaitRet				= WELS_THREAD_ERROR_GENERAL;
-  uint32_t uiThrdRet							= 0;
-
-  if (NULL == pPrivateData)
-    WELS_THREAD_ROUTINE_RETURN (1);
-
-  pEncPEncCtx	= (sWelsEncCtx*)pPrivateData->pWelsPEncCtx;
-  iSliceIdx		= pPrivateData->iSliceIndex;
-  iEventIdx		= pPrivateData->iThreadIndex;
-
-  do {
-    MT_TRACE_LOG (pEncPEncCtx, WELS_LOG_INFO, "[MT] UpdateMbListThreadProc(), try to wait (pUpdateMbListEvent[%d])!\n",
-                  iEventIdx);
-    iWaitRet = WelsEventWait (&pEncPEncCtx->pSliceThreading->pUpdateMbListEvent[iEventIdx]);
-    if (WELS_THREAD_ERROR_WAIT_OBJECT_0 == iWaitRet) {
-      pCurDq			= pEncPEncCtx->pCurDqLayer;
-      UpdateMbListNeighborParallel (pCurDq->pSliceEncCtx, pCurDq->sMbDataP, iSliceIdx);
-      WelsEventSignal (
-        &pEncPEncCtx->pSliceThreading->pFinUpdateMbListEvent[iEventIdx]);	// mean finished update pMb list for this pSlice
-    } else {
-      WelsLog (pEncPEncCtx, WELS_LOG_WARNING,
-               "[MT] UpdateMbListThreadProc(), waiting pUpdateMbListEvent[%d] failed(%d) and thread%d terminated!\n", iEventIdx,
-               iWaitRet, iEventIdx);
-      uiThrdRet = 1;
-      break;
-    }
-  } while (1);
-
-  WELS_THREAD_ROUTINE_RETURN (uiThrdRet);
-}
-#endif//!_WIN32
-
 // thread process for coding one pSlice
 WELS_THREAD_ROUTINE_TYPE CodingSliceThreadProc (void* arg) {
   SSliceThreadPrivateData* pPrivateData	= (SSliceThreadPrivateData*)arg;
@@ -715,10 +689,8 @@
   SDqLayer* pCurDq							= NULL;
   SSlice* pSlice								= NULL;
   SWelsSliceBs* pSliceBs						= NULL;
-#ifdef _WIN32
   WELS_EVENT pEventsList[3];
   int32_t iEventCount						= 0;
-#endif
   WELS_THREAD_ERROR_CODE iWaitRet				= WELS_THREAD_ERROR_GENERAL;
   uint32_t uiThrdRet							= 0;
   int32_t iSliceSize							= 0;
@@ -740,23 +712,16 @@
   iThreadIdx		= pPrivateData->iThreadIndex;
   iEventIdx		= iThreadIdx;
 
-#ifdef _WIN32
   pEventsList[iEventCount++]	= pEncPEncCtx->pSliceThreading->pReadySliceCodingEvent[iEventIdx];
   pEventsList[iEventCount++]	= pEncPEncCtx->pSliceThreading->pExitEncodeEvent[iEventIdx];
   pEventsList[iEventCount++] = pEncPEncCtx->pSliceThreading->pUpdateMbListEvent[iEventIdx];
-#endif//_WIN32
 
   do {
-#ifdef _WIN32
-    iWaitRet = WelsMultipleEventsWaitSingleBlocking (iEventCount,
-               &pEventsList[0],
-               (uint32_t) - 1);	// blocking until at least one event is
-#else
     MT_TRACE_LOG (pEncPEncCtx, WELS_LOG_INFO,
-                  "[MT] CodingSliceThreadProc(), try to call WelsEventWait(pReadySliceCodingEvent[%d]= 0x%p), pEncPEncCtx= 0x%p!\n",
-                  iEventIdx, (void*) (pEncPEncCtx->pSliceThreading->pReadySliceCodingEvent[iEventIdx]), (void*)pEncPEncCtx);
-    iWaitRet = WelsEventWait (&pEncPEncCtx->pSliceThreading->pReadySliceCodingEvent[iEventIdx]);
-#endif//WIN32
+                  "[MT] CodingSliceThreadProc(), try to call WelsMultipleEventsWaitSingleBlocking(pEventsList= %p %p %p), pEncPEncCtx= %p!\n",
+                  pEventsList[0], pEventsList[1], pEventsList[1], (void*)pEncPEncCtx);
+    iWaitRet = WelsMultipleEventsWaitSingleBlocking (iEventCount,
+               &pEventsList[0], &pEncPEncCtx->pSliceThreading->pThreadMasterEvent[iEventIdx]); // blocking until at least one event is signalled
     if (WELS_THREAD_ERROR_WAIT_OBJECT_0 == iWaitRet) {	// start pSlice coding signal waited
       SLayerBSInfo* pLbi = pPrivateData->pLayerBs;
       const int32_t kiCurDid			= pEncPEncCtx->uiDependencyId;
@@ -865,6 +830,8 @@
 
         WelsEventSignal (
           &pEncPEncCtx->pSliceThreading->pSliceCodedEvent[iEventIdx]);	// mean finished coding current pSlice
+        WelsEventSignal (
+          &pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent);
       } else {	// for SM_DYN_SLICE parallelization
         SSliceCtx* pSliceCtx			= pCurDq->pSliceEncCtx;
         const int32_t kiPartitionId			= iThreadIdx;
@@ -968,9 +935,9 @@
           break;
 
         WelsEventSignal (&pEncPEncCtx->pSliceThreading->pSliceCodedEvent[iEventIdx]);	// mean finished coding current pSlice
+        WelsEventSignal (&pEncPEncCtx->pSliceThreading->pSliceCodedMasterEvent);
       }
     }
-#ifdef _WIN32
     else if (WELS_THREAD_ERROR_WAIT_OBJECT_0 + 1 == iWaitRet) {	// exit thread signal
       uiThrdRet	= 0;
       break;
@@ -983,7 +950,6 @@
       WelsEventSignal (
         &pEncPEncCtx->pSliceThreading->pFinUpdateMbListEvent[iEventIdx]);	// mean finished update pMb list for this pSlice
     }
-#endif//WIN32
     else { // WELS_THREAD_ERROR_WAIT_TIMEOUT, or WELS_THREAD_ERROR_WAIT_FAILED
       WelsLog (pEncPEncCtx, WELS_LOG_WARNING,
                "[MT] CodingSliceThreadProc(), waiting pReadySliceCodingEvent[%d] failed(%d) and thread%d terminated!\n", iEventIdx,
@@ -1031,13 +997,6 @@
       }
     }
 #endif//WIN32 && BIND_CPU_CORES_TO_THREADS
-    // We need extra threads for update_mb_list_proc on __GNUC__ like OS (mac/linux)
-    // due to WelsMultipleEventsWaitSingleBlocking implememtation can not work well
-    // in case waiting pUpdateMbListEvent and pReadySliceCodingEvent events at the same time
-#if !defined(_WIN32)
-    WelsThreadCreate (&pCtx->pSliceThreading->pUpdateMbListThrdHandles[iIdx], UpdateMbListThreadProc,
-                      &pCtx->pSliceThreading->pThreadPEncCtx[iIdx], 0);
-#endif//!_WIN32
 
     ++ iIdx;
   }
@@ -1045,7 +1004,7 @@
   return 0;
 }
 
-int32_t FiredSliceThreads (SSliceThreadPrivateData* pPriData, WELS_EVENT* pEventsList, SLayerBSInfo* pLbi,
+int32_t FiredSliceThreads (SSliceThreadPrivateData* pPriData, WELS_EVENT* pEventsList, WELS_EVENT* pMasterEventsList, SLayerBSInfo* pLbi,
                            const uint32_t uiNumThreads, SSliceCtx* pSliceCtx, const bool bIsDynamicSlicingMode)
 {
   int32_t iEndMbIdx	= 0;
@@ -1076,6 +1035,8 @@
     pPriData[iIdx].iSliceIndex	= iIdx;
     if (pEventsList[iIdx])
       WelsEventSignal (&pEventsList[iIdx]);
+    if (pMasterEventsList[iIdx])
+      WelsEventSignal (&pMasterEventsList[iIdx]);
     ++ iIdx;
   }