shithub: openh264

Download patch

ref: 767073cbb2bdc3197cecd1b4ccf1c200750d1c78
parent: 8df056618d72a545633a8e1545962f511a7e0ac7
parent: f38111d76ba127b4045e6a9899cbb753457c7384
author: ekr <ekr@rtfm.com>
date: Wed Jan 8 12:39:00 EST 2014

Merge pull request #118 from licaiguo/move-to-common-2

move common code(deblocking and cpu) to common, enable idct asm in encoder

--- /dev/null
+++ b/codec/common/cpu.cpp
@@ -1,0 +1,194 @@
+/*!
+ * \copy
+ *     Copyright (c)  2009-2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * \file	cpu.cpp
+ *
+ * \brief	CPU compatibility detection
+ *
+ * \date	04/29/2009 Created
+ *
+ *************************************************************************************
+ */
+#include <string.h>
+
+#include "cpu.h"
+#include "cpu_core.h"
+
+
+
+#define    CPU_Vender_AMD    "AuthenticAMD"
+#define    CPU_Vender_INTEL  "GenuineIntel"
+#define    CPU_Vender_CYRIX  "CyrixInstead"
+
+#if defined(X86_ASM)
+
+uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
+  uint32_t uiCPU = 0;
+  uint32_t uiFeatureA = 0, uiFeatureB = 0, uiFeatureC = 0, uiFeatureD = 0;
+  int32_t  CacheLineSize = 0;
+  int8_t   chVenderName[16] = { 0 };
+
+  if (!WelsCPUIdVerify()) {
+    /* cpuid is not supported in cpu */
+    return 0;
+  }
+
+  WelsCPUId (0, &uiFeatureA, (uint32_t*)&chVenderName[0], (uint32_t*)&chVenderName[8], (uint32_t*)&chVenderName[4]);
+  if (uiFeatureA == 0) {
+    /* maximum input value for basic cpuid information */
+    return 0;
+  }
+
+  WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
+  if ((uiFeatureD & 0x00800000) == 0) {
+    /* Basic MMX technology is not support in cpu, mean nothing for us so return here */
+    return 0;
+  }
+
+  uiCPU = WELS_CPU_MMX;
+  if (uiFeatureD & 0x02000000) {
+    /* SSE technology is identical to AMD MMX extensions */
+    uiCPU |= WELS_CPU_MMXEXT | WELS_CPU_SSE;
+  }
+  if (uiFeatureD & 0x04000000) {
+    /* SSE2 support here */
+    uiCPU |= WELS_CPU_SSE2;
+  }
+  if (uiFeatureD & 0x00000001) {
+    /* x87 FPU on-chip checking */
+    uiCPU |= WELS_CPU_FPU;
+  }
+  if (uiFeatureD & 0x00008000) {
+    /* CMOV instruction checking */
+    uiCPU |= WELS_CPU_CMOV;
+  }
+  if (!strcmp ((const str_t*)chVenderName, CPU_Vender_INTEL)) {	// confirmed_safe_unsafe_usage
+    if (uiFeatureD & 0x10000000) {
+      /* Multi-Threading checking: contains of multiple logic processors */
+      uiCPU |= WELS_CPU_HTT;
+    }
+  }
+
+  if (uiFeatureC & 0x00000001) {
+    /* SSE3 support here */
+    uiCPU |= WELS_CPU_SSE3;
+  }
+  if (uiFeatureC & 0x00000200) {
+    /* SSSE3 support here */
+    uiCPU |= WELS_CPU_SSSE3;
+  }
+  if (uiFeatureC & 0x00080000) {
+    /* SSE4.1 support here, 45nm Penryn processor */
+    uiCPU |= WELS_CPU_SSE41;
+  }
+  if (uiFeatureC & 0x00100000) {
+    /* SSE4.2 support here, next generation Nehalem processor */
+    uiCPU |= WELS_CPU_SSE42;
+  }
+  if (WelsCPUSupportAVX (uiFeatureA, uiFeatureC)) {
+    /* AVX supported */
+    uiCPU |= WELS_CPU_AVX;
+  }
+  if (WelsCPUSupportFMA (uiFeatureA, uiFeatureC)) {
+    /* AVX FMA supported */
+    uiCPU |= WELS_CPU_FMA;
+  }
+  if (uiFeatureC & 0x02000000) {
+    /* AES checking */
+    uiCPU |= WELS_CPU_AES;
+  }
+  if (uiFeatureC & 0x00400000) {
+    /* MOVBE checking */
+    uiCPU |= WELS_CPU_MOVBE;
+  }
+
+  if (pNumberOfLogicProcessors != NULL) {
+    // HTT enabled on chip
+    *pNumberOfLogicProcessors = (uiFeatureB & 0x00ff0000) >> 16; // feature bits: 23-16 on returned EBX
+  }
+
+  WelsCPUId (0x80000000, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
+
+  if ((!strcmp ((const str_t*)chVenderName, CPU_Vender_AMD))
+      && (uiFeatureA >= 0x80000001)) {	// confirmed_safe_unsafe_usage
+    WelsCPUId (0x80000001, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
+    if (uiFeatureD & 0x00400000) {
+      uiCPU |= WELS_CPU_MMXEXT;
+    }
+    if (uiFeatureD & 0x80000000) {
+      uiCPU |= WELS_CPU_3DNOW;
+    }
+  }
+
+  if (!strcmp ((const str_t*)chVenderName, CPU_Vender_INTEL)) {	// confirmed_safe_unsafe_usage
+    int32_t  family, model;
+
+    WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
+    family = ((uiFeatureA >> 8) & 0xf) + ((uiFeatureA >> 20) & 0xff);
+    model  = ((uiFeatureA >> 4) & 0xf) + ((uiFeatureA >> 12) & 0xf0);
+
+    if ((family == 6) && (model == 9 || model == 13 || model == 14)) {
+      uiCPU &= ~ (WELS_CPU_SSE2 | WELS_CPU_SSE3);
+    }
+  }
+
+  // get cache line size
+  if ((!strcmp ((const str_t*)chVenderName, CPU_Vender_INTEL))
+      || ! (strcmp ((const str_t*)chVenderName, CPU_Vender_CYRIX))) {	// confirmed_safe_unsafe_usage
+    WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
+
+    CacheLineSize = (uiFeatureB & 0xff00) >>
+                    5;	// ((clflush_line_size >> 8) << 3), CLFLUSH_line_size * 8 = CacheLineSize_in_byte
+
+    if (CacheLineSize == 128) {
+      uiCPU |= WELS_CPU_CACHELINE_128;
+    } else if (CacheLineSize == 64) {
+      uiCPU |= WELS_CPU_CACHELINE_64;
+    } else if (CacheLineSize == 32) {
+      uiCPU |= WELS_CPU_CACHELINE_32;
+    } else if (CacheLineSize == 16) {
+      uiCPU |= WELS_CPU_CACHELINE_16;
+    }
+  }
+
+  return uiCPU;
+}
+
+
+void WelsCPURestore (const uint32_t kuiCPU) {
+  if (kuiCPU & (WELS_CPU_MMX | WELS_CPU_MMXEXT | WELS_CPU_3DNOW | WELS_CPU_3DNOWEXT)) {
+    WelsEmms();
+  }
+}
+
+#endif
+
+
--- /dev/null
+++ b/codec/common/cpu.h
@@ -1,0 +1,80 @@
+/*!
+ * \copy
+ *     Copyright (c)  2009-2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * \file	cpu.h
+ *
+ * \brief	CPU feature compatibility detection
+ *
+ * \date	04/29/2009 Created
+ *
+ *************************************************************************************
+ */
+#if !defined(WELS_CPU_DETECTION_H__)
+#define WELS_CPU_DETECTION_H__
+
+#include "typedefs.h"
+
+
+
+#if defined(__cplusplus)
+extern "C" {
+#endif//__cplusplus
+
+#if defined(X86_ASM)
+/*
+ *	cpuid support verify routine
+ *  return 0 if cpuid is not supported by cpu
+ */
+int32_t  WelsCPUIdVerify();
+
+void_t WelsCPUId (uint32_t uiIndex, uint32_t* pFeatureA, uint32_t* pFeatureB, uint32_t* pFeatureC, uint32_t* pFeatureD);
+
+int32_t WelsCPUSupportAVX (uint32_t eax, uint32_t ecx);
+int32_t WelsCPUSupportFMA (uint32_t eax, uint32_t ecx);
+
+void_t WelsEmms();
+
+uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors);
+
+/*
+ *	clear FPU registers states for potential float based calculation if support
+ */
+void     WelsCPURestore (const uint32_t kuiCPU);
+
+#endif
+
+#if defined(__cplusplus)
+}
+#endif//__cplusplus
+
+
+
+#endif//WELS_CPU_DETECTION_H__
--- /dev/null
+++ b/codec/common/cpu_core.h
@@ -1,0 +1,80 @@
+/*!
+ * \copy
+ *     Copyright (c)  2009-2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * \file	cpu_core.h
+ *
+ * \brief	cpu core feature detection
+ *
+ * \date	4/24/2009 Created
+ *
+ *************************************************************************************
+ */
+#if !defined(WELS_CPU_CORE_FEATURE_DETECTION_H__)
+#define WELS_CPU_CORE_FEATURE_DETECTION_H__
+
+/*
+ *	WELS CPU feature flags
+ */
+#define WELS_CPU_MMX        0x00000001    /* mmx */
+#define WELS_CPU_MMXEXT     0x00000002    /* mmx-ext*/
+#define WELS_CPU_SSE        0x00000004    /* sse */
+#define WELS_CPU_SSE2       0x00000008    /* sse 2 */
+#define WELS_CPU_SSE3       0x00000010    /* sse 3 */
+#define WELS_CPU_SSE41      0x00000020    /* sse 4.1 */
+#define WELS_CPU_3DNOW      0x00000040    /* 3dnow! */
+#define WELS_CPU_3DNOWEXT   0x00000080    /* 3dnow! ext */
+#define WELS_CPU_ALTIVEC    0x00000100    /* altivec */
+#define WELS_CPU_SSSE3      0x00000200    /* ssse3 */
+#define WELS_CPU_SSE42      0x00000400    /* sse 4.2 */
+
+/* CPU features application extensive */
+#define WELS_CPU_AVX		0x00000800	/* Advanced Vector eXtentions */
+#define WELS_CPU_FPU		0x00001000	/* x87-FPU on chip */
+#define WELS_CPU_HTT		0x00002000	/* Hyper-Threading Technology (HTT), Multi-threading enabled feature:
+										   physical processor package is capable of supporting more than one logic processor
+										*/
+#define WELS_CPU_CMOV		0x00004000	/* Conditional Move Instructions,
+										   also if x87-FPU is present at indicated by the CPUID.FPU feature bit, then FCOMI and FCMOV are supported
+										*/
+#define WELS_CPU_MOVBE		0x00008000	/* MOVBE instruction */
+#define WELS_CPU_AES		0x00010000	/* AES instruction extensions */
+#define WELS_CPU_FMA		0x00020000	/* AVX VEX FMA instruction sets */
+
+#define WELS_CPU_CACHELINE_16    0x10000000    /* CacheLine Size 16 */
+#define WELS_CPU_CACHELINE_32    0x20000000    /* CacheLine Size 32 */
+#define WELS_CPU_CACHELINE_64    0x40000000    /* CacheLine Size 64 */
+#define WELS_CPU_CACHELINE_128   0x80000000    /* CacheLine Size 128 */
+
+/*
+ *	Interfaces for CPU core feature detection as below
+ */
+
+#endif//WELS_CPU_CORE_FEATURE_DETECTION_H__
--- /dev/null
+++ b/codec/common/deblocking_common.cpp
@@ -1,0 +1,205 @@
+#include "deblocking_common.h"
+#include "macros.h"
+//  C code only
+void_t DeblockLumaLt4_c (uint8_t* pPix, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, int32_t iBeta,
+                         int8_t* pTc) {
+  for (int32_t i = 0; i < 16; i++) {
+    int32_t iTc0 = pTc[i >> 2];
+    if (iTc0 >= 0) {
+      int32_t p0 = pPix[-iStrideX];
+      int32_t p1 = pPix[-2 * iStrideX];
+      int32_t p2 = pPix[-3 * iStrideX];
+      int32_t q0 = pPix[0];
+      int32_t q1 = pPix[iStrideX];
+      int32_t q2 = pPix[2 * iStrideX];
+      bool_t bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha;
+      bool_t bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta;
+      bool_t bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta;
+      int32_t iTc = iTc0;
+      if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) {
+        bool_t bDetaP2P0 =  WELS_ABS (p2 - p0) < iBeta;
+        bool_t bDetaQ2Q0 =  WELS_ABS (q2 - q0) < iBeta;
+        if (bDetaP2P0) {
+          pPix[-2 * iStrideX] = p1 + WELS_CLIP3 ((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1, -iTc0, iTc0);
+          iTc++;
+        }
+        if (bDetaQ2Q0) {
+          pPix[iStrideX] = q1 + WELS_CLIP3 ((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1, -iTc0, iTc0);
+          iTc++;
+        }
+        int32_t iDeta = WELS_CLIP3 ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3, -iTc, iTc);
+        pPix[-iStrideX] = WELS_CLIP1 (p0 + iDeta);     /* p0' */
+        pPix[0]  = WELS_CLIP1 (q0 - iDeta);     /* q0' */
+      }
+    }
+    pPix += iStrideY;
+  }
+}
+void_t DeblockLumaEq4_c (uint8_t* pPix, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, int32_t iBeta) {
+  int32_t p0, p1, p2, q0, q1, q2;
+  int32_t iDetaP0Q0;
+  bool_t bDetaP1P0, bDetaQ1Q0;
+  for (int32_t i = 0; i < 16; i++) {
+    p0 = pPix[-iStrideX];
+    p1 = pPix[-2 * iStrideX];
+    p2 = pPix[-3 * iStrideX];
+    q0 = pPix[0];
+    q1 = pPix[iStrideX];
+    q2 = pPix[2 * iStrideX];
+    iDetaP0Q0 = WELS_ABS (p0 - q0);
+    bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta;
+    bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta;
+    if ((iDetaP0Q0 < iAlpha) && bDetaP1P0 && bDetaQ1Q0) {
+      if (iDetaP0Q0 < ((iAlpha >> 2) + 2)) {
+        bool_t bDetaP2P0 = WELS_ABS (p2 - p0) < iBeta;
+        bool_t bDetaQ2Q0 =  WELS_ABS (q2 - q0) < iBeta;
+        if (bDetaP2P0) {
+          const int32_t p3 = pPix[-4 * iStrideX];
+          pPix[-iStrideX] = (p2 + (p1 << 1) + (p0 << 1) + (q0 << 1) + q1 + 4) >> 3;	   //p0
+          pPix[-2 * iStrideX] = (p2 + p1 + p0 + q0 + 2) >> 2;	 //p1
+          pPix[-3 * iStrideX] = ((p3 << 1) + p2 + (p2 << 1) + p1 + p0 + q0 + 4) >> 3;//p2
+        } else {
+          pPix[-1 * iStrideX] = ((p1 << 1) + p0 + q1 + 2) >> 2;	//p0
+        }
+        if (bDetaQ2Q0) {
+          const int32_t q3 = pPix[3 * iStrideX];
+          pPix[0] = (p1 + (p0 << 1) + (q0 << 1) + (q1 << 1) + q2 + 4) >> 3;   //q0
+          pPix[iStrideX] = (p0 + q0 + q1 + q2 + 2) >> 2;   //q1
+          pPix[2 * iStrideX] = ((q3 << 1) + q2 + (q2 << 1) + q1 + q0 + p0 + 4) >> 3;//q2
+        } else {
+          pPix[0] = ((q1 << 1) + q0 + p1 + 2) >> 2;   //q0
+        }
+      } else {
+        pPix[-iStrideX] = ((p1 << 1) + p0 + q1 + 2) >> 2;   //p0
+        pPix[ 0] = ((q1 << 1) + q0 + p1 + 2) >> 2;   //q0
+      }
+    }
+    pPix += iStrideY;
+  }
+}
+void_t DeblockLumaLt4V_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* tc) {
+  DeblockLumaLt4_c (pPix, iStride, 1, iAlpha, iBeta, tc);
+}
+void_t DeblockLumaLt4H_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* tc) {
+  DeblockLumaLt4_c (pPix, 1, iStride, iAlpha, iBeta, tc);
+}
+void_t DeblockLumaEq4V_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
+  DeblockLumaEq4_c (pPix, iStride, 1, iAlpha, iBeta);
+}
+void_t DeblockLumaEq4H_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
+  DeblockLumaEq4_c (pPix, 1, iStride, iAlpha, iBeta);
+}
+void_t DeblockChromaLt4_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha,
+                           int32_t iBeta, int8_t* pTc) {
+  int32_t p0, p1, q0, q1, iDeta;
+  bool_t bDetaP0Q0, bDetaP1P0, bDetaQ1Q0;
+
+  for (int32_t i = 0; i < 8; i++) {
+    int32_t iTc0 = pTc[i >> 1];
+    if (iTc0 > 0) {
+      p0 = pPixCb[-iStrideX];
+      p1 = pPixCb[-2 * iStrideX];
+      q0 = pPixCb[0];
+      q1 = pPixCb[iStrideX];
+
+      bDetaP0Q0 =  WELS_ABS (p0 - q0) < iAlpha;
+      bDetaP1P0 =  WELS_ABS (p1 - p0) < iBeta;
+      bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta;
+      if (bDetaP0Q0 && bDetaP1P0 &&	bDetaQ1Q0) {
+        iDeta = WELS_CLIP3 ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3, -iTc0, iTc0);
+        pPixCb[-iStrideX] = WELS_CLIP1 (p0 + iDeta);     /* p0' */
+        pPixCb[0]  = WELS_CLIP1 (q0 - iDeta);     /* q0' */
+      }
+
+
+      p0 = pPixCr[-iStrideX];
+      p1 = pPixCr[-2 * iStrideX];
+      q0 = pPixCr[0];
+      q1 = pPixCr[iStrideX];
+
+      bDetaP0Q0 =  WELS_ABS (p0 - q0) < iAlpha;
+      bDetaP1P0 =  WELS_ABS (p1 - p0) < iBeta;
+      bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta;
+
+      if (bDetaP0Q0 && bDetaP1P0 &&	bDetaQ1Q0) {
+        iDeta = WELS_CLIP3 ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3, -iTc0, iTc0);
+        pPixCr[-iStrideX] = WELS_CLIP1 (p0 + iDeta);     /* p0' */
+        pPixCr[0]  = WELS_CLIP1 (q0 - iDeta);     /* q0' */
+      }
+    }
+    pPixCb += iStrideY;
+    pPixCr += iStrideY;
+  }
+}
+void_t DeblockChromaEq4_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha,
+                           int32_t iBeta) {
+  int32_t i = 0, d = 0;
+  int32_t p0, p1, q0, q1;
+  bool_t bDetaP0Q0, bDetaP1P0, bDetaQ1Q0;
+  for (int32_t i = 0; i < 8; i++) {
+    //cb
+    p0 = pPixCb[-iStrideX];
+    p1 = pPixCb[-2 * iStrideX];
+    q0 = pPixCb[0];
+    q1 = pPixCb[iStrideX];
+    bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha;
+    bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta;
+    bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta;
+    if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) {
+      pPixCb[-iStrideX] = ((p1 << 1) + p0 + q1 + 2) >> 2;     /* p0' */
+      pPixCb[0]  = ((q1 << 1) + q0 + p1 + 2) >> 2;     /* q0' */
+    }
+
+    //cr
+    p0 = pPixCr[-iStrideX];
+    p1 = pPixCr[-2 * iStrideX];
+    q0 = pPixCr[0];
+    q1 = pPixCr[iStrideX];
+    bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha;
+    bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta;
+    bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta;
+    if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) {
+      pPixCr[-iStrideX] = ((p1 << 1) + p0 + q1 + 2) >> 2;     /* p0' */
+      pPixCr[0]  = ((q1 << 1) + q0 + p1 + 2) >> 2;     /* q0' */
+    }
+    pPixCr += iStrideY;
+    pPixCb += iStrideY;
+  }
+}
+void_t DeblockChromaLt4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
+                            int8_t* tc) {
+  DeblockChromaLt4_c (pPixCb, pPixCr, iStride, 1, iAlpha, iBeta, tc);
+}
+void_t DeblockChromaLt4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
+                            int8_t* tc) {
+  DeblockChromaLt4_c (pPixCb, pPixCr, 1, iStride, iAlpha, iBeta, tc);
+}
+void_t DeblockChromaEq4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
+  DeblockChromaEq4_c (pPixCb, pPixCr, iStride, 1, iAlpha, iBeta);
+}
+void_t DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
+  DeblockChromaEq4_c (pPixCb, pPixCr, 1, iStride, iAlpha, iBeta);
+}
+
+#ifdef X86_ASM
+extern "C" {
+  void DeblockLumaLt4H_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc) {
+    FORCE_STACK_ALIGN_1D (uint8_t,  uiBuf,   16 * 8, 16);
+
+    DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]);
+    DeblockLumaLt4V_sse2 (&uiBuf[4 * 16], 16, iAlpha, iBeta, pTc);
+    DeblockLumaTransposeV2H_sse2 (pPixY - 4, iStride, &uiBuf[0]);
+  }
+
+  void DeblockLumaEq4H_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
+    FORCE_STACK_ALIGN_1D (uint8_t,  uiBuf,   16 * 8, 16);
+
+    DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]);
+    DeblockLumaEq4V_sse2 (&uiBuf[4 * 16], 16, iAlpha, iBeta);
+    DeblockLumaTransposeV2H_sse2 (pPixY - 4, iStride, &uiBuf[0]);
+  }
+
+}
+
+#endif
+
--- /dev/null
+++ b/codec/common/deblocking_common.h
@@ -1,0 +1,39 @@
+#ifndef WELS_DEBLOCKING_COMMON_H__
+#define WELS_DEBLOCKING_COMMON_H__
+#include "typedefs.h"
+void_t DeblockLumaLt4V_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
+void_t DeblockLumaEq4V_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
+
+void_t DeblockLumaLt4H_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
+void_t DeblockLumaEq4H_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
+
+void_t DeblockChromaLt4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
+                            int8_t* pTc);
+void_t DeblockChromaEq4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
+
+void_t DeblockChromaLt4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
+                            int8_t* pTc);
+void_t DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
+
+#if defined(__cplusplus)
+extern "C" {
+#endif//__cplusplus
+
+#ifdef  X86_ASM
+void DeblockLumaLt4V_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
+void DeblockLumaEq4V_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
+void DeblockLumaTransposeH2V_sse2 (uint8_t* pPixY, int32_t iStride, uint8_t* pDst);
+void DeblockLumaTransposeV2H_sse2 (uint8_t* pPixY, int32_t iStride, uint8_t* pSrc);
+void DeblockLumaLt4H_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
+void DeblockLumaEq4H_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
+void DeblockChromaEq4V_sse2 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
+void DeblockChromaLt4V_sse2 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
+                             int8_t* pTC);
+void DeblockChromaEq4H_sse2 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
+void DeblockChromaLt4H_sse2 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
+                             int8_t* pTC);
+#endif
+#if defined(__cplusplus)
+}
+#endif//__cplusplus
+#endif //WELS_DEBLOCKING_COMMON_H__
--- a/codec/common/targets.mk
+++ b/codec/common/targets.mk
@@ -2,28 +2,57 @@
 COMMON_SRCDIR=codec/common
 COMMON_CPP_SRCS=\
 	$(COMMON_SRCDIR)/./logging.cpp\
+	$(COMMON_SRCDIR)/./deblocking_common.cpp\
+	$(COMMON_SRCDIR)/./cpu.cpp\
 
 COMMON_OBJS += $(COMMON_CPP_SRCS:.cpp=.o)
 ifeq ($(USE_ASM), Yes)
 COMMON_ASM_SRCS=\
+	$(COMMON_SRCDIR)/./deblock.asm\
+	$(COMMON_SRCDIR)/./vaa.asm\
 	$(COMMON_SRCDIR)/./asm_inc.asm\
+	$(COMMON_SRCDIR)/./mc_luma.asm\
 	$(COMMON_SRCDIR)/./cpuid.asm\
-	$(COMMON_SRCDIR)/./deblock.asm\
-	$(COMMON_SRCDIR)/./expand_picture.asm\
-	$(COMMON_SRCDIR)/./mb_copy.asm\
 	$(COMMON_SRCDIR)/./mc_chroma.asm\
-	$(COMMON_SRCDIR)/./mc_luma.asm\
-	$(COMMON_SRCDIR)/./vaa.asm\
+	$(COMMON_SRCDIR)/./mb_copy.asm\
+	$(COMMON_SRCDIR)/./expand_picture.asm\
 
 COMMON_OBJS += $(COMMON_ASM_SRCS:.asm=.o)
 endif
 
 OBJS += $(COMMON_OBJS)
-$(COMMON_SRCDIR)/%.o: $(COMMON_SRCDIR)/%.cpp
-	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(COMMON_CFLAGS) $(COMMON_INCLUDES) -c -o $@ $<
+$(COMMON_SRCDIR)/./logging.o: $(COMMON_SRCDIR)/./logging.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(COMMON_CFLAGS) $(COMMON_INCLUDES) -c -o $(COMMON_SRCDIR)/./logging.o $(COMMON_SRCDIR)/./logging.cpp
 
-$(COMMON_SRCDIR)/%.o: $(COMMON_SRCDIR)/%.asm
-	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(COMMON_ASMFLAGS) $(COMMON_ASM_INCLUDES) -o $@ $<
+$(COMMON_SRCDIR)/./deblocking_common.o: $(COMMON_SRCDIR)/./deblocking_common.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(COMMON_CFLAGS) $(COMMON_INCLUDES) -c -o $(COMMON_SRCDIR)/./deblocking_common.o $(COMMON_SRCDIR)/./deblocking_common.cpp
+
+$(COMMON_SRCDIR)/./cpu.o: $(COMMON_SRCDIR)/./cpu.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(COMMON_CFLAGS) $(COMMON_INCLUDES) -c -o $(COMMON_SRCDIR)/./cpu.o $(COMMON_SRCDIR)/./cpu.cpp
+
+$(COMMON_SRCDIR)/./deblock.o: $(COMMON_SRCDIR)/./deblock.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(COMMON_ASMFLAGS) $(COMMON_ASM_INCLUDES) -o $(COMMON_SRCDIR)/./deblock.o $(COMMON_SRCDIR)/./deblock.asm
+
+$(COMMON_SRCDIR)/./vaa.o: $(COMMON_SRCDIR)/./vaa.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(COMMON_ASMFLAGS) $(COMMON_ASM_INCLUDES) -o $(COMMON_SRCDIR)/./vaa.o $(COMMON_SRCDIR)/./vaa.asm
+
+$(COMMON_SRCDIR)/./asm_inc.o: $(COMMON_SRCDIR)/./asm_inc.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(COMMON_ASMFLAGS) $(COMMON_ASM_INCLUDES) -o $(COMMON_SRCDIR)/./asm_inc.o $(COMMON_SRCDIR)/./asm_inc.asm
+
+$(COMMON_SRCDIR)/./mc_luma.o: $(COMMON_SRCDIR)/./mc_luma.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(COMMON_ASMFLAGS) $(COMMON_ASM_INCLUDES) -o $(COMMON_SRCDIR)/./mc_luma.o $(COMMON_SRCDIR)/./mc_luma.asm
+
+$(COMMON_SRCDIR)/./cpuid.o: $(COMMON_SRCDIR)/./cpuid.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(COMMON_ASMFLAGS) $(COMMON_ASM_INCLUDES) -o $(COMMON_SRCDIR)/./cpuid.o $(COMMON_SRCDIR)/./cpuid.asm
+
+$(COMMON_SRCDIR)/./mc_chroma.o: $(COMMON_SRCDIR)/./mc_chroma.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(COMMON_ASMFLAGS) $(COMMON_ASM_INCLUDES) -o $(COMMON_SRCDIR)/./mc_chroma.o $(COMMON_SRCDIR)/./mc_chroma.asm
+
+$(COMMON_SRCDIR)/./mb_copy.o: $(COMMON_SRCDIR)/./mb_copy.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(COMMON_ASMFLAGS) $(COMMON_ASM_INCLUDES) -o $(COMMON_SRCDIR)/./mb_copy.o $(COMMON_SRCDIR)/./mb_copy.asm
+
+$(COMMON_SRCDIR)/./expand_picture.o: $(COMMON_SRCDIR)/./expand_picture.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(COMMON_ASMFLAGS) $(COMMON_ASM_INCLUDES) -o $(COMMON_SRCDIR)/./expand_picture.o $(COMMON_SRCDIR)/./expand_picture.asm
 
 $(LIBPREFIX)common.$(LIBSUFFIX): $(COMMON_OBJS)
 	rm -f $(LIBPREFIX)common.$(LIBSUFFIX)
--- /dev/null
+++ b/codec/common/typedefs.h
@@ -1,0 +1,90 @@
+/*!
+ * \copy
+ *     Copyright (c)  2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+// typedef.h
+#ifndef WELS_TYPE_DEFINES_H__
+#define WELS_TYPE_DEFINES_H__
+
+#include <limits.h>
+
+////////////////////////////////////////////////////////////////////////////
+// NOTICE : ALL internal implement MUST use the data type defined as below
+//          ONLY except with the interface file !!!!!
+////////////////////////////////////////////////////////////////////////////
+
+#ifndef  _MSC_VER
+
+#include <stdint.h>
+
+#else
+
+// FIXME:     all singed type should be declared explicit,  for example,  int8_t should be declared as signed char.
+typedef signed char      int8_t  ;
+typedef unsigned char    uint8_t ;
+typedef short            int16_t ;
+typedef unsigned short   uint16_t;
+typedef int              int32_t ;
+typedef unsigned int     uint32_t;
+typedef __int64          int64_t ;
+typedef unsigned __int64 uint64_t;
+
+#endif // _MSC_VER defined
+
+// FIXME:     all string type should be declared explicit as char.
+typedef char      str_t;
+typedef float     real32_t;
+#ifdef EPSN
+#undef EPSN
+#endif//EPSN
+#define EPSN	  (0.000001f) // (1e-6)	// desired float precision
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+typedef bool bool_t;
+typedef int32_t BOOL_T;
+
+#ifndef FALSE
+#define FALSE   ((int32_t)0)
+#endif//FALSE
+
+#ifndef TRUE
+#define TRUE    ((int32_t)1)
+#endif//TRUE
+
+#ifndef void_t
+#define void_t void
+#endif
+
+#endif //WELS_TYPE_DEFINES_H__
+
--- a/codec/console/dec/src/h264dec.cpp
+++ b/codec/console/dec/src/h264dec.cpp
@@ -45,7 +45,7 @@
 #include "codec_app_def.h"
 #include "codec_api.h"
 #include "read_config.h"
-#include "../../decoder/core/inc/typedefs.h"
+#include "typedefs.h"
 #include "../../decoder/core/inc/measure_time.h"
 #include "d3d9_utils.h"
 #include "logging.h"
--- a/codec/console/dec/targets.mk
+++ b/codec/console/dec/targets.mk
@@ -1,9 +1,9 @@
 H264DEC_PREFIX=H264DEC
 H264DEC_SRCDIR=codec/console/dec
 H264DEC_CPP_SRCS=\
-	$(H264DEC_SRCDIR)/./src/d3d9_utils.cpp\
-	$(H264DEC_SRCDIR)/./src/h264dec.cpp\
 	$(H264DEC_SRCDIR)/./src/read_config.cpp\
+	$(H264DEC_SRCDIR)/./src/h264dec.cpp\
+	$(H264DEC_SRCDIR)/./src/d3d9_utils.cpp\
 
 H264DEC_OBJS += $(H264DEC_CPP_SRCS:.cpp=.o)
 ifeq ($(USE_ASM), Yes)
@@ -13,13 +13,16 @@
 endif
 
 OBJS += $(H264DEC_OBJS)
-$(H264DEC_SRCDIR)/%.o: $(H264DEC_SRCDIR)/%.cpp
-	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(H264DEC_CFLAGS) $(H264DEC_INCLUDES) -c -o $@ $<
+$(H264DEC_SRCDIR)/./src/read_config.o: $(H264DEC_SRCDIR)/./src/read_config.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(H264DEC_CFLAGS) $(H264DEC_INCLUDES) -c -o $(H264DEC_SRCDIR)/./src/read_config.o $(H264DEC_SRCDIR)/./src/read_config.cpp
 
-$(H264DEC_SRCDIR)/%.o: $(H264DEC_SRCDIR)/%.asm
-	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(H264DEC_ASMFLAGS) $(H264DEC_ASM_INCLUDES) -o $@ $<
+$(H264DEC_SRCDIR)/./src/h264dec.o: $(H264DEC_SRCDIR)/./src/h264dec.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(H264DEC_CFLAGS) $(H264DEC_INCLUDES) -c -o $(H264DEC_SRCDIR)/./src/h264dec.o $(H264DEC_SRCDIR)/./src/h264dec.cpp
 
-h264dec: $(H264DEC_OBJS) $(LIBS) $(H264DEC_LIBS) $(H264DEC_DEPS)
+$(H264DEC_SRCDIR)/./src/d3d9_utils.o: $(H264DEC_SRCDIR)/./src/d3d9_utils.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(H264DEC_CFLAGS) $(H264DEC_INCLUDES) -c -o $(H264DEC_SRCDIR)/./src/d3d9_utils.o $(H264DEC_SRCDIR)/./src/d3d9_utils.cpp
+
+h264dec: $(H264DEC_OBJS) $(LIBS) $(H264DEC_LIBS)
 	$(CXX) -o $@  $(H264DEC_OBJS) $(H264DEC_LDFLAGS) $(H264DEC_LIBS) $(LDFLAGS) $(LIBS)
 
 binaries: h264dec
--- a/codec/decoder/core/asm/intra_pred.asm
+++ b/codec/decoder/core/asm/intra_pred.asm
@@ -180,7 +180,7 @@
 WELS_EXTERN WelsDecoderI4x4LumaPredH_sse2
 WELS_EXTERN WelsDecoderI4x4LumaPredDDR_mmx
 WELS_EXTERN WelsDecoderI16x16LumaPredPlane_sse2
-WELS_EXTERN WelsI4x4LumaPredDc_sse2
+
 
 ALIGN 16
 ;*******************************************************************************
--- a/codec/decoder/core/inc/cpu.h
+++ /dev/null
@@ -1,80 +1,0 @@
-/*!
- * \copy
- *     Copyright (c)  2009-2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * \file	cpu.h
- *
- * \brief	CPU feature compatibility detection
- *
- * \date	04/29/2009 Created
- *
- *************************************************************************************
- */
-#if !defined(WELS_CPU_DETECTION_H__)
-#define WELS_CPU_DETECTION_H__
-
-#include "typedefs.h"
-
-namespace WelsDec {
-
-#if defined(__cplusplus)
-extern "C" {
-#endif//__cplusplus
-
-#if defined(X86_ASM)
-/*
- *	cpuid support verify routine
- *  return 0 if cpuid is not supported by cpu
- */
-int32_t  WelsCPUIdVerify();
-
-void_t WelsCPUId (uint32_t uiIndex, uint32_t* pFeatureA, uint32_t* pFeatureB, uint32_t* pFeatureC, uint32_t* pFeatureD);
-
-int32_t WelsCPUSupportAVX (uint32_t eax, uint32_t ecx);
-int32_t WelsCPUSupportFMA (uint32_t eax, uint32_t ecx);
-
-void_t WelsEmms();
-
-uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors);
-
-/*
- *	clear FPU registers states for potential float based calculation if support
- */
-void     WelsCPURestore (const uint32_t kuiCPU);
-
-#endif
-
-#if defined(__cplusplus)
-}
-#endif//__cplusplus
-
-} // namespace WelsDec
-
-#endif//WELS_CPU_DETECTION_H__
--- a/codec/decoder/core/inc/cpu_core.h
+++ /dev/null
@@ -1,80 +1,0 @@
-/*!
- * \copy
- *     Copyright (c)  2009-2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * \file	cpu_core.h
- *
- * \brief	cpu core feature detection
- *
- * \date	4/24/2009 Created
- *
- *************************************************************************************
- */
-#if !defined(WELS_CPU_CORE_FEATURE_DETECTION_H__)
-#define WELS_CPU_CORE_FEATURE_DETECTION_H__
-
-/*
- *	WELS CPU feature flags
- */
-#define WELS_CPU_MMX        0x00000001    /* mmx */
-#define WELS_CPU_MMXEXT     0x00000002    /* mmx-ext*/
-#define WELS_CPU_SSE        0x00000004    /* sse */
-#define WELS_CPU_SSE2       0x00000008    /* sse 2 */
-#define WELS_CPU_SSE3       0x00000010    /* sse 3 */
-#define WELS_CPU_SSE41      0x00000020    /* sse 4.1 */
-#define WELS_CPU_3DNOW      0x00000040    /* 3dnow! */
-#define WELS_CPU_3DNOWEXT   0x00000080    /* 3dnow! ext */
-#define WELS_CPU_ALTIVEC    0x00000100    /* altivec */
-#define WELS_CPU_SSSE3      0x00000200    /* ssse3 */
-#define WELS_CPU_SSE42      0x00000400    /* sse 4.2 */
-
-/* CPU features application extensive */
-#define WELS_CPU_AVX		0x00000800	/* Advanced Vector eXtentions */
-#define WELS_CPU_FPU		0x00001000	/* x87-FPU on chip */
-#define WELS_CPU_HTT		0x00002000	/* Hyper-Threading Technology (HTT), Multi-threading enabled feature:
-										   physical processor package is capable of supporting more than one logic processor
-										*/
-#define WELS_CPU_CMOV		0x00004000	/* Conditional Move Instructions,
-										   also if x87-FPU is present at indicated by the CPUID.FPU feature bit, then FCOMI and FCMOV are supported
-										*/
-#define WELS_CPU_MOVBE		0x00008000	/* MOVBE instruction */
-#define WELS_CPU_AES		0x00010000	/* AES instruction extensions */
-#define WELS_CPU_FMA		0x00020000	/* AVX VEX FMA instruction sets */
-
-#define WELS_CPU_CACHELINE_16    0x10000000    /* CacheLine Size 16 */
-#define WELS_CPU_CACHELINE_32    0x20000000    /* CacheLine Size 32 */
-#define WELS_CPU_CACHELINE_64    0x40000000    /* CacheLine Size 64 */
-#define WELS_CPU_CACHELINE_128   0x80000000    /* CacheLine Size 128 */
-
-/*
- *	Interfaces for CPU core feature detection as below
- */
-
-#endif//WELS_CPU_CORE_FEATURE_DETECTION_H__
--- a/codec/decoder/core/inc/deblocking.h
+++ b/codec/decoder/core/inc/deblocking.h
@@ -42,7 +42,7 @@
 #define WELS_DEBLOCKING_H__
 
 #include "decoder_context.h"
-
+#include "deblocking_common.h"
 namespace WelsDec {
 
 /*!
@@ -85,42 +85,6 @@
 void_t DeblockingInterMb (PDqLayer pCurDqLayer, PDeblockingFilter  pFilter, uint8_t nBS[2][4][4], int32_t iBoundryFlag);
 
 void_t WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter  pFilter, int32_t iBoundryFlag);
-
-void_t DeblockLumaLt4V_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
-void_t DeblockLumaEq4V_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
-
-void_t DeblockLumaLt4H_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
-void_t DeblockLumaEq4H_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
-
-void_t DeblockChromaLt4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
-                            int8_t* pTc);
-void_t DeblockChromaEq4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
-
-void_t DeblockChromaLt4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
-                            int8_t* pTc);
-void_t DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
-
-#if defined(__cplusplus)
-extern "C" {
-#endif//__cplusplus
-
-#ifdef  X86_ASM
-void DeblockLumaLt4V_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
-void DeblockLumaEq4V_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
-void DeblockLumaTransposeH2V_sse2 (uint8_t* pPixY, int32_t iStride, uint8_t* pDst);
-void DeblockLumaTransposeV2H_sse2 (uint8_t* pPixY, int32_t iStride, uint8_t* pSrc);
-void DeblockLumaLt4H_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
-void DeblockLumaEq4H_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
-void DeblockChromaEq4V_sse2 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
-void DeblockChromaLt4V_sse2 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
-                             int8_t* pTC);
-void DeblockChromaEq4H_sse2 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
-void DeblockChromaLt4H_sse2 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
-                             int8_t* pTC);
-#endif
-#if defined(__cplusplus)
-}
-#endif//__cplusplus
 
 } // namespace WelsDec
 
--- a/codec/decoder/core/inc/get_intra_predictor.h
+++ b/codec/decoder/core/inc/get_intra_predictor.h
@@ -97,7 +97,7 @@
 void_t WelsDecoderIChromaPredDcNA_mmx (uint8_t* pPred, const int32_t kiStride);
 
 
-void_t WelsI4x4LumaPredDc_sse2 (uint8_t* pPred, const int32_t kiStride);
+
 void_t WelsDecoderI4x4LumaPredDDR_mmx (uint8_t* pPred, const int32_t kiStride);
 void_t WelsDecoderI4x4LumaPredHD_mmx (uint8_t* pPred, const int32_t kiStride);
 void_t WelsDecoderI4x4LumaPredHU_mmx (uint8_t* pPred, const int32_t kiStride);
--- a/codec/decoder/core/inc/typedefs.h
+++ /dev/null
@@ -1,91 +1,0 @@
-/*!
- * \copy
- *     Copyright (c)  2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-// typedef.h
-#ifndef WELS_TYPE_DEFINES_H__
-#define WELS_TYPE_DEFINES_H__
-
-#include <limits.h>
-
-////////////////////////////////////////////////////////////////////////////
-// NOTICE : ALL internal implement MUST use the data type defined as below
-//          ONLY except with the interface file !!!!!
-////////////////////////////////////////////////////////////////////////////
-
-#ifndef  _MSC_VER
-
-#include <stdint.h>
-
-#else
-
-// FIXME:     all singed type should be declared explicit,  for example,  int8_t should be declared as signed char.
-typedef signed char      int8_t  ;
-typedef unsigned char    uint8_t ;
-typedef short            int16_t ;
-typedef unsigned short   uint16_t;
-typedef int              int32_t ;
-typedef unsigned int     uint32_t;
-typedef __int64          int64_t ;
-typedef unsigned __int64 uint64_t;
-
-#endif // _MSC_VER defined
-
-// FIXME:     all string type should be declared explicit as char.
-typedef char      str_t;
-typedef float     real32_t;
-
-#ifdef PESN
-#undef PESN
-#endif//PESN
-#define PESN	  (0.000001f) // (1e-6)	// desired float precision
-
-#ifndef NULL
-#define NULL 0
-#endif
-
-typedef bool bool_t;
-typedef int32_t BOOL_T;
-
-#ifndef FALSE
-#define FALSE   ((int32_t)0)
-#endif//FALSE
-
-#ifndef TRUE
-#define TRUE    ((int32_t)1)
-#endif//TRUE
-
-#ifndef void_t
-#define void_t void
-#endif
-
-#endif //WELS_TYPE_DEFINES_H__
-
--- a/codec/decoder/core/src/cpu.cpp
+++ /dev/null
@@ -1,194 +1,0 @@
-/*!
- * \copy
- *     Copyright (c)  2009-2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * \file	cpu.cpp
- *
- * \brief	CPU compatibility detection
- *
- * \date	04/29/2009 Created
- *
- *************************************************************************************
- */
-#include <string.h>
-
-#include "cpu.h"
-#include "cpu_core.h"
-
-namespace WelsDec {
-
-#define    CPU_Vender_AMD    "AuthenticAMD"
-#define    CPU_Vender_INTEL  "GenuineIntel"
-#define    CPU_Vender_CYRIX  "CyrixInstead"
-
-#if defined(X86_ASM)
-
-uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
-  uint32_t uiCPU = 0;
-  uint32_t uiFeatureA = 0, uiFeatureB = 0, uiFeatureC = 0, uiFeatureD = 0;
-  int32_t  CacheLineSize = 0;
-  int8_t   chVenderName[16] = { 0 };
-
-  if (!WelsCPUIdVerify()) {
-    /* cpuid is not supported in cpu */
-    return 0;
-  }
-
-  WelsCPUId (0, &uiFeatureA, (uint32_t*)&chVenderName[0], (uint32_t*)&chVenderName[8], (uint32_t*)&chVenderName[4]);
-  if (uiFeatureA == 0) {
-    /* maximum input value for basic cpuid information */
-    return 0;
-  }
-
-  WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
-  if ((uiFeatureD & 0x00800000) == 0) {
-    /* Basic MMX technology is not support in cpu, mean nothing for us so return here */
-    return 0;
-  }
-
-  uiCPU = WELS_CPU_MMX;
-  if (uiFeatureD & 0x02000000) {
-    /* SSE technology is identical to AMD MMX extensions */
-    uiCPU |= WELS_CPU_MMXEXT | WELS_CPU_SSE;
-  }
-  if (uiFeatureD & 0x04000000) {
-    /* SSE2 support here */
-    uiCPU |= WELS_CPU_SSE2;
-  }
-  if (uiFeatureD & 0x00000001) {
-    /* x87 FPU on-chip checking */
-    uiCPU |= WELS_CPU_FPU;
-  }
-  if (uiFeatureD & 0x00008000) {
-    /* CMOV instruction checking */
-    uiCPU |= WELS_CPU_CMOV;
-  }
-  if (!strcmp ((const str_t*)chVenderName, CPU_Vender_INTEL)) {	// confirmed_safe_unsafe_usage
-    if (uiFeatureD & 0x10000000) {
-      /* Multi-Threading checking: contains of multiple logic processors */
-      uiCPU |= WELS_CPU_HTT;
-    }
-  }
-
-  if (uiFeatureC & 0x00000001) {
-    /* SSE3 support here */
-    uiCPU |= WELS_CPU_SSE3;
-  }
-  if (uiFeatureC & 0x00000200) {
-    /* SSSE3 support here */
-    uiCPU |= WELS_CPU_SSSE3;
-  }
-  if (uiFeatureC & 0x00080000) {
-    /* SSE4.1 support here, 45nm Penryn processor */
-    uiCPU |= WELS_CPU_SSE41;
-  }
-  if (uiFeatureC & 0x00100000) {
-    /* SSE4.2 support here, next generation Nehalem processor */
-    uiCPU |= WELS_CPU_SSE42;
-  }
-  if (WelsCPUSupportAVX (uiFeatureA, uiFeatureC)) {
-    /* AVX supported */
-    uiCPU |= WELS_CPU_AVX;
-  }
-  if (WelsCPUSupportFMA (uiFeatureA, uiFeatureC)) {
-    /* AVX FMA supported */
-    uiCPU |= WELS_CPU_FMA;
-  }
-  if (uiFeatureC & 0x02000000) {
-    /* AES checking */
-    uiCPU |= WELS_CPU_AES;
-  }
-  if (uiFeatureC & 0x00400000) {
-    /* MOVBE checking */
-    uiCPU |= WELS_CPU_MOVBE;
-  }
-
-  if (pNumberOfLogicProcessors != NULL) {
-    // HTT enabled on chip
-    *pNumberOfLogicProcessors = (uiFeatureB & 0x00ff0000) >> 16; // feature bits: 23-16 on returned EBX
-  }
-
-  WelsCPUId (0x80000000, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
-
-  if ((!strcmp ((const str_t*)chVenderName, CPU_Vender_AMD))
-      && (uiFeatureA >= 0x80000001)) {	// confirmed_safe_unsafe_usage
-    WelsCPUId (0x80000001, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
-    if (uiFeatureD & 0x00400000) {
-      uiCPU |= WELS_CPU_MMXEXT;
-    }
-    if (uiFeatureD & 0x80000000) {
-      uiCPU |= WELS_CPU_3DNOW;
-    }
-  }
-
-  if (!strcmp ((const str_t*)chVenderName, CPU_Vender_INTEL)) {	// confirmed_safe_unsafe_usage
-    int32_t  family, model;
-
-    WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
-    family = ((uiFeatureA >> 8) & 0xf) + ((uiFeatureA >> 20) & 0xff);
-    model  = ((uiFeatureA >> 4) & 0xf) + ((uiFeatureA >> 12) & 0xf0);
-
-    if ((family == 6) && (model == 9 || model == 13 || model == 14)) {
-      uiCPU &= ~ (WELS_CPU_SSE2 | WELS_CPU_SSE3);
-    }
-  }
-
-  // get cache line size
-  if ((!strcmp ((const str_t*)chVenderName, CPU_Vender_INTEL))
-      || ! (strcmp ((const str_t*)chVenderName, CPU_Vender_CYRIX))) {	// confirmed_safe_unsafe_usage
-    WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
-
-    CacheLineSize = (uiFeatureB & 0xff00) >>
-                    5;	// ((clflush_line_size >> 8) << 3), CLFLUSH_line_size * 8 = CacheLineSize_in_byte
-
-    if (CacheLineSize == 128) {
-      uiCPU |= WELS_CPU_CACHELINE_128;
-    } else if (CacheLineSize == 64) {
-      uiCPU |= WELS_CPU_CACHELINE_64;
-    } else if (CacheLineSize == 32) {
-      uiCPU |= WELS_CPU_CACHELINE_32;
-    } else if (CacheLineSize == 16) {
-      uiCPU |= WELS_CPU_CACHELINE_16;
-    }
-  }
-
-  return uiCPU;
-}
-
-
-void WelsCPURestore (const uint32_t kuiCPU) {
-  if (kuiCPU & (WELS_CPU_MMX | WELS_CPU_MMXEXT | WELS_CPU_3DNOW | WELS_CPU_3DNOWEXT)) {
-    WelsEmms();
-  }
-}
-
-#endif
-
-} // namespace WelsDec
--- a/codec/decoder/core/src/deblocking.cpp
+++ b/codec/decoder/core/src/deblocking.cpp
@@ -618,208 +618,6 @@
   }
 }
 
-//  C code only
-void_t DeblockLumaLt4_c (uint8_t* pPix, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, int32_t iBeta,
-                         int8_t* pTc) {
-  for (int32_t i = 0; i < 16; i++) {
-    int32_t iTc0 = pTc[i >> 2];
-    if (iTc0 >= 0) {
-      int32_t p0 = pPix[-iStrideX];
-      int32_t p1 = pPix[-2 * iStrideX];
-      int32_t p2 = pPix[-3 * iStrideX];
-      int32_t q0 = pPix[0];
-      int32_t q1 = pPix[iStrideX];
-      int32_t q2 = pPix[2 * iStrideX];
-      bool_t bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha;
-      bool_t bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta;
-      bool_t bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta;
-      int32_t iTc = iTc0;
-      if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) {
-        bool_t bDetaP2P0 =  WELS_ABS (p2 - p0) < iBeta;
-        bool_t bDetaQ2Q0 =  WELS_ABS (q2 - q0) < iBeta;
-        if (bDetaP2P0) {
-          pPix[-2 * iStrideX] = p1 + WELS_CLIP3 ((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1, -iTc0, iTc0);
-          iTc++;
-        }
-        if (bDetaQ2Q0) {
-          pPix[iStrideX] = q1 + WELS_CLIP3 ((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1, -iTc0, iTc0);
-          iTc++;
-        }
-        int32_t iDeta = WELS_CLIP3 ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3, -iTc, iTc);
-        pPix[-iStrideX] = WELS_CLIP1 (p0 + iDeta);     /* p0' */
-        pPix[0]  = WELS_CLIP1 (q0 - iDeta);     /* q0' */
-      }
-    }
-    pPix += iStrideY;
-  }
-}
-void_t DeblockLumaEq4_c (uint8_t* pPix, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, int32_t iBeta) {
-  int32_t p0, p1, p2, q0, q1, q2;
-  int32_t iDetaP0Q0;
-  bool_t bDetaP1P0, bDetaQ1Q0;
-  for (int32_t i = 0; i < 16; i++) {
-    p0 = pPix[-iStrideX];
-    p1 = pPix[-2 * iStrideX];
-    p2 = pPix[-3 * iStrideX];
-    q0 = pPix[0];
-    q1 = pPix[iStrideX];
-    q2 = pPix[2 * iStrideX];
-    iDetaP0Q0 = WELS_ABS (p0 - q0);
-    bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta;
-    bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta;
-    if ((iDetaP0Q0 < iAlpha) && bDetaP1P0 && bDetaQ1Q0) {
-      if (iDetaP0Q0 < ((iAlpha >> 2) + 2)) {
-        bool_t bDetaP2P0 = WELS_ABS (p2 - p0) < iBeta;
-        bool_t bDetaQ2Q0 =  WELS_ABS (q2 - q0) < iBeta;
-        if (bDetaP2P0) {
-          const int32_t p3 = pPix[-4 * iStrideX];
-          pPix[-iStrideX] = (p2 + (p1 << 1) + (p0 << 1) + (q0 << 1) + q1 + 4) >> 3;	   //p0
-          pPix[-2 * iStrideX] = (p2 + p1 + p0 + q0 + 2) >> 2;	 //p1
-          pPix[-3 * iStrideX] = ((p3 << 1) + p2 + (p2 << 1) + p1 + p0 + q0 + 4) >> 3;//p2
-        } else {
-          pPix[-1 * iStrideX] = ((p1 << 1) + p0 + q1 + 2) >> 2;	//p0
-        }
-        if (bDetaQ2Q0) {
-          const int32_t q3 = pPix[3 * iStrideX];
-          pPix[0] = (p1 + (p0 << 1) + (q0 << 1) + (q1 << 1) + q2 + 4) >> 3;   //q0
-          pPix[iStrideX] = (p0 + q0 + q1 + q2 + 2) >> 2;   //q1
-          pPix[2 * iStrideX] = ((q3 << 1) + q2 + (q2 << 1) + q1 + q0 + p0 + 4) >> 3;//q2
-        } else {
-          pPix[0] = ((q1 << 1) + q0 + p1 + 2) >> 2;   //q0
-        }
-      } else {
-        pPix[-iStrideX] = ((p1 << 1) + p0 + q1 + 2) >> 2;   //p0
-        pPix[ 0] = ((q1 << 1) + q0 + p1 + 2) >> 2;   //q0
-      }
-    }
-    pPix += iStrideY;
-  }
-}
-void_t DeblockLumaLt4V_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* tc) {
-  DeblockLumaLt4_c (pPix, iStride, 1, iAlpha, iBeta, tc);
-}
-void_t DeblockLumaLt4H_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* tc) {
-  DeblockLumaLt4_c (pPix, 1, iStride, iAlpha, iBeta, tc);
-}
-void_t DeblockLumaEq4V_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
-  DeblockLumaEq4_c (pPix, iStride, 1, iAlpha, iBeta);
-}
-void_t DeblockLumaEq4H_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
-  DeblockLumaEq4_c (pPix, 1, iStride, iAlpha, iBeta);
-}
-void_t DeblockChromaLt4_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha,
-                           int32_t iBeta, int8_t* pTc) {
-  int32_t p0, p1, q0, q1, iDeta;
-  bool_t bDetaP0Q0, bDetaP1P0, bDetaQ1Q0;
-
-  for (int32_t i = 0; i < 8; i++) {
-    int32_t iTc0 = pTc[i >> 1];
-    if (iTc0 > 0) {
-      p0 = pPixCb[-iStrideX];
-      p1 = pPixCb[-2 * iStrideX];
-      q0 = pPixCb[0];
-      q1 = pPixCb[iStrideX];
-
-      bDetaP0Q0 =  WELS_ABS (p0 - q0) < iAlpha;
-      bDetaP1P0 =  WELS_ABS (p1 - p0) < iBeta;
-      bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta;
-      if (bDetaP0Q0 && bDetaP1P0 &&	bDetaQ1Q0) {
-        iDeta = WELS_CLIP3 ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3, -iTc0, iTc0);
-        pPixCb[-iStrideX] = WELS_CLIP1 (p0 + iDeta);     /* p0' */
-        pPixCb[0]  = WELS_CLIP1 (q0 - iDeta);     /* q0' */
-      }
-
-
-      p0 = pPixCr[-iStrideX];
-      p1 = pPixCr[-2 * iStrideX];
-      q0 = pPixCr[0];
-      q1 = pPixCr[iStrideX];
-
-      bDetaP0Q0 =  WELS_ABS (p0 - q0) < iAlpha;
-      bDetaP1P0 =  WELS_ABS (p1 - p0) < iBeta;
-      bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta;
-
-      if (bDetaP0Q0 && bDetaP1P0 &&	bDetaQ1Q0) {
-        iDeta = WELS_CLIP3 ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3, -iTc0, iTc0);
-        pPixCr[-iStrideX] = WELS_CLIP1 (p0 + iDeta);     /* p0' */
-        pPixCr[0]  = WELS_CLIP1 (q0 - iDeta);     /* q0' */
-      }
-    }
-    pPixCb += iStrideY;
-    pPixCr += iStrideY;
-  }
-}
-void_t DeblockChromaEq4_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha,
-                           int32_t iBeta) {
-  int32_t i = 0, d = 0;
-  int32_t p0, p1, q0, q1;
-  bool_t bDetaP0Q0, bDetaP1P0, bDetaQ1Q0;
-  for (int32_t i = 0; i < 8; i++) {
-    //cb
-    p0 = pPixCb[-iStrideX];
-    p1 = pPixCb[-2 * iStrideX];
-    q0 = pPixCb[0];
-    q1 = pPixCb[iStrideX];
-    bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha;
-    bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta;
-    bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta;
-    if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) {
-      pPixCb[-iStrideX] = ((p1 << 1) + p0 + q1 + 2) >> 2;     /* p0' */
-      pPixCb[0]  = ((q1 << 1) + q0 + p1 + 2) >> 2;     /* q0' */
-    }
-
-    //cr
-    p0 = pPixCr[-iStrideX];
-    p1 = pPixCr[-2 * iStrideX];
-    q0 = pPixCr[0];
-    q1 = pPixCr[iStrideX];
-    bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha;
-    bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta;
-    bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta;
-    if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) {
-      pPixCr[-iStrideX] = ((p1 << 1) + p0 + q1 + 2) >> 2;     /* p0' */
-      pPixCr[0]  = ((q1 << 1) + q0 + p1 + 2) >> 2;     /* q0' */
-    }
-    pPixCr += iStrideY;
-    pPixCb += iStrideY;
-  }
-}
-void_t DeblockChromaLt4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
-                            int8_t* tc) {
-  DeblockChromaLt4_c (pPixCb, pPixCr, iStride, 1, iAlpha, iBeta, tc);
-}
-void_t DeblockChromaLt4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
-                            int8_t* tc) {
-  DeblockChromaLt4_c (pPixCb, pPixCr, 1, iStride, iAlpha, iBeta, tc);
-}
-void_t DeblockChromaEq4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
-  DeblockChromaEq4_c (pPixCb, pPixCr, iStride, 1, iAlpha, iBeta);
-}
-void_t DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
-  DeblockChromaEq4_c (pPixCb, pPixCr, 1, iStride, iAlpha, iBeta);
-}
-
-#ifdef X86_ASM
-extern "C" {
-  void DeblockLumaLt4H_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc) {
-    FORCE_STACK_ALIGN_1D (uint8_t,  uiBuf,   16 * 8, 16);
-
-    DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]);
-    DeblockLumaLt4V_sse2 (&uiBuf[4 * 16], 16, iAlpha, iBeta, pTc);
-    DeblockLumaTransposeV2H_sse2 (pPixY - 4, iStride, &uiBuf[0]);
-  }
-
-  void DeblockLumaEq4H_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
-    FORCE_STACK_ALIGN_1D (uint8_t,  uiBuf,   16 * 8, 16);
-
-    DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]);
-    DeblockLumaEq4V_sse2 (&uiBuf[4 * 16], 16, iAlpha, iBeta);
-    DeblockLumaTransposeV2H_sse2 (pPixY - 4, iStride, &uiBuf[0]);
-  }
-
-}
-
-#endif
 /*!
  * \brief	AVC slice deblocking filtering target layer
  *
@@ -924,4 +722,4 @@
 
 }
 
-} // namespace WelsDec
\ No newline at end of file
+} // namespace WelsDec
--- a/codec/decoder/core/src/decoder.cpp
+++ b/codec/decoder/core/src/decoder.cpp
@@ -775,4 +775,4 @@
   WelsBlockFuncInit (&pCtx->sBlockFunc, pCtx->uiCpuFlag);
 }
 
-} // namespace WelsDec
\ No newline at end of file
+} // namespace WelsDec
--- a/codec/decoder/targets.mk
+++ b/codec/decoder/targets.mk
@@ -1,46 +1,114 @@
 DECODER_PREFIX=DECODER
 DECODER_SRCDIR=codec/decoder
 DECODER_CPP_SRCS=\
-	$(DECODER_SRCDIR)/./core/src/au_parser.cpp\
-	$(DECODER_SRCDIR)/./core/src/bit_stream.cpp\
-	$(DECODER_SRCDIR)/./core/src/cpu.cpp\
+	$(DECODER_SRCDIR)/./core/src/mc.cpp\
+	$(DECODER_SRCDIR)/./core/src/get_intra_predictor.cpp\
 	$(DECODER_SRCDIR)/./core/src/deblocking.cpp\
-	$(DECODER_SRCDIR)/./core/src/decode_mb_aux.cpp\
+	$(DECODER_SRCDIR)/./core/src/au_parser.cpp\
 	$(DECODER_SRCDIR)/./core/src/decode_slice.cpp\
+	$(DECODER_SRCDIR)/./core/src/pic_queue.cpp\
+	$(DECODER_SRCDIR)/./core/src/utils.cpp\
 	$(DECODER_SRCDIR)/./core/src/decoder.cpp\
-	$(DECODER_SRCDIR)/./core/src/decoder_core.cpp\
-	$(DECODER_SRCDIR)/./core/src/decoder_data_tables.cpp\
-	$(DECODER_SRCDIR)/./core/src/expand_pic.cpp\
 	$(DECODER_SRCDIR)/./core/src/fmo.cpp\
-	$(DECODER_SRCDIR)/./core/src/get_intra_predictor.cpp\
-	$(DECODER_SRCDIR)/./core/src/manage_dec_ref.cpp\
-	$(DECODER_SRCDIR)/./core/src/mc.cpp\
-	$(DECODER_SRCDIR)/./core/src/mem_align.cpp\
-	$(DECODER_SRCDIR)/./core/src/memmgr_nal_unit.cpp\
 	$(DECODER_SRCDIR)/./core/src/mv_pred.cpp\
+	$(DECODER_SRCDIR)/./core/src/memmgr_nal_unit.cpp\
+	$(DECODER_SRCDIR)/./core/src/decoder_core.cpp\
 	$(DECODER_SRCDIR)/./core/src/parse_mb_syn_cavlc.cpp\
-	$(DECODER_SRCDIR)/./core/src/pic_queue.cpp\
+	$(DECODER_SRCDIR)/./core/src/decoder_data_tables.cpp\
+	$(DECODER_SRCDIR)/./core/src/mem_align.cpp\
+	$(DECODER_SRCDIR)/./core/src/bit_stream.cpp\
+	$(DECODER_SRCDIR)/./core/src/manage_dec_ref.cpp\
 	$(DECODER_SRCDIR)/./core/src/rec_mb.cpp\
-	$(DECODER_SRCDIR)/./core/src/utils.cpp\
-	$(DECODER_SRCDIR)/./plus/src/welsCodecTrace.cpp\
+	$(DECODER_SRCDIR)/./core/src/expand_pic.cpp\
+	$(DECODER_SRCDIR)/./core/src/decode_mb_aux.cpp\
 	$(DECODER_SRCDIR)/./plus/src/welsDecoderExt.cpp\
+	$(DECODER_SRCDIR)/./plus/src/welsCodecTrace.cpp\
 
 DECODER_OBJS += $(DECODER_CPP_SRCS:.cpp=.o)
 ifeq ($(USE_ASM), Yes)
 DECODER_ASM_SRCS=\
-	$(DECODER_SRCDIR)/./core/asm/block_add.asm\
 	$(DECODER_SRCDIR)/./core/asm/dct.asm\
 	$(DECODER_SRCDIR)/./core/asm/intra_pred.asm\
+	$(DECODER_SRCDIR)/./core/asm/block_add.asm\
 
 DECODER_OBJS += $(DECODER_ASM_SRCS:.asm=.o)
 endif
 
 OBJS += $(DECODER_OBJS)
-$(DECODER_SRCDIR)/%.o: $(DECODER_SRCDIR)/%.cpp
-	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $@ $<
+$(DECODER_SRCDIR)/./core/src/mc.o: $(DECODER_SRCDIR)/./core/src/mc.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/mc.o $(DECODER_SRCDIR)/./core/src/mc.cpp
 
-$(DECODER_SRCDIR)/%.o: $(DECODER_SRCDIR)/%.asm
-	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(DECODER_ASMFLAGS) $(DECODER_ASM_INCLUDES) -o $@ $<
+$(DECODER_SRCDIR)/./core/src/get_intra_predictor.o: $(DECODER_SRCDIR)/./core/src/get_intra_predictor.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/get_intra_predictor.o $(DECODER_SRCDIR)/./core/src/get_intra_predictor.cpp
+
+$(DECODER_SRCDIR)/./core/src/deblocking.o: $(DECODER_SRCDIR)/./core/src/deblocking.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/deblocking.o $(DECODER_SRCDIR)/./core/src/deblocking.cpp
+
+$(DECODER_SRCDIR)/./core/src/au_parser.o: $(DECODER_SRCDIR)/./core/src/au_parser.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/au_parser.o $(DECODER_SRCDIR)/./core/src/au_parser.cpp
+
+$(DECODER_SRCDIR)/./core/src/decode_slice.o: $(DECODER_SRCDIR)/./core/src/decode_slice.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/decode_slice.o $(DECODER_SRCDIR)/./core/src/decode_slice.cpp
+
+$(DECODER_SRCDIR)/./core/src/pic_queue.o: $(DECODER_SRCDIR)/./core/src/pic_queue.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/pic_queue.o $(DECODER_SRCDIR)/./core/src/pic_queue.cpp
+
+$(DECODER_SRCDIR)/./core/src/utils.o: $(DECODER_SRCDIR)/./core/src/utils.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/utils.o $(DECODER_SRCDIR)/./core/src/utils.cpp
+
+$(DECODER_SRCDIR)/./core/src/decoder.o: $(DECODER_SRCDIR)/./core/src/decoder.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/decoder.o $(DECODER_SRCDIR)/./core/src/decoder.cpp
+
+$(DECODER_SRCDIR)/./core/src/fmo.o: $(DECODER_SRCDIR)/./core/src/fmo.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/fmo.o $(DECODER_SRCDIR)/./core/src/fmo.cpp
+
+$(DECODER_SRCDIR)/./core/src/mv_pred.o: $(DECODER_SRCDIR)/./core/src/mv_pred.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/mv_pred.o $(DECODER_SRCDIR)/./core/src/mv_pred.cpp
+
+$(DECODER_SRCDIR)/./core/src/memmgr_nal_unit.o: $(DECODER_SRCDIR)/./core/src/memmgr_nal_unit.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/memmgr_nal_unit.o $(DECODER_SRCDIR)/./core/src/memmgr_nal_unit.cpp
+
+$(DECODER_SRCDIR)/./core/src/decoder_core.o: $(DECODER_SRCDIR)/./core/src/decoder_core.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/decoder_core.o $(DECODER_SRCDIR)/./core/src/decoder_core.cpp
+
+$(DECODER_SRCDIR)/./core/src/parse_mb_syn_cavlc.o: $(DECODER_SRCDIR)/./core/src/parse_mb_syn_cavlc.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/parse_mb_syn_cavlc.o $(DECODER_SRCDIR)/./core/src/parse_mb_syn_cavlc.cpp
+
+$(DECODER_SRCDIR)/./core/src/decoder_data_tables.o: $(DECODER_SRCDIR)/./core/src/decoder_data_tables.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/decoder_data_tables.o $(DECODER_SRCDIR)/./core/src/decoder_data_tables.cpp
+
+$(DECODER_SRCDIR)/./core/src/mem_align.o: $(DECODER_SRCDIR)/./core/src/mem_align.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/mem_align.o $(DECODER_SRCDIR)/./core/src/mem_align.cpp
+
+$(DECODER_SRCDIR)/./core/src/bit_stream.o: $(DECODER_SRCDIR)/./core/src/bit_stream.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/bit_stream.o $(DECODER_SRCDIR)/./core/src/bit_stream.cpp
+
+$(DECODER_SRCDIR)/./core/src/manage_dec_ref.o: $(DECODER_SRCDIR)/./core/src/manage_dec_ref.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/manage_dec_ref.o $(DECODER_SRCDIR)/./core/src/manage_dec_ref.cpp
+
+$(DECODER_SRCDIR)/./core/src/rec_mb.o: $(DECODER_SRCDIR)/./core/src/rec_mb.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/rec_mb.o $(DECODER_SRCDIR)/./core/src/rec_mb.cpp
+
+$(DECODER_SRCDIR)/./core/src/expand_pic.o: $(DECODER_SRCDIR)/./core/src/expand_pic.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/expand_pic.o $(DECODER_SRCDIR)/./core/src/expand_pic.cpp
+
+$(DECODER_SRCDIR)/./core/src/decode_mb_aux.o: $(DECODER_SRCDIR)/./core/src/decode_mb_aux.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./core/src/decode_mb_aux.o $(DECODER_SRCDIR)/./core/src/decode_mb_aux.cpp
+
+$(DECODER_SRCDIR)/./plus/src/welsDecoderExt.o: $(DECODER_SRCDIR)/./plus/src/welsDecoderExt.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./plus/src/welsDecoderExt.o $(DECODER_SRCDIR)/./plus/src/welsDecoderExt.cpp
+
+$(DECODER_SRCDIR)/./plus/src/welsCodecTrace.o: $(DECODER_SRCDIR)/./plus/src/welsCodecTrace.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./plus/src/welsCodecTrace.o $(DECODER_SRCDIR)/./plus/src/welsCodecTrace.cpp
+
+$(DECODER_SRCDIR)/./core/asm/dct.o: $(DECODER_SRCDIR)/./core/asm/dct.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(DECODER_ASMFLAGS) $(DECODER_ASM_INCLUDES) -o $(DECODER_SRCDIR)/./core/asm/dct.o $(DECODER_SRCDIR)/./core/asm/dct.asm
+
+$(DECODER_SRCDIR)/./core/asm/intra_pred.o: $(DECODER_SRCDIR)/./core/asm/intra_pred.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(DECODER_ASMFLAGS) $(DECODER_ASM_INCLUDES) -o $(DECODER_SRCDIR)/./core/asm/intra_pred.o $(DECODER_SRCDIR)/./core/asm/intra_pred.asm
+
+$(DECODER_SRCDIR)/./core/asm/block_add.o: $(DECODER_SRCDIR)/./core/asm/block_add.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(DECODER_ASMFLAGS) $(DECODER_ASM_INCLUDES) -o $(DECODER_SRCDIR)/./core/asm/block_add.o $(DECODER_SRCDIR)/./core/asm/block_add.asm
 
 $(LIBPREFIX)decoder.$(LIBSUFFIX): $(DECODER_OBJS)
 	rm -f $(LIBPREFIX)decoder.$(LIBSUFFIX)
--- a/codec/encoder/core/inc/cpu.h
+++ /dev/null
@@ -1,78 +1,0 @@
-/*!
- * \copy
- *     Copyright (c)  2009-2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * \file	cpu.h
- *
- * \brief	CPU feature compatibility detection
- *
- * \date	04/29/2009
- *
- *************************************************************************************
- */
-#if !defined(WELS_CPU_DETECTION_H__)
-#define WELS_CPU_DETECTION_H__
-
-#include "typedefs.h"
-
-namespace WelsSVCEnc {
-#if defined(__cplusplus)
-extern "C" {
-#endif//__cplusplus
-
-#if defined(X86_ASM)
-/*
- *	cpuid support verify routine
- *  return 0 if cpuid is not supported by cpu
- */
-int32_t  WelsCPUIdVerify();
-
-void WelsCPUId (uint32_t uiIndex, uint32_t* pFeatureA, uint32_t* pFeatureB, uint32_t* pFeatureC, uint32_t* pFeatureD);
-
-int32_t WelsCPUSupportAVX (uint32_t eax, uint32_t ecx);
-int32_t WelsCPUSupportFMA (uint32_t eax, uint32_t ecx);
-
-void WelsEmms();
-
-uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors);
-
-/*
- *	clear FPU registers states for potential float based calculation if support
- */
-void     WelsCPURestore (const uint32_t kuiCPU);
-
-#endif
-
-#if defined(__cplusplus)
-}
-#endif//__cplusplus
-
-}
-#endif//WELS_CPU_DETECTION_H__
--- a/codec/encoder/core/inc/cpu_core.h
+++ /dev/null
@@ -1,80 +1,0 @@
-/*!
- * \copy
- *     Copyright (c)  2009-2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * \file	cpu_core.h
- *
- * \brief	cpu core feature detection
- *
- * \date	4/24/2009 Created
- *
- *************************************************************************************
- */
-#if !defined(WELS_CPU_CORE_FEATURE_DETECTION_H__)
-#define WELS_CPU_CORE_FEATURE_DETECTION_H__
-
-/*
- *	WELS CPU feature flags
- */
-#define WELS_CPU_MMX        0x00000001    /* mmx */
-#define WELS_CPU_MMXEXT     0x00000002    /* mmx-ext*/
-#define WELS_CPU_SSE        0x00000004    /* sse */
-#define WELS_CPU_SSE2       0x00000008    /* sse 2 */
-#define WELS_CPU_SSE3       0x00000010    /* sse 3 */
-#define WELS_CPU_SSE41      0x00000020    /* sse 4.1 */
-#define WELS_CPU_3DNOW      0x00000040    /* 3dnow! */
-#define WELS_CPU_3DNOWEXT   0x00000080    /* 3dnow! ext */
-#define WELS_CPU_ALTIVEC    0x00000100    /* altivec */
-#define WELS_CPU_SSSE3      0x00000200    /* ssse3 */
-#define WELS_CPU_SSE42      0x00000400    /* sse 4.2 */
-
-/* CPU features application extensive */
-#define WELS_CPU_AVX		0x00000800	/* Advanced Vector eXtentions */
-#define WELS_CPU_FPU		0x00001000	/* x87-FPU on chip */
-#define WELS_CPU_HTT		0x00002000	/* Hyper-Threading Technology (HTT), Multi-threading enabled feature:
-										   physical processor package is capable of supporting more than one logic processor
-										*/
-#define WELS_CPU_CMOV		0x00004000	/* Conditional Move Instructions,
-										   also if x87-FPU is present at indicated by the CPUID.FPU feature bit, then FCOMI and FCMOV are supported
-										*/
-#define WELS_CPU_MOVBE		0x00008000	/* MOVBE instruction */
-#define WELS_CPU_AES		0x00010000	/* AES instruction extensions */
-#define WELS_CPU_FMA		0x00020000	/* AVX VEX FMA instruction sets */
-
-#define WELS_CPU_CACHELINE_16    0x10000000    /* CacheLine Size 16 */
-#define WELS_CPU_CACHELINE_32    0x20000000    /* CacheLine Size 32 */
-#define WELS_CPU_CACHELINE_64    0x40000000    /* CacheLine Size 64 */
-#define WELS_CPU_CACHELINE_128   0x80000000    /* CacheLine Size 128 */
-
-/*
- *	Interfaces for CPU core feature detection as below
- */
-
-#endif//WELS_CPU_CORE_FEATURE_DETECTION_H__
--- a/codec/encoder/core/inc/deblocking.h
+++ b/codec/encoder/core/inc/deblocking.h
@@ -43,7 +43,7 @@
 
 #include "encoder_context.h"
 #include "wels_func_ptr_def.h"
-
+#include "deblocking_common.h"
 namespace WelsSVCEnc {
 
 
@@ -60,51 +60,6 @@
 uint8_t     uiFilterIdc;
 uint8_t     uiReserved;
 } SDeblockingFilter;
-
-void DeblockLumaLt4_c (uint8_t* pPixY, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
-void DeblockLumaEq4_c (uint8_t* pPixY, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, int32_t iBeta);
-void DeblockChromaLt4_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha,
-                         int32_t iBeta, int8_t* pTc);
-void DeblockChromaEq4_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha,
-                         int32_t iBeta);
-
-
-void DeblockLumaLt4V_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
-void DeblockLumaEq4V_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
-
-void DeblockLumaLt4H_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
-void DeblockLumaEq4H_c (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
-
-void DeblockChromaLt4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
-                          int8_t* pTc);
-void DeblockChromaEq4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
-
-void DeblockChromaLt4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
-                          int8_t* pTc);
-void DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
-
-#if defined(__cplusplus)
-extern "C" {
-#endif//__cplusplus
-
-#ifdef  X86_ASM
-void DeblockLumaLt4V_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
-void DeblockLumaEq4V_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
-void DeblockLumaTransposeH2V_sse2 (uint8_t* pPixY, int32_t iStride, uint8_t* pDst);
-void DeblockLumaTransposeV2H_sse2 (uint8_t* pPixY, int32_t iStride, uint8_t* pSrc);
-void DeblockLumaLt4H_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
-void DeblockLumaEq4H_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
-void DeblockChromaEq4V_sse2 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
-void DeblockChromaLt4V_sse2 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
-                             int8_t* pTC);
-void DeblockChromaEq4H_sse2 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
-void DeblockChromaLt4H_sse2 (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
-                             int8_t* pTC);
-#endif
-
-#if defined(__cplusplus)
-}
-#endif//__cplusplus
 
 void DeblockingInit (DeblockingFunc*   pFunc,  int32_t iCpu);
 
--- a/codec/encoder/core/inc/typedefs.h
+++ /dev/null
@@ -1,88 +1,0 @@
-/*!
- * \copy
- *     Copyright (c)  2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-// typedef.h
-#ifndef WELS_TYPE_DEFINES_H__
-#define WELS_TYPE_DEFINES_H__
-
-#include <limits.h>
-
-////////////////////////////////////////////////////////////////////////////
-// NOTICE : ALL internal implement MUST use the pData type defined as below
-//          ONLY except with the interface file !!!!!
-////////////////////////////////////////////////////////////////////////////
-
-#ifndef  _MSC_VER
-
-#include <stdint.h>
-
-#else
-
-// FIXME:     all singed type should be declared explicit,  for example,  int8_t should be declared as signed char.
-typedef signed char      int8_t  ;
-typedef unsigned char    uint8_t ;
-typedef short            int16_t ;
-typedef unsigned short   uint16_t;
-typedef int              int32_t ;
-typedef unsigned int     uint32_t;
-typedef __int64          int64_t ;
-typedef unsigned __int64 uint64_t;
-
-#endif // _MSC_VER defined
-
-// FIXME:     all string type should be declared explicit as char.
-typedef char      str_t;
-typedef float     real32_t;
-
-#ifdef EPSN
-#undef EPSN
-#endif//EPSN
-#define EPSN	  (0.000001f) // (1e-6)	// desired float precision
-
-#ifndef NULL
-#define NULL 0
-#endif
-
-
-typedef bool bool_t;
-typedef int32_t BOOL_T;
-
-#ifndef FALSE
-#define FALSE   ((int32_t)0)
-#endif//FALSE
-
-#ifndef TRUE
-#define TRUE    ((int32_t)1)
-#endif//TRUE
-
-
-#endif //WELS_TYPE_DEFINES_H__
--- a/codec/encoder/core/src/cpu.cpp
+++ /dev/null
@@ -1,196 +1,0 @@
-/*!
- * \copy
- *     Copyright (c)  2009-2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * \file	cpu.c
- *
- * \brief	CPU compatibility detection
- *
- * \date	04/29/2009 Created
- *
- *************************************************************************************
- */
-
-#include <string.h>
-
-#include "cpu.h"
-#include "cpu_core.h"
-
-
-namespace WelsSVCEnc {
-#define    CPU_Vender_AMD    "AuthenticAMD"
-#define    CPU_Vender_INTEL  "GenuineIntel"
-#define    CPU_Vender_CYRIX  "CyrixInstead"
-
-
-#if defined(X86_ASM)
-
-uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
-  uint32_t uiCPU = 0;
-  uint32_t uiFeatureA = 0, uiFeatureB = 0, uiFeatureC = 0, uiFeatureD = 0;
-  int32_t  CacheLineSize = 0;
-  int8_t   chVenderName[16] = { 0 };
-
-  if (!WelsCPUIdVerify()) {
-    /* cpuid is not supported in cpu */
-    return 0;
-  }
-
-  WelsCPUId (0, &uiFeatureA, (uint32_t*)&chVenderName[0], (uint32_t*)&chVenderName[8], (uint32_t*)&chVenderName[4]);
-  if (uiFeatureA == 0) {
-    /* maximum input value for basic cpuid information */
-    return 0;
-  }
-
-  WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
-  if ((uiFeatureD & 0x00800000) == 0) {
-    /* Basic MMX technology is not support in cpu, mean nothing for us so return here */
-    return 0;
-  }
-
-  uiCPU = WELS_CPU_MMX;
-  if (uiFeatureD & 0x02000000) {
-    /* SSE technology is identical to AMD MMX extensions */
-    uiCPU |= WELS_CPU_MMXEXT | WELS_CPU_SSE;
-  }
-  if (uiFeatureD & 0x04000000) {
-    /* SSE2 support here */
-    uiCPU |= WELS_CPU_SSE2;
-  }
-  if (uiFeatureD & 0x00000001) {
-    /* x87 FPU on-chip checking */
-    uiCPU |= WELS_CPU_FPU;
-  }
-  if (uiFeatureD & 0x00008000) {
-    /* CMOV instruction checking */
-    uiCPU |= WELS_CPU_CMOV;
-  }
-  if (!strcmp ((const str_t*)chVenderName, CPU_Vender_INTEL)) {	// confirmed_safe_unsafe_usage
-    if (uiFeatureD & 0x10000000) {
-      /* Multi-Threading checking: contains of multiple logic processors */
-      uiCPU |= WELS_CPU_HTT;
-    }
-  }
-
-  if (uiFeatureC & 0x00000001) {
-    /* SSE3 support here */
-    uiCPU |= WELS_CPU_SSE3;
-  }
-  if (uiFeatureC & 0x00000200) {
-    /* SSSE3 support here */
-    uiCPU |= WELS_CPU_SSSE3;
-  }
-  if (uiFeatureC & 0x00080000) {
-    /* SSE4.1 support here, 45nm Penryn processor */
-    uiCPU |= WELS_CPU_SSE41;
-  }
-  if (uiFeatureC & 0x00100000) {
-    /* SSE4.2 support here, next generation Nehalem processor */
-    uiCPU |= WELS_CPU_SSE42;
-  }
-  if (WelsCPUSupportAVX (uiFeatureA, uiFeatureC)) {	//
-    /* AVX supported */
-    uiCPU |= WELS_CPU_AVX;
-  }
-  if (WelsCPUSupportFMA (uiFeatureA, uiFeatureC)) {	//
-    /* AVX FMA supported */
-    uiCPU |= WELS_CPU_FMA;
-  }
-  if (uiFeatureC & 0x02000000) {
-    /* AES checking */
-    uiCPU |= WELS_CPU_AES;
-  }
-  if (uiFeatureC & 0x00400000) {
-    /* MOVBE checking */
-    uiCPU |= WELS_CPU_MOVBE;
-  }
-
-  if (pNumberOfLogicProcessors != NULL) {
-    // HTT enabled on chip
-    *pNumberOfLogicProcessors = (uiFeatureB & 0x00ff0000) >> 16; // feature bits: 23-16 on returned EBX
-  }
-
-  WelsCPUId (0x80000000, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
-
-  if ((!strcmp ((const str_t*)chVenderName, CPU_Vender_AMD))
-      && (uiFeatureA >= 0x80000001)) {	// confirmed_safe_unsafe_usage
-    WelsCPUId (0x80000001, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
-    if (uiFeatureD & 0x00400000) {
-      uiCPU |= WELS_CPU_MMXEXT;
-    }
-    if (uiFeatureD & 0x80000000) {
-      uiCPU |= WELS_CPU_3DNOW;
-    }
-  }
-
-  if (!strcmp ((const str_t*)chVenderName, CPU_Vender_INTEL)) {	// confirmed_safe_unsafe_usage
-    int32_t  family, model;
-
-    WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
-    family = ((uiFeatureA >> 8) & 0xf) + ((uiFeatureA >> 20) & 0xff);
-    model  = ((uiFeatureA >> 4) & 0xf) + ((uiFeatureA >> 12) & 0xf0);
-
-    if ((family == 6) && (model == 9 || model == 13 || model == 14)) {
-      uiCPU &= ~ (WELS_CPU_SSE2 | WELS_CPU_SSE3);
-    }
-  }
-
-  // get cache line size
-  if ((!strcmp ((const str_t*)chVenderName, CPU_Vender_INTEL))
-      || ! (strcmp ((const str_t*)chVenderName, CPU_Vender_CYRIX))) {	// confirmed_safe_unsafe_usage
-    WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
-
-    CacheLineSize = (uiFeatureB & 0xff00) >>
-                    5;	// ((clflush_line_size >> 8) << 3), CLFLUSH_line_size * 8 = CacheLineSize_in_byte
-
-    if (CacheLineSize == 128) {
-      uiCPU |= WELS_CPU_CACHELINE_128;
-    } else if (CacheLineSize == 64) {
-      uiCPU |= WELS_CPU_CACHELINE_64;
-    } else if (CacheLineSize == 32) {
-      uiCPU |= WELS_CPU_CACHELINE_32;
-    } else if (CacheLineSize == 16) {
-      uiCPU |= WELS_CPU_CACHELINE_16;
-    }
-  }
-
-  return uiCPU;
-}
-
-
-void WelsCPURestore (const uint32_t kuiCPU) {
-  if (kuiCPU & (WELS_CPU_MMX | WELS_CPU_MMXEXT | WELS_CPU_3DNOW | WELS_CPU_3DNOWEXT)) {
-    WelsEmms();
-  }
-}
-
-#endif
-
-}
--- a/codec/encoder/core/src/deblocking.cpp
+++ b/codec/encoder/core/src/deblocking.cpp
@@ -638,190 +638,6 @@
   }
 }
 
-//  C code only
-
-void DeblockLumaLt4_c (uint8_t* pPix, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, int32_t iBeta, int8_t* pTc) {
-  for (int32_t i = 0; i < 16; i++) {
-    int32_t iTc0 = pTc[i >> 2];
-    if (iTc0 >= 0) {
-      int32_t p0 = pPix[-iStrideX];
-      int32_t p1 = pPix[-2 * iStrideX];
-      int32_t p2 = pPix[-3 * iStrideX];
-      int32_t q0 = pPix[0];
-      int32_t q1 = pPix[iStrideX];
-      int32_t q2 = pPix[2 * iStrideX];
-      bool_t bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha;
-      bool_t bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta;
-      bool_t bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta;
-      int32_t iTc = iTc0;
-      if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) {
-        bool_t bDetaP2P0 =  WELS_ABS (p2 - p0) < iBeta;
-        bool_t bDetaQ2Q0 =  WELS_ABS (q2 - q0) < iBeta;
-        if (bDetaP2P0) {
-          pPix[-2 * iStrideX] = p1 + WELS_CLIP3 ((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1, -iTc0, iTc0);
-          iTc++;
-        }
-        if (bDetaQ2Q0) {
-          pPix[iStrideX] = q1 + WELS_CLIP3 ((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1, -iTc0, iTc0);
-          iTc++;
-        }
-        int32_t iDeta = WELS_CLIP3 ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3, -iTc, iTc);
-        pPix[-iStrideX] = WELS_CLIP1 (p0 + iDeta);     /* p0' */
-        pPix[0]  = WELS_CLIP1 (q0 - iDeta);     /* q0' */
-      }
-    }
-    pPix += iStrideY;
-  }
-}
-
-
-void DeblockLumaEq4_c (uint8_t* pPix, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, int32_t iBeta) {
-  int32_t p0, p1, p2, q0, q1, q2;
-  int32_t iDetaP0Q0;
-  bool_t bDetaP1P0, bDetaQ1Q0;
-  for (int32_t i = 0; i < 16; i++) {
-    p0 = pPix[-iStrideX];
-    p1 = pPix[-2 * iStrideX];
-    p2 = pPix[-3 * iStrideX];
-    q0 = pPix[0];
-    q1 = pPix[iStrideX];
-    q2 = pPix[2 * iStrideX];
-    iDetaP0Q0 = WELS_ABS (p0 - q0);
-    bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta;
-    bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta;
-    if ((iDetaP0Q0 < iAlpha) && bDetaP1P0 && bDetaQ1Q0) {
-      if (iDetaP0Q0 < ((iAlpha >> 2) + 2)) {
-        bool_t bDetaP2P0 = WELS_ABS (p2 - p0) < iBeta;
-        bool_t bDetaQ2Q0 =  WELS_ABS (q2 - q0) < iBeta;
-        if (bDetaP2P0) {
-          const int32_t p3 = pPix[-4 * iStrideX];
-          pPix[-iStrideX] = (p2 + (p1 << 1) + (p0 << 1) + (q0 << 1) + q1 + 4) >> 3;	   //p0
-          pPix[-2 * iStrideX] = (p2 + p1 + p0 + q0 + 2) >> 2;	 //p1
-          pPix[-3 * iStrideX] = ((p3 << 1) + p2 + (p2 << 1) + p1 + p0 + q0 + 4) >> 3;//p2
-        } else {
-          pPix[-1 * iStrideX] = ((p1 << 1) + p0 + q1 + 2) >> 2;	//p0
-        }
-        if (bDetaQ2Q0) {
-          const int32_t q3 = pPix[3 * iStrideX];
-          pPix[0] = (p1 + (p0 << 1) + (q0 << 1) + (q1 << 1) + q2 + 4) >> 3;   //q0
-          pPix[iStrideX] = (p0 + q0 + q1 + q2 + 2) >> 2;   //q1
-          pPix[2 * iStrideX] = ((q3 << 1) + q2 + (q2 << 1) + q1 + q0 + p0 + 4) >> 3;//q2
-        } else {
-          pPix[0] = ((q1 << 1) + q0 + p1 + 2) >> 2;   //q0
-        }
-      } else {
-        pPix[-iStrideX] = ((p1 << 1) + p0 + q1 + 2) >> 2;   //p0
-        pPix[ 0] = ((q1 << 1) + q0 + p1 + 2) >> 2;   //q0
-      }
-    }
-    pPix += iStrideY;
-  }
-}
-void DeblockLumaLt4V_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* iTc) {
-  DeblockLumaLt4_c (pPix, iStride, 1, iAlpha, iBeta, iTc);
-}
-void DeblockLumaLt4H_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* iTc) {
-  DeblockLumaLt4_c (pPix, 1, iStride, iAlpha, iBeta, iTc);
-}
-void DeblockLumaEq4V_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
-  DeblockLumaEq4_c (pPix, iStride, 1, iAlpha, iBeta);
-}
-void DeblockLumaEq4H_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
-  DeblockLumaEq4_c (pPix, 1, iStride, iAlpha, iBeta);
-}
-void DeblockChromaLt4_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha,
-                         int32_t iBeta, int8_t* pTc) {
-  int32_t p0, p1, q0, q1, iDeta;
-  bool_t bDetaP0Q0, bDetaP1P0, bDetaQ1Q0;
-
-  for (int32_t i = 0; i < 8; i++) {
-    int32_t iTc0 = pTc[i >> 1];
-    if (iTc0 > 0) {
-      p0 = pPixCb[-iStrideX];
-      p1 = pPixCb[-2 * iStrideX];
-      q0 = pPixCb[0];
-      q1 = pPixCb[iStrideX];
-
-      bDetaP0Q0 =  WELS_ABS (p0 - q0) < iAlpha;
-      bDetaP1P0 =  WELS_ABS (p1 - p0) < iBeta;
-      bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta;
-      if (bDetaP0Q0 && bDetaP1P0 &&	bDetaQ1Q0) {
-        iDeta = WELS_CLIP3 ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3, -iTc0, iTc0);
-        pPixCb[-iStrideX] = WELS_CLIP1 (p0 + iDeta);     /* p0' */
-        pPixCb[0]  = WELS_CLIP1 (q0 - iDeta);     /* q0' */
-      }
-
-
-      p0 = pPixCr[-iStrideX];
-      p1 = pPixCr[-2 * iStrideX];
-      q0 = pPixCr[0];
-      q1 = pPixCr[iStrideX];
-
-      bDetaP0Q0 =  WELS_ABS (p0 - q0) < iAlpha;
-      bDetaP1P0 =  WELS_ABS (p1 - p0) < iBeta;
-      bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta;
-
-      if (bDetaP0Q0 && bDetaP1P0 &&	bDetaQ1Q0) {
-        iDeta = WELS_CLIP3 ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3, -iTc0, iTc0);
-        pPixCr[-iStrideX] = WELS_CLIP1 (p0 + iDeta);     /* p0' */
-        pPixCr[0]  = WELS_CLIP1 (q0 - iDeta);     /* q0' */
-      }
-    }
-    pPixCb += iStrideY;
-    pPixCr += iStrideY;
-  }
-}
-void DeblockChromaEq4_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha,
-                         int32_t iBeta) {
-  int32_t i = 0, d = 0;
-  int32_t p0, p1, q0, q1;
-  bool_t bDetaP0Q0, bDetaP1P0, bDetaQ1Q0;
-  for (int32_t i = 0; i < 8; i++) {
-    //cb
-    p0 = pPixCb[-iStrideX];
-    p1 = pPixCb[-2 * iStrideX];
-    q0 = pPixCb[0];
-    q1 = pPixCb[iStrideX];
-    bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha;
-    bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta;
-    bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta;
-    if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) {
-      pPixCb[-iStrideX] = ((p1 << 1) + p0 + q1 + 2) >> 2;     /* p0' */
-      pPixCb[0]  = ((q1 << 1) + q0 + p1 + 2) >> 2;     /* q0' */
-    }
-
-    //cr
-    p0 = pPixCr[-iStrideX];
-    p1 = pPixCr[-2 * iStrideX];
-    q0 = pPixCr[0];
-    q1 = pPixCr[iStrideX];
-    bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha;
-    bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta;
-    bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta;
-    if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) {
-      pPixCr[-iStrideX] = ((p1 << 1) + p0 + q1 + 2) >> 2;     /* p0' */
-      pPixCr[0]  = ((q1 << 1) + q0 + p1 + 2) >> 2;     /* q0' */
-    }
-    pPixCr += iStrideY;
-    pPixCb += iStrideY;
-  }
-}
-void DeblockChromaLt4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
-                          int8_t* iTc) {
-  DeblockChromaLt4_c (pPixCb, pPixCr, iStride, 1, iAlpha, iBeta, iTc);
-}
-void DeblockChromaLt4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
-                          int8_t* iTc) {
-  DeblockChromaLt4_c (pPixCb, pPixCr, 1, iStride, iAlpha, iBeta, iTc);
-}
-void DeblockChromaEq4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
-  DeblockChromaEq4_c (pPixCb, pPixCr, iStride, 1, iAlpha, iBeta);
-}
-void DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
-  DeblockChromaEq4_c (pPixCb, pPixCr, 1, iStride, iAlpha, iBeta);
-}
-
-
 void  DeblockingFilterFrameAvcbase (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc) {
   int32_t i, j;
   const int32_t kiMbWidth	= pCurDq->iMbWidth;
@@ -956,29 +772,6 @@
   *pfSetNZCZero = WelsNonZeroCount_c;
 }
 
-
-#ifdef X86_ASM
-
-extern "C" {
-  void DeblockLumaLt4H_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc) {
-    ENFORCE_STACK_ALIGN_1D (uint8_t,  uiBuf,   16 * 8, 16);
-
-    DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]);
-    DeblockLumaLt4V_sse2 (&uiBuf[4 * 16], 16, iAlpha, iBeta, pTc);
-    DeblockLumaTransposeV2H_sse2 (pPixY - 4, iStride, &uiBuf[0]);
-  }
-
-  void DeblockLumaEq4H_sse2 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
-    ENFORCE_STACK_ALIGN_1D (uint8_t,  uiBuf,   16 * 8, 16);
-
-    DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]);
-    DeblockLumaEq4V_sse2 (&uiBuf[4 * 16], 16, iAlpha, iBeta);
-    DeblockLumaTransposeV2H_sse2 (pPixY - 4, iStride, &uiBuf[0]);
-  }
-
-}
-
-#endif
 
 
 void  DeblockingInit (DeblockingFunc*   pFunc,  int32_t iCpu) {
--- a/codec/encoder/core/src/decode_mb_aux.cpp
+++ b/codec/encoder/core/src/decode_mb_aux.cpp
@@ -261,15 +261,15 @@
 
 #if defined(X86_ASM)
   if (uiCpuFlag & WELS_CPU_MMXEXT) {
-    //  pFuncList->pfIDctT4		= WelsIDctT4Rec_mmx;
+     pFuncList->pfIDctT4		= WelsIDctT4Rec_mmx;
   }
   if (uiCpuFlag & WELS_CPU_SSE2) {
-    /* pFuncList->pfDequantization4x4			= WelsDequant4x4_sse2;
+     pFuncList->pfDequantization4x4			= WelsDequant4x4_sse2;
      pFuncList->pfDequantizationFour4x4		= WelsDequantFour4x4_sse2;
      pFuncList->pfDequantizationIHadamard4x4	= WelsDequantIHadamard4x4_sse2;
 
      pFuncList->pfIDctFourT4		= WelsIDctFourT4Rec_sse2;
-     pFuncList->pfIDctI16x16Dc = WelsIDctRecI16x16Dc_sse2;*/
+     pFuncList->pfIDctI16x16Dc = WelsIDctRecI16x16Dc_sse2;
   }
 #endif//X86_ASM
 }
--- a/codec/encoder/core/src/encode_mb_aux.cpp
+++ b/codec/encoder/core/src/encode_mb_aux.cpp
@@ -569,7 +569,7 @@
   }
 //#ifndef MACOS
   if (uiCpuFlag & WELS_CPU_SSSE3) {
-    //  pFuncList->pfScan4x4				= WelsScan4x4DcAc_ssse3;
+    pFuncList->pfScan4x4				= WelsScan4x4DcAc_ssse3;
   }
 
 //#endif//MACOS
--- a/codec/encoder/targets.mk
+++ b/codec/encoder/targets.mk
@@ -1,60 +1,170 @@
 ENCODER_PREFIX=ENCODER
 ENCODER_SRCDIR=codec/encoder
 ENCODER_CPP_SRCS=\
-	$(ENCODER_SRCDIR)/./core/src/au_set.cpp\
-	$(ENCODER_SRCDIR)/./core/src/cpu.cpp\
+	$(ENCODER_SRCDIR)/./core/src/mc.cpp\
+	$(ENCODER_SRCDIR)/./core/src/slice_multi_threading.cpp\
+	$(ENCODER_SRCDIR)/./core/src/wels_preprocess.cpp\
+	$(ENCODER_SRCDIR)/./core/src/ratectl.cpp\
+	$(ENCODER_SRCDIR)/./core/src/get_intra_predictor.cpp\
+	$(ENCODER_SRCDIR)/./core/src/encoder_ext.cpp\
+	$(ENCODER_SRCDIR)/./core/src/ref_list_mgr_svc.cpp\
 	$(ENCODER_SRCDIR)/./core/src/deblocking.cpp\
-	$(ENCODER_SRCDIR)/./core/src/decode_mb_aux.cpp\
-	$(ENCODER_SRCDIR)/./core/src/encode_mb_aux.cpp\
+	$(ENCODER_SRCDIR)/./core/src/picture_handle.cpp\
 	$(ENCODER_SRCDIR)/./core/src/encoder.cpp\
 	$(ENCODER_SRCDIR)/./core/src/encoder_data_tables.cpp\
-	$(ENCODER_SRCDIR)/./core/src/encoder_ext.cpp\
-	$(ENCODER_SRCDIR)/./core/src/expand_pic.cpp\
-	$(ENCODER_SRCDIR)/./core/src/get_intra_predictor.cpp\
-	$(ENCODER_SRCDIR)/./core/src/mc.cpp\
-	$(ENCODER_SRCDIR)/./core/src/md.cpp\
-	$(ENCODER_SRCDIR)/./core/src/memory_align.cpp\
-	$(ENCODER_SRCDIR)/./core/src/mv_pred.cpp\
-	$(ENCODER_SRCDIR)/./core/src/nal_encap.cpp\
-	$(ENCODER_SRCDIR)/./core/src/picture_handle.cpp\
 	$(ENCODER_SRCDIR)/./core/src/property.cpp\
-	$(ENCODER_SRCDIR)/./core/src/ratectl.cpp\
-	$(ENCODER_SRCDIR)/./core/src/ref_list_mgr_svc.cpp\
-	$(ENCODER_SRCDIR)/./core/src/sample.cpp\
+	$(ENCODER_SRCDIR)/./core/src/svc_set_mb_syn_cavlc.cpp\
+	$(ENCODER_SRCDIR)/./core/src/encode_mb_aux.cpp\
+	$(ENCODER_SRCDIR)/./core/src/memory_align.cpp\
 	$(ENCODER_SRCDIR)/./core/src/set_mb_syn_cavlc.cpp\
-	$(ENCODER_SRCDIR)/./core/src/slice_multi_threading.cpp\
+	$(ENCODER_SRCDIR)/./core/src/utils.cpp\
+	$(ENCODER_SRCDIR)/./core/src/mv_pred.cpp\
+	$(ENCODER_SRCDIR)/./core/src/svc_encode_slice.cpp\
+	$(ENCODER_SRCDIR)/./core/src/au_set.cpp\
 	$(ENCODER_SRCDIR)/./core/src/svc_base_layer_md.cpp\
-	$(ENCODER_SRCDIR)/./core/src/svc_enc_slice_segment.cpp\
 	$(ENCODER_SRCDIR)/./core/src/svc_encode_mb.cpp\
-	$(ENCODER_SRCDIR)/./core/src/svc_encode_slice.cpp\
-	$(ENCODER_SRCDIR)/./core/src/svc_mode_decision.cpp\
 	$(ENCODER_SRCDIR)/./core/src/svc_motion_estimate.cpp\
-	$(ENCODER_SRCDIR)/./core/src/svc_set_mb_syn_cavlc.cpp\
-	$(ENCODER_SRCDIR)/./core/src/utils.cpp\
-	$(ENCODER_SRCDIR)/./core/src/wels_preprocess.cpp\
-	$(ENCODER_SRCDIR)/./plus/src/welsCodecTrace.cpp\
+	$(ENCODER_SRCDIR)/./core/src/md.cpp\
+	$(ENCODER_SRCDIR)/./core/src/sample.cpp\
+	$(ENCODER_SRCDIR)/./core/src/svc_enc_slice_segment.cpp\
+	$(ENCODER_SRCDIR)/./core/src/svc_mode_decision.cpp\
+	$(ENCODER_SRCDIR)/./core/src/nal_encap.cpp\
+	$(ENCODER_SRCDIR)/./core/src/expand_pic.cpp\
+	$(ENCODER_SRCDIR)/./core/src/decode_mb_aux.cpp\
 	$(ENCODER_SRCDIR)/./plus/src/welsEncoderExt.cpp\
+	$(ENCODER_SRCDIR)/./plus/src/welsCodecTrace.cpp\
 
 ENCODER_OBJS += $(ENCODER_CPP_SRCS:.cpp=.o)
 ifeq ($(USE_ASM), Yes)
 ENCODER_ASM_SRCS=\
+	$(ENCODER_SRCDIR)/./core/asm/quant.asm\
+	$(ENCODER_SRCDIR)/./core/asm/score.asm\
 	$(ENCODER_SRCDIR)/./core/asm/coeff.asm\
 	$(ENCODER_SRCDIR)/./core/asm/dct.asm\
-	$(ENCODER_SRCDIR)/./core/asm/intra_pred.asm\
-	$(ENCODER_SRCDIR)/./core/asm/memzero.asm\
-	$(ENCODER_SRCDIR)/./core/asm/quant.asm\
 	$(ENCODER_SRCDIR)/./core/asm/satd_sad.asm\
-	$(ENCODER_SRCDIR)/./core/asm/score.asm\
+	$(ENCODER_SRCDIR)/./core/asm/memzero.asm\
+	$(ENCODER_SRCDIR)/./core/asm/intra_pred.asm\
 
 ENCODER_OBJS += $(ENCODER_ASM_SRCS:.asm=.o)
 endif
 
 OBJS += $(ENCODER_OBJS)
-$(ENCODER_SRCDIR)/%.o: $(ENCODER_SRCDIR)/%.cpp
-	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $@ $<
+$(ENCODER_SRCDIR)/./core/src/mc.o: $(ENCODER_SRCDIR)/./core/src/mc.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/mc.o $(ENCODER_SRCDIR)/./core/src/mc.cpp
 
-$(ENCODER_SRCDIR)/%.o: $(ENCODER_SRCDIR)/%.asm
-	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $@ $<
+$(ENCODER_SRCDIR)/./core/src/slice_multi_threading.o: $(ENCODER_SRCDIR)/./core/src/slice_multi_threading.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/slice_multi_threading.o $(ENCODER_SRCDIR)/./core/src/slice_multi_threading.cpp
+
+$(ENCODER_SRCDIR)/./core/src/wels_preprocess.o: $(ENCODER_SRCDIR)/./core/src/wels_preprocess.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/wels_preprocess.o $(ENCODER_SRCDIR)/./core/src/wels_preprocess.cpp
+
+$(ENCODER_SRCDIR)/./core/src/ratectl.o: $(ENCODER_SRCDIR)/./core/src/ratectl.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/ratectl.o $(ENCODER_SRCDIR)/./core/src/ratectl.cpp
+
+$(ENCODER_SRCDIR)/./core/src/get_intra_predictor.o: $(ENCODER_SRCDIR)/./core/src/get_intra_predictor.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/get_intra_predictor.o $(ENCODER_SRCDIR)/./core/src/get_intra_predictor.cpp
+
+$(ENCODER_SRCDIR)/./core/src/encoder_ext.o: $(ENCODER_SRCDIR)/./core/src/encoder_ext.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/encoder_ext.o $(ENCODER_SRCDIR)/./core/src/encoder_ext.cpp
+
+$(ENCODER_SRCDIR)/./core/src/ref_list_mgr_svc.o: $(ENCODER_SRCDIR)/./core/src/ref_list_mgr_svc.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/ref_list_mgr_svc.o $(ENCODER_SRCDIR)/./core/src/ref_list_mgr_svc.cpp
+
+$(ENCODER_SRCDIR)/./core/src/deblocking.o: $(ENCODER_SRCDIR)/./core/src/deblocking.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/deblocking.o $(ENCODER_SRCDIR)/./core/src/deblocking.cpp
+
+$(ENCODER_SRCDIR)/./core/src/picture_handle.o: $(ENCODER_SRCDIR)/./core/src/picture_handle.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/picture_handle.o $(ENCODER_SRCDIR)/./core/src/picture_handle.cpp
+
+$(ENCODER_SRCDIR)/./core/src/encoder.o: $(ENCODER_SRCDIR)/./core/src/encoder.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/encoder.o $(ENCODER_SRCDIR)/./core/src/encoder.cpp
+
+$(ENCODER_SRCDIR)/./core/src/encoder_data_tables.o: $(ENCODER_SRCDIR)/./core/src/encoder_data_tables.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/encoder_data_tables.o $(ENCODER_SRCDIR)/./core/src/encoder_data_tables.cpp
+
+$(ENCODER_SRCDIR)/./core/src/property.o: $(ENCODER_SRCDIR)/./core/src/property.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/property.o $(ENCODER_SRCDIR)/./core/src/property.cpp
+
+$(ENCODER_SRCDIR)/./core/src/svc_set_mb_syn_cavlc.o: $(ENCODER_SRCDIR)/./core/src/svc_set_mb_syn_cavlc.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/svc_set_mb_syn_cavlc.o $(ENCODER_SRCDIR)/./core/src/svc_set_mb_syn_cavlc.cpp
+
+$(ENCODER_SRCDIR)/./core/src/encode_mb_aux.o: $(ENCODER_SRCDIR)/./core/src/encode_mb_aux.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/encode_mb_aux.o $(ENCODER_SRCDIR)/./core/src/encode_mb_aux.cpp
+
+$(ENCODER_SRCDIR)/./core/src/memory_align.o: $(ENCODER_SRCDIR)/./core/src/memory_align.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/memory_align.o $(ENCODER_SRCDIR)/./core/src/memory_align.cpp
+
+$(ENCODER_SRCDIR)/./core/src/set_mb_syn_cavlc.o: $(ENCODER_SRCDIR)/./core/src/set_mb_syn_cavlc.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/set_mb_syn_cavlc.o $(ENCODER_SRCDIR)/./core/src/set_mb_syn_cavlc.cpp
+
+$(ENCODER_SRCDIR)/./core/src/utils.o: $(ENCODER_SRCDIR)/./core/src/utils.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/utils.o $(ENCODER_SRCDIR)/./core/src/utils.cpp
+
+$(ENCODER_SRCDIR)/./core/src/mv_pred.o: $(ENCODER_SRCDIR)/./core/src/mv_pred.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/mv_pred.o $(ENCODER_SRCDIR)/./core/src/mv_pred.cpp
+
+$(ENCODER_SRCDIR)/./core/src/svc_encode_slice.o: $(ENCODER_SRCDIR)/./core/src/svc_encode_slice.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/svc_encode_slice.o $(ENCODER_SRCDIR)/./core/src/svc_encode_slice.cpp
+
+$(ENCODER_SRCDIR)/./core/src/au_set.o: $(ENCODER_SRCDIR)/./core/src/au_set.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/au_set.o $(ENCODER_SRCDIR)/./core/src/au_set.cpp
+
+$(ENCODER_SRCDIR)/./core/src/svc_base_layer_md.o: $(ENCODER_SRCDIR)/./core/src/svc_base_layer_md.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/svc_base_layer_md.o $(ENCODER_SRCDIR)/./core/src/svc_base_layer_md.cpp
+
+$(ENCODER_SRCDIR)/./core/src/svc_encode_mb.o: $(ENCODER_SRCDIR)/./core/src/svc_encode_mb.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/svc_encode_mb.o $(ENCODER_SRCDIR)/./core/src/svc_encode_mb.cpp
+
+$(ENCODER_SRCDIR)/./core/src/svc_motion_estimate.o: $(ENCODER_SRCDIR)/./core/src/svc_motion_estimate.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/svc_motion_estimate.o $(ENCODER_SRCDIR)/./core/src/svc_motion_estimate.cpp
+
+$(ENCODER_SRCDIR)/./core/src/md.o: $(ENCODER_SRCDIR)/./core/src/md.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/md.o $(ENCODER_SRCDIR)/./core/src/md.cpp
+
+$(ENCODER_SRCDIR)/./core/src/sample.o: $(ENCODER_SRCDIR)/./core/src/sample.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/sample.o $(ENCODER_SRCDIR)/./core/src/sample.cpp
+
+$(ENCODER_SRCDIR)/./core/src/svc_enc_slice_segment.o: $(ENCODER_SRCDIR)/./core/src/svc_enc_slice_segment.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/svc_enc_slice_segment.o $(ENCODER_SRCDIR)/./core/src/svc_enc_slice_segment.cpp
+
+$(ENCODER_SRCDIR)/./core/src/svc_mode_decision.o: $(ENCODER_SRCDIR)/./core/src/svc_mode_decision.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/svc_mode_decision.o $(ENCODER_SRCDIR)/./core/src/svc_mode_decision.cpp
+
+$(ENCODER_SRCDIR)/./core/src/nal_encap.o: $(ENCODER_SRCDIR)/./core/src/nal_encap.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/nal_encap.o $(ENCODER_SRCDIR)/./core/src/nal_encap.cpp
+
+$(ENCODER_SRCDIR)/./core/src/expand_pic.o: $(ENCODER_SRCDIR)/./core/src/expand_pic.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/expand_pic.o $(ENCODER_SRCDIR)/./core/src/expand_pic.cpp
+
+$(ENCODER_SRCDIR)/./core/src/decode_mb_aux.o: $(ENCODER_SRCDIR)/./core/src/decode_mb_aux.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./core/src/decode_mb_aux.o $(ENCODER_SRCDIR)/./core/src/decode_mb_aux.cpp
+
+$(ENCODER_SRCDIR)/./plus/src/welsEncoderExt.o: $(ENCODER_SRCDIR)/./plus/src/welsEncoderExt.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./plus/src/welsEncoderExt.o $(ENCODER_SRCDIR)/./plus/src/welsEncoderExt.cpp
+
+$(ENCODER_SRCDIR)/./plus/src/welsCodecTrace.o: $(ENCODER_SRCDIR)/./plus/src/welsCodecTrace.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./plus/src/welsCodecTrace.o $(ENCODER_SRCDIR)/./plus/src/welsCodecTrace.cpp
+
+$(ENCODER_SRCDIR)/./core/asm/quant.o: $(ENCODER_SRCDIR)/./core/asm/quant.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/quant.o $(ENCODER_SRCDIR)/./core/asm/quant.asm
+
+$(ENCODER_SRCDIR)/./core/asm/score.o: $(ENCODER_SRCDIR)/./core/asm/score.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/score.o $(ENCODER_SRCDIR)/./core/asm/score.asm
+
+$(ENCODER_SRCDIR)/./core/asm/coeff.o: $(ENCODER_SRCDIR)/./core/asm/coeff.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/coeff.o $(ENCODER_SRCDIR)/./core/asm/coeff.asm
+
+$(ENCODER_SRCDIR)/./core/asm/dct.o: $(ENCODER_SRCDIR)/./core/asm/dct.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/dct.o $(ENCODER_SRCDIR)/./core/asm/dct.asm
+
+$(ENCODER_SRCDIR)/./core/asm/satd_sad.o: $(ENCODER_SRCDIR)/./core/asm/satd_sad.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/satd_sad.o $(ENCODER_SRCDIR)/./core/asm/satd_sad.asm
+
+$(ENCODER_SRCDIR)/./core/asm/memzero.o: $(ENCODER_SRCDIR)/./core/asm/memzero.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/memzero.o $(ENCODER_SRCDIR)/./core/asm/memzero.asm
+
+$(ENCODER_SRCDIR)/./core/asm/intra_pred.o: $(ENCODER_SRCDIR)/./core/asm/intra_pred.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/intra_pred.o $(ENCODER_SRCDIR)/./core/asm/intra_pred.asm
 
 $(LIBPREFIX)encoder.$(LIBSUFFIX): $(ENCODER_OBJS)
 	rm -f $(LIBPREFIX)encoder.$(LIBSUFFIX)
--- a/codec/processing/targets.mk
+++ b/codec/processing/targets.mk
@@ -1,44 +1,110 @@
 PROCESSING_PREFIX=PROCESSING
 PROCESSING_SRCDIR=codec/processing
 PROCESSING_CPP_SRCS=\
-	$(PROCESSING_SRCDIR)/./src/adaptivequantization/AdaptiveQuantization.cpp\
-	$(PROCESSING_SRCDIR)/./src/backgounddetection/BackgroundDetection.cpp\
-	$(PROCESSING_SRCDIR)/./src/common/cpu.cpp\
-	$(PROCESSING_SRCDIR)/./src/common/memory.cpp\
-	$(PROCESSING_SRCDIR)/./src/common/thread.cpp\
+	$(PROCESSING_SRCDIR)/./src/imagerotate/imagerotate.cpp\
+	$(PROCESSING_SRCDIR)/./src/imagerotate/imagerotatefuncs.cpp\
+	$(PROCESSING_SRCDIR)/./src/vaacalc/vaacalcfuncs.cpp\
+	$(PROCESSING_SRCDIR)/./src/vaacalc/vaacalculation.cpp\
+	$(PROCESSING_SRCDIR)/./src/scenechangedetection/SceneChangeDetection.cpp\
+	$(PROCESSING_SRCDIR)/./src/scenechangedetection/SceneChangeDetectionCommon.cpp\
+	$(PROCESSING_SRCDIR)/./src/common/WelsFrameWorkEx.cpp\
 	$(PROCESSING_SRCDIR)/./src/common/util.cpp\
+	$(PROCESSING_SRCDIR)/./src/common/memory.cpp\
 	$(PROCESSING_SRCDIR)/./src/common/WelsFrameWork.cpp\
-	$(PROCESSING_SRCDIR)/./src/common/WelsFrameWorkEx.cpp\
+	$(PROCESSING_SRCDIR)/./src/common/cpu.cpp\
+	$(PROCESSING_SRCDIR)/./src/common/thread.cpp\
 	$(PROCESSING_SRCDIR)/./src/complexityanalysis/ComplexityAnalysis.cpp\
-	$(PROCESSING_SRCDIR)/./src/denoise/denoise.cpp\
 	$(PROCESSING_SRCDIR)/./src/denoise/denoise_filter.cpp\
+	$(PROCESSING_SRCDIR)/./src/denoise/denoise.cpp\
+	$(PROCESSING_SRCDIR)/./src/adaptivequantization/AdaptiveQuantization.cpp\
 	$(PROCESSING_SRCDIR)/./src/downsample/downsample.cpp\
 	$(PROCESSING_SRCDIR)/./src/downsample/downsamplefuncs.cpp\
-	$(PROCESSING_SRCDIR)/./src/imagerotate/imagerotate.cpp\
-	$(PROCESSING_SRCDIR)/./src/imagerotate/imagerotatefuncs.cpp\
-	$(PROCESSING_SRCDIR)/./src/scenechangedetection/SceneChangeDetection.cpp\
-	$(PROCESSING_SRCDIR)/./src/scenechangedetection/SceneChangeDetectionCommon.cpp\
-	$(PROCESSING_SRCDIR)/./src/vaacalc/vaacalcfuncs.cpp\
-	$(PROCESSING_SRCDIR)/./src/vaacalc/vaacalculation.cpp\
+	$(PROCESSING_SRCDIR)/./src/backgounddetection/BackgroundDetection.cpp\
 
 PROCESSING_OBJS += $(PROCESSING_CPP_SRCS:.cpp=.o)
 ifeq ($(USE_ASM), Yes)
 PROCESSING_ASM_SRCS=\
-	$(PROCESSING_SRCDIR)/./src/asm/denoisefilter.asm\
-	$(PROCESSING_SRCDIR)/./src/asm/downsample_bilinear.asm\
-	$(PROCESSING_SRCDIR)/./src/asm/intra_pred.asm\
 	$(PROCESSING_SRCDIR)/./src/asm/sad.asm\
 	$(PROCESSING_SRCDIR)/./src/asm/vaa.asm\
+	$(PROCESSING_SRCDIR)/./src/asm/denoisefilter.asm\
+	$(PROCESSING_SRCDIR)/./src/asm/intra_pred.asm\
+	$(PROCESSING_SRCDIR)/./src/asm/downsample_bilinear.asm\
 
 PROCESSING_OBJS += $(PROCESSING_ASM_SRCS:.asm=.o)
 endif
 
 OBJS += $(PROCESSING_OBJS)
-$(PROCESSING_SRCDIR)/%.o: $(PROCESSING_SRCDIR)/%.cpp
-	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $@ $<
+$(PROCESSING_SRCDIR)/./src/imagerotate/imagerotate.o: $(PROCESSING_SRCDIR)/./src/imagerotate/imagerotate.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $(PROCESSING_SRCDIR)/./src/imagerotate/imagerotate.o $(PROCESSING_SRCDIR)/./src/imagerotate/imagerotate.cpp
 
-$(PROCESSING_SRCDIR)/%.o: $(PROCESSING_SRCDIR)/%.asm
-	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(PROCESSING_ASMFLAGS) $(PROCESSING_ASM_INCLUDES) -o $@ $<
+$(PROCESSING_SRCDIR)/./src/imagerotate/imagerotatefuncs.o: $(PROCESSING_SRCDIR)/./src/imagerotate/imagerotatefuncs.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $(PROCESSING_SRCDIR)/./src/imagerotate/imagerotatefuncs.o $(PROCESSING_SRCDIR)/./src/imagerotate/imagerotatefuncs.cpp
+
+$(PROCESSING_SRCDIR)/./src/vaacalc/vaacalcfuncs.o: $(PROCESSING_SRCDIR)/./src/vaacalc/vaacalcfuncs.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $(PROCESSING_SRCDIR)/./src/vaacalc/vaacalcfuncs.o $(PROCESSING_SRCDIR)/./src/vaacalc/vaacalcfuncs.cpp
+
+$(PROCESSING_SRCDIR)/./src/vaacalc/vaacalculation.o: $(PROCESSING_SRCDIR)/./src/vaacalc/vaacalculation.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $(PROCESSING_SRCDIR)/./src/vaacalc/vaacalculation.o $(PROCESSING_SRCDIR)/./src/vaacalc/vaacalculation.cpp
+
+$(PROCESSING_SRCDIR)/./src/scenechangedetection/SceneChangeDetection.o: $(PROCESSING_SRCDIR)/./src/scenechangedetection/SceneChangeDetection.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $(PROCESSING_SRCDIR)/./src/scenechangedetection/SceneChangeDetection.o $(PROCESSING_SRCDIR)/./src/scenechangedetection/SceneChangeDetection.cpp
+
+$(PROCESSING_SRCDIR)/./src/scenechangedetection/SceneChangeDetectionCommon.o: $(PROCESSING_SRCDIR)/./src/scenechangedetection/SceneChangeDetectionCommon.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $(PROCESSING_SRCDIR)/./src/scenechangedetection/SceneChangeDetectionCommon.o $(PROCESSING_SRCDIR)/./src/scenechangedetection/SceneChangeDetectionCommon.cpp
+
+$(PROCESSING_SRCDIR)/./src/common/WelsFrameWorkEx.o: $(PROCESSING_SRCDIR)/./src/common/WelsFrameWorkEx.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $(PROCESSING_SRCDIR)/./src/common/WelsFrameWorkEx.o $(PROCESSING_SRCDIR)/./src/common/WelsFrameWorkEx.cpp
+
+$(PROCESSING_SRCDIR)/./src/common/util.o: $(PROCESSING_SRCDIR)/./src/common/util.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $(PROCESSING_SRCDIR)/./src/common/util.o $(PROCESSING_SRCDIR)/./src/common/util.cpp
+
+$(PROCESSING_SRCDIR)/./src/common/memory.o: $(PROCESSING_SRCDIR)/./src/common/memory.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $(PROCESSING_SRCDIR)/./src/common/memory.o $(PROCESSING_SRCDIR)/./src/common/memory.cpp
+
+$(PROCESSING_SRCDIR)/./src/common/WelsFrameWork.o: $(PROCESSING_SRCDIR)/./src/common/WelsFrameWork.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $(PROCESSING_SRCDIR)/./src/common/WelsFrameWork.o $(PROCESSING_SRCDIR)/./src/common/WelsFrameWork.cpp
+
+$(PROCESSING_SRCDIR)/./src/common/cpu.o: $(PROCESSING_SRCDIR)/./src/common/cpu.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $(PROCESSING_SRCDIR)/./src/common/cpu.o $(PROCESSING_SRCDIR)/./src/common/cpu.cpp
+
+$(PROCESSING_SRCDIR)/./src/common/thread.o: $(PROCESSING_SRCDIR)/./src/common/thread.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $(PROCESSING_SRCDIR)/./src/common/thread.o $(PROCESSING_SRCDIR)/./src/common/thread.cpp
+
+$(PROCESSING_SRCDIR)/./src/complexityanalysis/ComplexityAnalysis.o: $(PROCESSING_SRCDIR)/./src/complexityanalysis/ComplexityAnalysis.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $(PROCESSING_SRCDIR)/./src/complexityanalysis/ComplexityAnalysis.o $(PROCESSING_SRCDIR)/./src/complexityanalysis/ComplexityAnalysis.cpp
+
+$(PROCESSING_SRCDIR)/./src/denoise/denoise_filter.o: $(PROCESSING_SRCDIR)/./src/denoise/denoise_filter.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $(PROCESSING_SRCDIR)/./src/denoise/denoise_filter.o $(PROCESSING_SRCDIR)/./src/denoise/denoise_filter.cpp
+
+$(PROCESSING_SRCDIR)/./src/denoise/denoise.o: $(PROCESSING_SRCDIR)/./src/denoise/denoise.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $(PROCESSING_SRCDIR)/./src/denoise/denoise.o $(PROCESSING_SRCDIR)/./src/denoise/denoise.cpp
+
+$(PROCESSING_SRCDIR)/./src/adaptivequantization/AdaptiveQuantization.o: $(PROCESSING_SRCDIR)/./src/adaptivequantization/AdaptiveQuantization.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $(PROCESSING_SRCDIR)/./src/adaptivequantization/AdaptiveQuantization.o $(PROCESSING_SRCDIR)/./src/adaptivequantization/AdaptiveQuantization.cpp
+
+$(PROCESSING_SRCDIR)/./src/downsample/downsample.o: $(PROCESSING_SRCDIR)/./src/downsample/downsample.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $(PROCESSING_SRCDIR)/./src/downsample/downsample.o $(PROCESSING_SRCDIR)/./src/downsample/downsample.cpp
+
+$(PROCESSING_SRCDIR)/./src/downsample/downsamplefuncs.o: $(PROCESSING_SRCDIR)/./src/downsample/downsamplefuncs.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $(PROCESSING_SRCDIR)/./src/downsample/downsamplefuncs.o $(PROCESSING_SRCDIR)/./src/downsample/downsamplefuncs.cpp
+
+$(PROCESSING_SRCDIR)/./src/backgounddetection/BackgroundDetection.o: $(PROCESSING_SRCDIR)/./src/backgounddetection/BackgroundDetection.cpp
+	$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(PROCESSING_CFLAGS) $(PROCESSING_INCLUDES) -c -o $(PROCESSING_SRCDIR)/./src/backgounddetection/BackgroundDetection.o $(PROCESSING_SRCDIR)/./src/backgounddetection/BackgroundDetection.cpp
+
+$(PROCESSING_SRCDIR)/./src/asm/sad.o: $(PROCESSING_SRCDIR)/./src/asm/sad.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(PROCESSING_ASMFLAGS) $(PROCESSING_ASM_INCLUDES) -o $(PROCESSING_SRCDIR)/./src/asm/sad.o $(PROCESSING_SRCDIR)/./src/asm/sad.asm
+
+$(PROCESSING_SRCDIR)/./src/asm/vaa.o: $(PROCESSING_SRCDIR)/./src/asm/vaa.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(PROCESSING_ASMFLAGS) $(PROCESSING_ASM_INCLUDES) -o $(PROCESSING_SRCDIR)/./src/asm/vaa.o $(PROCESSING_SRCDIR)/./src/asm/vaa.asm
+
+$(PROCESSING_SRCDIR)/./src/asm/denoisefilter.o: $(PROCESSING_SRCDIR)/./src/asm/denoisefilter.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(PROCESSING_ASMFLAGS) $(PROCESSING_ASM_INCLUDES) -o $(PROCESSING_SRCDIR)/./src/asm/denoisefilter.o $(PROCESSING_SRCDIR)/./src/asm/denoisefilter.asm
+
+$(PROCESSING_SRCDIR)/./src/asm/intra_pred.o: $(PROCESSING_SRCDIR)/./src/asm/intra_pred.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(PROCESSING_ASMFLAGS) $(PROCESSING_ASM_INCLUDES) -o $(PROCESSING_SRCDIR)/./src/asm/intra_pred.o $(PROCESSING_SRCDIR)/./src/asm/intra_pred.asm
+
+$(PROCESSING_SRCDIR)/./src/asm/downsample_bilinear.o: $(PROCESSING_SRCDIR)/./src/asm/downsample_bilinear.asm
+	$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(PROCESSING_ASMFLAGS) $(PROCESSING_ASM_INCLUDES) -o $(PROCESSING_SRCDIR)/./src/asm/downsample_bilinear.o $(PROCESSING_SRCDIR)/./src/asm/downsample_bilinear.asm
 
 $(LIBPREFIX)processing.$(LIBSUFFIX): $(PROCESSING_OBJS)
 	rm -f $(LIBPREFIX)processing.$(LIBSUFFIX)