shithub: openh264

Download patch

ref: ec84f4bcc90d6aa447860eba8235420edc79e41f
parent: 3958118bf03c92aa547dbe3c77c5557ed4ad944b
author: volvet <qizh@cisco.com>
date: Fri Jan 3 09:49:45 EST 2014

resolve conflict

--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,7 @@
 CP=cp
 ROOTDIR=$(PWD)
 
+
 ifeq (,$(wildcard ./gtest))
 HAVE_GTEST=No
 else
@@ -13,20 +14,22 @@
 # Configurations
 ifeq ($(BUILDTYPE), Release)
 CFLAGS += -O3
-ifneq ($(ENABLE64BIT), Yes)
 USE_ASM = Yes
-endif
 else
 CFLAGS = -g
 USE_ASM = No
 endif
+
 ifeq ($(ENABLE64BIT), Yes)
 CFLAGS += -m64
 LDFLAGS += -m64
+ASMFLAGS += -DUNIX64
 else
 CFLAGS += -m32
 LDFLAGS += -m32
+ASMFLAGS += -DX86_32
 endif
+
 include build/platform-$(UNAME).mk
 
 ifeq ($(USE_ASM),Yes)
@@ -40,7 +43,8 @@
 
 #### No user-serviceable parts below this line
 INCLUDES = -Icodec/api/svc  -Icodec/common -Igtest/include
-ASM_INCLUDES = -Iprocessing/src/asm/
+#ASM_INCLUDES = -Iprocessing/src/asm/
+ASM_INCLUDES = -Icodec/common/
 
 COMMON_INCLUDES = \
     -Icodec/decoder/core/inc
@@ -83,7 +87,7 @@
 include codec/common/targets.mk
 include codec/decoder/targets.mk
 include codec/encoder/targets.mk
-include processing/targets.mk
+include codec/processing/targets.mk
 include codec/console/dec/targets.mk
 include codec/console/enc/targets.mk
 
--- a/build/mktargets.sh
+++ b/build/mktargets.sh
@@ -2,7 +2,7 @@
 (cd codec/decoder; python ../../build/mktargets.py --directory codec/decoder --library decoder --exclude StdAfx.cpp)
 (cd codec/encoder; python ../../build/mktargets.py --directory codec/encoder --library encoder --exclude DllEntry.cpp)
 (cd codec/common; python ../../build/mktargets.py --directory codec/common --library common)
-(cd processing; python ../build/mktargets.py --directory processing --library processing --exclude wels_process.cpp --exclude WelsVideoProcessor.cpp)
+(cd codec/processing; python ../../build/mktargets.py --directory codec/processing --library processing --exclude wels_process.cpp --exclude WelsVideoProcessor.cpp)
 
 (cd codec/console/dec; python ../../../build/mktargets.py --directory codec/console/dec --binary h264dec --exclude dec_console.h --exclude load_bundle_functions.cpp)
 (cd codec/console/enc; python ../../../build/mktargets.py --directory codec/console/enc --binary h264enc --exclude enc_console.h --exclude bundlewelsenc.cpp)
--- a/build/platform-darwin.mk
+++ b/build/platform-darwin.mk
@@ -1,5 +1,11 @@
-USE_ASM = No  # We don't have ASM working on Mac yet
+
 ASM = nasm
 CFLAGS += -Werror -fPIC
 LDFLAGS += -lpthread
-ASMFLAGS += -f macho --prefix _ -DNOPREFIX
+ASMFLAGS += --prefix _ -DNOPREFIX
+ifeq ($(ENABLE64BIT), Yes)
+ASMFLAGS += -f macho64
+else
+ASMFLAGS += -f macho
+endif
+
--- a/build/platform-linux.mk
+++ b/build/platform-linux.mk
@@ -1,5 +1,10 @@
 ASM = nasm
 CFLAGS += -Werror -fPIC -DLINUX -D__NO_CTYPE
 LDFLAGS += -lpthread
-ASMFLAGS += -f elf -DNOPREFIX
+ASMFLAGS += -DNOPREFIX
+ifeq ($(ENABLE64BIT), Yes)
+ASMFLAGS += -f elf64
+else 
+ASMFLAGS += -f elf32
+endif
 
--- a/codec/build/win32/dec/WelsDecCore.vcproj
+++ b/codec/build/win32/dec/WelsDecCore.vcproj
@@ -349,44 +349,6 @@
 				Filter="*.asm;*.inc"
 				>
 				<File
-					RelativePath="..\..\..\decoder\core\asm\asm_inc.asm"
-					>
-					<FileConfiguration
-						Name="Release|Win32"
-						>
-						<Tool
-							Name="VCCustomBuildTool"
-							CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
-							Outputs="$(IntDir)\$(InputName).obj"
-						/>
-					</FileConfiguration>
-					<FileConfiguration
-						Name="Release|x64"
-						ExcludedFromBuild="true"
-						>
-						<Tool
-							Name="VCCustomBuildTool"
-							CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
-							Outputs="$(IntDir)\$(InputName).obj"
-						/>
-					</FileConfiguration>
-					<FileConfiguration
-						Name="Debug|Win32"
-						>
-						<Tool
-							Name="VCCustomBuildTool"
-						/>
-					</FileConfiguration>
-					<FileConfiguration
-						Name="Debug|x64"
-						ExcludedFromBuild="true"
-						>
-						<Tool
-							Name="VCCustomBuildTool"
-						/>
-					</FileConfiguration>
-				</File>
-				<File
 					RelativePath="..\..\..\decoder\core\asm\block_add.asm"
 					>
 					<FileConfiguration
@@ -394,17 +356,16 @@
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 					<FileConfiguration
 						Name="Release|x64"
-						ExcludedFromBuild="true"
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
@@ -413,23 +374,22 @@
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 					<FileConfiguration
 						Name="Debug|x64"
-						ExcludedFromBuild="true"
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\decoder\core\asm\cpuid.asm"
+					RelativePath="..\..\..\common\cpuid.asm"
 					>
 					<FileConfiguration
 						Name="Release|Win32"
@@ -436,17 +396,16 @@
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 					<FileConfiguration
 						Name="Release|x64"
-						ExcludedFromBuild="true"
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
@@ -455,17 +414,16 @@
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 					<FileConfiguration
 						Name="Debug|x64"
-						ExcludedFromBuild="true"
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
@@ -478,17 +436,16 @@
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 					<FileConfiguration
 						Name="Release|x64"
-						ExcludedFromBuild="true"
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
@@ -497,23 +454,22 @@
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm  -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 					<FileConfiguration
 						Name="Debug|x64"
-						ExcludedFromBuild="true"
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm  -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\decoder\core\asm\deblock.asm"
+					RelativePath="..\..\..\common\deblock.asm"
 					>
 					<FileConfiguration
 						Name="Release|Win32"
@@ -520,17 +476,16 @@
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 					<FileConfiguration
 						Name="Release|x64"
-						ExcludedFromBuild="true"
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
@@ -539,23 +494,22 @@
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 					<FileConfiguration
 						Name="Debug|x64"
-						ExcludedFromBuild="true"
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\decoder\core\asm\expand_picture.asm"
+					RelativePath="..\..\..\common\expand_picture.asm"
 					>
 					<FileConfiguration
 						Name="Release|Win32"
@@ -562,17 +516,16 @@
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 					<FileConfiguration
 						Name="Release|x64"
-						ExcludedFromBuild="true"
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
@@ -581,17 +534,16 @@
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 					<FileConfiguration
 						Name="Debug|x64"
-						ExcludedFromBuild="true"
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
@@ -604,17 +556,16 @@
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 					<FileConfiguration
 						Name="Release|x64"
-						ExcludedFromBuild="true"
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
@@ -623,23 +574,22 @@
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 					<FileConfiguration
 						Name="Debug|x64"
-						ExcludedFromBuild="true"
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\decoder\core\asm\mb_copy.asm"
+					RelativePath="..\..\..\common\mb_copy.asm"
 					>
 					<FileConfiguration
 						Name="Release|Win32"
@@ -646,17 +596,16 @@
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 					<FileConfiguration
 						Name="Release|x64"
-						ExcludedFromBuild="true"
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
@@ -665,23 +614,22 @@
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm  -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 					<FileConfiguration
 						Name="Debug|x64"
-						ExcludedFromBuild="true"
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm  -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\decoder\core\asm\mc_chroma.asm"
+					RelativePath="..\..\..\common\mc_chroma.asm"
 					>
 					<FileConfiguration
 						Name="Release|Win32"
@@ -688,17 +636,16 @@
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 					<FileConfiguration
 						Name="Release|x64"
-						ExcludedFromBuild="true"
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
@@ -707,23 +654,22 @@
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm  -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 					<FileConfiguration
 						Name="Debug|x64"
-						ExcludedFromBuild="true"
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm  -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 				</File>
 				<File
-					RelativePath="..\..\..\decoder\core\asm\mc_luma.asm"
+					RelativePath="..\..\..\common\mc_luma.asm"
 					>
 					<FileConfiguration
 						Name="Release|Win32"
@@ -730,17 +676,16 @@
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 					<FileConfiguration
 						Name="Release|x64"
-						ExcludedFromBuild="true"
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
@@ -749,59 +694,16 @@
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
 					<FileConfiguration
 						Name="Debug|x64"
-						ExcludedFromBuild="true"
 						>
 						<Tool
 							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
-							Outputs="$(IntDir)\$(InputName).obj"
-						/>
-					</FileConfiguration>
-				</File>
-				<File
-					RelativePath="..\..\..\decoder\core\asm\memzero.asm"
-					>
-					<FileConfiguration
-						Name="Release|Win32"
-						>
-						<Tool
-							Name="VCCustomBuildTool"
-							CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
-							Outputs="$(IntDir)\$(InputName).obj"
-						/>
-					</FileConfiguration>
-					<FileConfiguration
-						Name="Release|x64"
-						ExcludedFromBuild="true"
-						>
-						<Tool
-							Name="VCCustomBuildTool"
-							CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
-							Outputs="$(IntDir)\$(InputName).obj"
-						/>
-					</FileConfiguration>
-					<FileConfiguration
-						Name="Debug|Win32"
-						>
-						<Tool
-							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
-							Outputs="$(IntDir)\$(InputName).obj"
-						/>
-					</FileConfiguration>
-					<FileConfiguration
-						Name="Debug|x64"
-						ExcludedFromBuild="true"
-						>
-						<Tool
-							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 							Outputs="$(IntDir)\$(InputName).obj"
 						/>
 					</FileConfiguration>
--- a/codec/build/win32/dec/WelsDecCore_2010.vcxproj
+++ b/codec/build/win32/dec/WelsDecCore_2010.vcxproj
@@ -94,8 +94,8 @@
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
-      <AdditionalIncludeDirectories>..\..\..\decoder\core\inc;..\..\..\common\inc;..\..\..\api\svc;..\..\..\hwDecoder\core\inc;..\..\..\hwDecoder\dxva\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\decoder\core\inc;..\..\..\common;..\..\..\api\svc;..\..\..\hwDecoder\core\inc;..\..\..\hwDecoder\dxva\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;X86_ASM;_LIB;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <StringPooling>true</StringPooling>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <FunctionLevelLinking>true</FunctionLevelLinking>
@@ -125,8 +125,8 @@
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
-      <AdditionalIncludeDirectories>..\..\..\decoder\core\inc;..\..\..\common\inc;..\..\..\api\svc;..\..\..\hwDecoder\core\inc;..\..\..\hwDecoder\dxva\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>WIN64;NDEBUG;_LIB;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\..\..\decoder\core\inc;..\..\..\common;..\..\..\api\svc;..\..\..\hwDecoder\core\inc;..\..\..\hwDecoder\dxva\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN64;NDEBUG;X86_ASM;_LIB;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <StringPooling>true</StringPooling>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <FunctionLevelLinking>true</FunctionLevelLinking>
@@ -151,11 +151,15 @@
       <SuppressStartupBanner>true</SuppressStartupBanner>
       <OutputFile>$(OutDir)\WelsDecCore.bsc</OutputFile>
     </Bscmake>
+    <CustomBuild>
+      <Outputs>$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command>nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+    </CustomBuild>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>..\..\..\decoder\core\inc;..\..\..\common\inc;..\..\..\api\svc;..\..\..\hwDecoder\core\inc;..\..\..\hwDecoder\dxva\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\..\decoder\core\inc;..\..\..\common;..\..\..\api\svc;..\..\..\hwDecoder\core\inc;..\..\..\hwDecoder\dxva\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;X86_ASM;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>true</MinimalRebuild>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
@@ -184,7 +188,7 @@
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>..\..\..\decoder\core\inc;..\..\..\common\inc;..\..\..\api\svc;..\..\..\hwDecoder\core\inc;..\..\..\hwDecoder\dxva\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\..\decoder\core\inc;..\..\..\common;..\..\..\api\svc;..\..\..\hwDecoder\core\inc;..\..\..\hwDecoder\dxva\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN64;_DEBUG;_LIB;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
@@ -208,176 +212,45 @@
       <SuppressStartupBanner>true</SuppressStartupBanner>
       <OutputFile>$(OutDir)\WelsDecCore.bsc</OutputFile>
     </Bscmake>
+    <CustomBuild><!-- default nasm rule (win64 object format); PREFIX is not defined, matching the per-file Release|x64 commands -->
+      <Command>nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs>$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+    </CustomBuild>
   </ItemDefinitionGroup>
   <ItemGroup>
-    <CustomBuild Include="..\..\..\decoder\core\asm\asm_inc.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
-    </CustomBuild>
     <CustomBuild Include="..\..\..\decoder\core\asm\block_add.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32  -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command><!-- no PREFIX define for win64, same as the Release|x64 command -->
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32  -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\decoder\core\asm\cpuid.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
-    </CustomBuild>
     <CustomBuild Include="..\..\..\decoder\core\asm\dct.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32  -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command><!-- no PREFIX define for win64, same as the Release|x64 command -->
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32  -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\decoder\core\asm\deblock.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
-    </CustomBuild>
-    <CustomBuild Include="..\..\..\decoder\core\asm\expand_picture.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
-    </CustomBuild>
     <CustomBuild Include="..\..\..\decoder\core\asm\intra_pred.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32  -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command><!-- no PREFIX define for win64, same as the Release|x64 command -->
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32  -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\decoder\core\asm\mb_copy.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
-    </CustomBuild>
-    <CustomBuild Include="..\..\..\decoder\core\asm\mc_chroma.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
-    </CustomBuild>
-    <CustomBuild Include="..\..\..\decoder\core\asm\mc_luma.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
-    </CustomBuild>
-    <CustomBuild Include="..\..\..\decoder\core\asm\memzero.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
-    </CustomBuild>
   </ItemGroup>
   <ItemGroup>
+    <ClInclude Include="..\..\..\common\logging.h" />
     <ClInclude Include="..\..\..\decoder\core\inc\as264_common.h" />
     <ClInclude Include="..\..\..\decoder\core\inc\au_parser.h" />
     <ClInclude Include="..\..\..\decoder\core\inc\bit_stream.h" />
@@ -419,6 +292,7 @@
     <ClInclude Include="..\..\..\decoder\core\inc\wels_const.h" />
   </ItemGroup>
   <ItemGroup>
+    <ClCompile Include="..\..\..\common\logging.cpp" />
     <ClCompile Include="..\..\..\decoder\core\src\au_parser.cpp" />
     <ClCompile Include="..\..\..\decoder\core\src\bit_stream.cpp" />
     <ClCompile Include="..\..\..\decoder\core\src\cpu.cpp" />
@@ -440,6 +314,68 @@
     <ClCompile Include="..\..\..\decoder\core\src\decode_slice.cpp" />
     <ClCompile Include="..\..\..\decoder\core\src\decoder_core.cpp" />
     <ClCompile Include="..\..\..\decoder\core\src\utils.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="..\..\..\common\cpuid.asm"><!-- nasm rule per configuration: Win32 uses -f win32 -DX86_32 -DPREFIX; x64 uses -f win64 -DWIN64 without PREFIX -->
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32  -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32  -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+    </CustomBuild>
+    <CustomBuild Include="..\..\..\common\deblock.asm"><!-- nasm rule per configuration: Win32 uses -f win32 -DX86_32 -DPREFIX; x64 uses -f win64 -DWIN64 without PREFIX -->
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32  -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32  -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+    </CustomBuild>
+    <CustomBuild Include="..\..\..\common\expand_picture.asm"><!-- nasm rule per configuration: Win32 uses -f win32 -DX86_32 -DPREFIX; x64 uses -f win64 -DWIN64 without PREFIX -->
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32  -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32  -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+    </CustomBuild>
+    <CustomBuild Include="..\..\..\common\mb_copy.asm"><!-- nasm rule per configuration: Win32 uses -f win32 -DX86_32 -DPREFIX; x64 uses -f win64 -DWIN64 without PREFIX -->
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32  -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32  -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+    </CustomBuild>
+    <CustomBuild Include="..\..\..\common\mc_chroma.asm"><!-- nasm rule per configuration: Win32 uses -f win32 -DX86_32 -DPREFIX; x64 uses -f win64 -DWIN64 without PREFIX -->
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32  -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32  -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+    </CustomBuild>
+    <CustomBuild Include="..\..\..\common\mc_luma.asm"><!-- nasm rule per configuration: Win32 uses -f win32 -DX86_32 -DPREFIX; x64 uses -f win64 -DWIN64 without PREFIX -->
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32  -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32  -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm  -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64  -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+    </CustomBuild>
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
--- a/codec/build/win32/dec/WelsDecCore_2010.vcxproj.filters
+++ b/codec/build/win32/dec/WelsDecCore_2010.vcxproj.filters
@@ -64,6 +64,9 @@
     <ClCompile Include="..\..\..\decoder\core\src\utils.cpp">
       <Filter>sources</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\common\logging.cpp">
+      <Filter>sources</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\..\..\decoder\core\inc\as264_common.h">
@@ -183,39 +186,36 @@
     <ClInclude Include="..\..\..\decoder\core\inc\wels_common_basis.h">
       <Filter>headers</Filter>
     </ClInclude>
+    <ClInclude Include="..\..\..\common\logging.h">
+      <Filter>headers</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
-    <CustomBuild Include="..\..\..\decoder\core\asm\asm_inc.asm">
-      <Filter>ASM</Filter>
-    </CustomBuild>
     <CustomBuild Include="..\..\..\decoder\core\asm\block_add.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\decoder\core\asm\cpuid.asm">
-      <Filter>ASM</Filter>
-    </CustomBuild>
     <CustomBuild Include="..\..\..\decoder\core\asm\dct.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\decoder\core\asm\deblock.asm">
+    <CustomBuild Include="..\..\..\decoder\core\asm\intra_pred.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\decoder\core\asm\expand_picture.asm">
+    <CustomBuild Include="..\..\..\common\mc_luma.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\decoder\core\asm\intra_pred.asm">
+    <CustomBuild Include="..\..\..\common\mc_chroma.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\decoder\core\asm\mb_copy.asm">
+    <CustomBuild Include="..\..\..\common\mb_copy.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\decoder\core\asm\mc_chroma.asm">
+    <CustomBuild Include="..\..\..\common\expand_picture.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\decoder\core\asm\mc_luma.asm">
+    <CustomBuild Include="..\..\..\common\deblock.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\decoder\core\asm\memzero.asm">
+    <CustomBuild Include="..\..\..\common\cpuid.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
   </ItemGroup>
--- a/codec/build/win32/dec/WelsDecPlus_2010.vcxproj
+++ b/codec/build/win32/dec/WelsDecPlus_2010.vcxproj
@@ -107,7 +107,7 @@
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
-      <AdditionalIncludeDirectories>..\..\..\decoder\plus\inc;..\..\..\decoder\core\inc;..\..\..\api\svc;..\..\..\common;..\..\..\hwDecoder\plus\inc;..\..\..\hwDecoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\..\common;..\..\..\decoder\plus\inc;..\..\..\decoder\core\inc;..\..\..\api\svc;..\..\..\hwDecoder\plus\inc;..\..\..\hwDecoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories><!-- common is listed once, ahead of the other directories -->
       <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;WELSDECPLUS_EXPORTS;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <StringPooling>true</StringPooling>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
@@ -156,7 +156,7 @@
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
-      <AdditionalIncludeDirectories>..\..\..\decoder\plus\inc;..\..\..\decoder\core\inc;..\..\..\api\svc;..\..\..\common;..\..\..\hwDecoder\plus\inc;..\..\..\hwDecoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\..\common;..\..\..\decoder\plus\inc;..\..\..\decoder\core\inc;..\..\..\api\svc;..\..\..\hwDecoder\plus\inc;..\..\..\hwDecoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories><!-- common is listed once, ahead of the other directories -->
       <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;WELSDECPLUS_EXPORTS;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <StringPooling>true</StringPooling>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
@@ -204,7 +204,7 @@
     </Midl>
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>..\..\..\decoder\plus\inc;..\..\..\decoder\core\inc;..\..\..\api\svc;..\..\..\common;..\..\..\hwDecoder\plus\inc;..\..\..\hwDecoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\..\common;..\..\..\decoder\plus\inc;..\..\..\decoder\core\inc;..\..\..\api\svc;..\..\..\common;..\..\..\hwDecoder\plus\inc;..\..\..\hwDecoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;WELSDECPLUS_EXPORTS;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>true</MinimalRebuild>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
--- a/codec/build/win32/dec/decConsole_2010.vcxproj
+++ b/codec/build/win32/dec/decConsole_2010.vcxproj
@@ -102,7 +102,7 @@
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
-      <AdditionalIncludeDirectories>..\..\..\console\dec\inc;..\..\..\api\svc;..\..\..\common;..\..\..\encoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\..\common;..\..\..\console\dec\inc;..\..\..\api\svc;..\..\..\common;..\..\..\encoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <StringPooling>true</StringPooling>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
@@ -144,7 +144,7 @@
     <ClCompile>
       <Optimization>MaxSpeed</Optimization>
       <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
-      <AdditionalIncludeDirectories>..\..\..\console\dec\inc;..\..\..\api\svc;..\..\..\common;..\..\..\encoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\..\common;..\..\..\console\dec\inc;..\..\..\api\svc;..\..\..\common;..\..\..\encoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <StringPooling>true</StringPooling>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
@@ -227,7 +227,7 @@
     </Midl>
     <ClCompile>
       <Optimization>Disabled</Optimization>
-      <AdditionalIncludeDirectories>..\..\..\console\dec\inc;..\..\..\api\svc;..\..\..\common;..\..\..\encoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\..\..\common;..\..\..\console\dec\inc;..\..\..\api\svc;..\..\..\common;..\..\..\encoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
--- a/codec/build/win32/enc/WelsEncCore.vcproj
+++ b/codec/build/win32/enc/WelsEncCore.vcproj
@@ -53,7 +53,7 @@
 				Name="VCCLCompilerTool"
 				Optimization="0"
 				AdditionalIncludeDirectories="..\..\..\encoder\core\inc,..\..\..\api\svc,..\..\..\WelsThreadLib\api;"
-				PreprocessorDefinitions="WIN32;_DEBUG;_LIB;WELS_SVC;ENCODER_CORE;X86_ASM;HAVE_CACHE_LINE_ALIGN;MT_ENABLED;"
+				PreprocessorDefinitions="WIN32;_DEBUG;_LIB;WELS_SVC;ENCODER_CORE;X86_ASM;HAVE_CACHE_LINE_ALIGN;MT_ENABLED"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
 				RuntimeLibrary="3"
@@ -101,9 +101,9 @@
 			/>
 		</Configuration>
 		<Configuration
-			Name="Release|Win32"
-			OutputDirectory=".\..\..\..\..\bin\win32\Release"
-			IntermediateDirectory=".\..\..\..\obj\encoder\core\Release"
+			Name="Debug|x64"
+			OutputDirectory=".\..\..\..\..\bin\win64\Debug"
+			IntermediateDirectory=".\..\..\..\obj\encoder\core\Debug"
 			ConfigurationType="4"
 			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
 			UseOfMFC="0"
@@ -127,22 +127,20 @@
 			/>
 			<Tool
 				Name="VCMIDLTool"
+				TargetEnvironment="3"
 			/>
 			<Tool
 				Name="VCCLCompilerTool"
-				Optimization="3"
-				InlineFunctionExpansion="2"
-				FavorSizeOrSpeed="1"
-				WholeProgramOptimization="true"
-				AdditionalIncludeDirectories="..\..\..\encoder\core\inc,..\..\..\api\svc,..\..\..\WelsThreadLib\api"
-				PreprocessorDefinitions="WIN32;NDEBUG;_LIB;WELS_SVC;ENCODER_CORE;X86_ASM;HAVE_CACHE_LINE_ALIGN;MT_ENABLED;"
-				StringPooling="true"
-				RuntimeLibrary="2"
-				EnableFunctionLevelLinking="true"
-				PrecompiledHeaderFile=".\..\..\..\obj\encoder\core\Release/WelsEncCore.pch"
-				AssemblerListingLocation=".\..\..\..\obj\encoder\core\Release/"
-				ObjectFile=".\..\..\..\obj\encoder\core\Release/"
-				ProgramDataBaseFileName=".\..\..\..\obj\encoder\core\Release/"
+				Optimization="0"
+				AdditionalIncludeDirectories="..\..\..\encoder\core\inc,..\..\..\api\svc,..\..\..\WelsThreadLib\api;"
+				PreprocessorDefinitions="WIN64;_DEBUG;_LIB;WELS_SVC;ENCODER_CORE;HAVE_CACHE_LINE_ALIGN;X86_ASM;MT_ENABLED"
+				MinimalRebuild="true"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="3"
+				PrecompiledHeaderFile=".\..\..\..\obj\encoder\core\Debug/WelsEncCore.pch"
+				AssemblerListingLocation=".\..\..\..\obj\encoder\core\Debug/"
+				ObjectFile=".\..\..\..\obj\encoder\core\Debug/"
+				ProgramDataBaseFileName=".\..\..\..\obj\encoder\core\Debug/"
 				WarningLevel="3"
 				SuppressStartupBanner="true"
 				DebugInformationFormat="3"
@@ -152,7 +150,7 @@
 			/>
 			<Tool
 				Name="VCResourceCompilerTool"
-				PreprocessorDefinitions="NDEBUG"
+				PreprocessorDefinitions="_DEBUG"
 				Culture="1033"
 			/>
 			<Tool
@@ -160,7 +158,6 @@
 			/>
 			<Tool
 				Name="VCLibrarianTool"
-				AdditionalOptions="/LTCG"
 				OutputFile="$(OutDir)\welsecore.lib"
 				SuppressStartupBanner="true"
 			/>
@@ -184,9 +181,9 @@
 			/>
 		</Configuration>
 		<Configuration
-			Name="Debug|x64"
-			OutputDirectory=".\..\..\..\..\bin\win64\Debug"
-			IntermediateDirectory=".\..\..\..\obj\encoder\core\Debug"
+			Name="Release|Win32"
+			OutputDirectory=".\..\..\..\..\bin\win32\Release"
+			IntermediateDirectory=".\..\..\..\obj\encoder\core\Release"
 			ConfigurationType="4"
 			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
 			UseOfMFC="0"
@@ -210,20 +207,22 @@
 			/>
 			<Tool
 				Name="VCMIDLTool"
-				TargetEnvironment="3"
 			/>
 			<Tool
 				Name="VCCLCompilerTool"
-				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\encoder\core\inc,..\..\..\api\svc,..\..\..\WelsThreadLib\api;"
-				PreprocessorDefinitions="WIN64;_DEBUG;_LIB;WELS_SVC;ENCODER_CORE;HAVE_CACHE_LINE_ALIGN;MT_ENABLED"
-				MinimalRebuild="true"
-				BasicRuntimeChecks="3"
-				RuntimeLibrary="3"
-				PrecompiledHeaderFile=".\..\..\..\obj\encoder\core\Debug/WelsEncCore.pch"
-				AssemblerListingLocation=".\..\..\..\obj\encoder\core\Debug/"
-				ObjectFile=".\..\..\..\obj\encoder\core\Debug/"
-				ProgramDataBaseFileName=".\..\..\..\obj\encoder\core\Debug/"
+				Optimization="3"
+				InlineFunctionExpansion="2"
+				FavorSizeOrSpeed="1"
+				WholeProgramOptimization="true"
+				AdditionalIncludeDirectories="..\..\..\encoder\core\inc,..\..\..\api\svc,..\..\..\WelsThreadLib\api"
+				PreprocessorDefinitions="WIN32;NDEBUG;_LIB;WELS_SVC;ENCODER_CORE;X86_ASM;HAVE_CACHE_LINE_ALIGN;MT_ENABLED;"
+				StringPooling="true"
+				RuntimeLibrary="2"
+				EnableFunctionLevelLinking="true"
+				PrecompiledHeaderFile=".\..\..\..\obj\encoder\core\Release/WelsEncCore.pch"
+				AssemblerListingLocation=".\..\..\..\obj\encoder\core\Release/"
+				ObjectFile=".\..\..\..\obj\encoder\core\Release/"
+				ProgramDataBaseFileName=".\..\..\..\obj\encoder\core\Release/"
 				WarningLevel="3"
 				SuppressStartupBanner="true"
 				DebugInformationFormat="3"
@@ -233,7 +232,7 @@
 			/>
 			<Tool
 				Name="VCResourceCompilerTool"
-				PreprocessorDefinitions="_DEBUG"
+				PreprocessorDefinitions="NDEBUG"
 				Culture="1033"
 			/>
 			<Tool
@@ -241,6 +240,7 @@
 			/>
 			<Tool
 				Name="VCLibrarianTool"
+				AdditionalOptions="/LTCG"
 				OutputFile="$(OutDir)\welsecore.lib"
 				SuppressStartupBanner="true"
 			/>
@@ -299,7 +299,7 @@
 				FavorSizeOrSpeed="1"
 				WholeProgramOptimization="true"
 				AdditionalIncludeDirectories="..\..\..\encoder\core\inc,..\..\..\api\svc,..\..\..\WelsThreadLib\api"
-				PreprocessorDefinitions="WIN64;NDEBUG;_LIB;WELS_SVC;ENCODER_CORE;HAVE_CACHE_LINE_ALIGN;MT_ENABLED"
+				PreprocessorDefinitions="WIN64;NDEBUG;_LIB;WELS_SVC;ENCODER_CORE;HAVE_CACHE_LINE_ALIGN;MT_ENABLED;X86_ASM"
 				StringPooling="true"
 				RuntimeLibrary="2"
 				EnableFunctionLevelLinking="true"
@@ -368,7 +368,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -377,7 +377,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -408,7 +408,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -417,7 +417,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -448,7 +448,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -457,7 +457,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -488,7 +488,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -497,7 +497,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -528,7 +528,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -537,7 +537,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -568,21 +568,21 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
 						AdditionalIncludeDirectories=""
-						PreprocessorDefinitions=""
+						PreprocessorDefinitions="OUPUT_REF_PIC"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
 						AdditionalIncludeDirectories=""
-						PreprocessorDefinitions="OUPUT_REF_PIC"
+						PreprocessorDefinitions=""
 					/>
 				</FileConfiguration>
 				<FileConfiguration
@@ -608,7 +608,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -617,7 +617,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -648,7 +648,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -657,7 +657,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -688,7 +688,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -697,7 +697,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -728,7 +728,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -737,7 +737,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -768,7 +768,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -777,7 +777,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -808,7 +808,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -817,7 +817,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -852,7 +852,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -861,7 +861,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -892,7 +892,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -901,7 +901,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -932,7 +932,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -941,7 +941,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -972,7 +972,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -981,7 +981,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1012,7 +1012,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1021,7 +1021,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1052,7 +1052,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1061,7 +1061,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1096,7 +1096,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1105,7 +1105,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1140,7 +1140,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1149,7 +1149,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1180,7 +1180,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1189,7 +1189,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1220,7 +1220,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1229,7 +1229,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1260,7 +1260,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1269,7 +1269,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1300,7 +1300,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1309,7 +1309,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1340,7 +1340,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1349,7 +1349,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1380,7 +1380,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1389,7 +1389,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1420,7 +1420,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1429,7 +1429,7 @@
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCLCompilerTool"
@@ -1686,7 +1686,7 @@
 			Filter="*.asm;*.inc"
 			>
 			<File
-				RelativePath="..\..\..\encoder\core\asm\asm_inc.asm"
+				RelativePath="..\..\..\encoder\core\asm\coeff.asm"
 				>
 				<FileConfiguration
 					Name="Debug|Win32"
@@ -1693,80 +1693,40 @@
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
-					Name="Release|Win32"
-					>
-					<Tool
-						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
 					Name="Debug|x64"
-					ExcludedFromBuild="true"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
-					Name="Release|x64"
-					ExcludedFromBuild="true"
-					>
-					<Tool
-						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
-			</File>
-			<File
-				RelativePath="..\..\..\encoder\core\asm\coeff.asm"
-				>
 				<FileConfiguration
-					Name="Debug|Win32"
-					>
-					<Tool
-						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
-						Outputs="$(IntDir)\$(InputName).obj"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
 					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
-					ExcludedFromBuild="true"
-					>
-					<Tool
-						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
-						Outputs="$(IntDir)\$(InputName).obj"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
 					Name="Release|x64"
-					ExcludedFromBuild="true"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 			</File>
 			<File
-				RelativePath="..\..\..\encoder\core\asm\cpuid.asm"
+				RelativePath="..\..\..\common\cpuid.asm"
 				>
 				<FileConfiguration
 					Name="Debug|Win32"
@@ -1773,36 +1733,34 @@
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm  -I$(InputDir) -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
-					ExcludedFromBuild="true"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
 					Name="Release|x64"
-					ExcludedFromBuild="true"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
@@ -1815,42 +1773,40 @@
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
-					ExcludedFromBuild="true"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
 					Name="Release|x64"
-					ExcludedFromBuild="true"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 			</File>
 			<File
-				RelativePath="..\..\..\encoder\core\asm\deblock.asm"
+				RelativePath="..\..\..\common\deblock.asm"
 				>
 				<FileConfiguration
 					Name="Debug|Win32"
@@ -1857,42 +1813,40 @@
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
-					ExcludedFromBuild="true"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
 					Name="Release|x64"
-					ExcludedFromBuild="true"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 			</File>
 			<File
-				RelativePath="..\..\..\encoder\core\asm\expand_picture.asm"
+				RelativePath="..\..\..\common\expand_picture.asm"
 				>
 				<FileConfiguration
 					Name="Debug|Win32"
@@ -1899,36 +1853,34 @@
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
-					ExcludedFromBuild="true"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
 					Name="Release|x64"
-					ExcludedFromBuild="true"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
@@ -1941,42 +1893,40 @@
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
-					ExcludedFromBuild="true"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
 					Name="Release|x64"
-					ExcludedFromBuild="true"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 			</File>
 			<File
-				RelativePath="..\..\..\encoder\core\asm\intra_pred_util.asm"
+				RelativePath="..\..\..\common\mb_copy.asm"
 				>
 				<FileConfiguration
 					Name="Debug|Win32"
@@ -1983,42 +1933,40 @@
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
-					ExcludedFromBuild="true"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
 					Name="Release|x64"
-					ExcludedFromBuild="true"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 			</File>
 			<File
-				RelativePath="..\..\..\encoder\core\asm\mb_copy.asm"
+				RelativePath="..\..\..\common\mc_chroma.asm"
 				>
 				<FileConfiguration
 					Name="Debug|Win32"
@@ -2025,42 +1973,40 @@
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
-					ExcludedFromBuild="true"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
 					Name="Release|x64"
-					ExcludedFromBuild="true"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 			</File>
 			<File
-				RelativePath="..\..\..\encoder\core\asm\mc_chroma.asm"
+				RelativePath="..\..\..\common\mc_luma.asm"
 				>
 				<FileConfiguration
 					Name="Debug|Win32"
@@ -2067,78 +2013,34 @@
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
-					>
-					<Tool
-						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
-						Outputs="$(IntDir)\$(InputName).obj"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
 					Name="Debug|x64"
-					ExcludedFromBuild="true"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|x64"
-					ExcludedFromBuild="true"
-					>
-					<Tool
-						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
-						Outputs="$(IntDir)\$(InputName).obj"
-					/>
-				</FileConfiguration>
-			</File>
-			<File
-				RelativePath="..\..\..\encoder\core\asm\mc_luma.asm"
-				>
-				<FileConfiguration
-					Name="Debug|Win32"
-					>
-					<Tool
-						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
-						Outputs="$(IntDir)\$(InputName).obj"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
 					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
-					ExcludedFromBuild="true"
-					>
-					<Tool
-						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
-						Outputs="$(IntDir)\$(InputName).obj"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
 					Name="Release|x64"
-					ExcludedFromBuild="true"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
@@ -2151,36 +2053,34 @@
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
-					ExcludedFromBuild="true"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
 					Name="Release|x64"
-					ExcludedFromBuild="true"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
@@ -2193,36 +2093,34 @@
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
-					ExcludedFromBuild="true"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
 					Name="Release|x64"
-					ExcludedFromBuild="true"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
@@ -2235,36 +2133,34 @@
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
-					ExcludedFromBuild="true"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
 					Name="Release|x64"
-					ExcludedFromBuild="true"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
@@ -2277,42 +2173,40 @@
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
-					ExcludedFromBuild="true"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
 					Name="Release|x64"
-					ExcludedFromBuild="true"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 			</File>
 			<File
-				RelativePath="..\..\..\encoder\core\asm\vaa.asm"
+				RelativePath="..\..\..\common\vaa.asm"
 				>
 				<FileConfiguration
 					Name="Debug|Win32"
@@ -2319,36 +2213,34 @@
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX  -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Release|Win32"
+					Name="Debug|x64"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
-					Name="Debug|x64"
-					ExcludedFromBuild="true"
+					Name="Release|Win32"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX  -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
 				<FileConfiguration
 					Name="Release|x64"
-					ExcludedFromBuild="true"
 					>
 					<Tool
 						Name="VCCustomBuildTool"
-						CommandLine="nasm  -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
+						CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
--- a/codec/build/win32/enc/WelsEncCore_2010.vcxproj
+++ b/codec/build/win32/enc/WelsEncCore_2010.vcxproj
@@ -127,7 +127,7 @@
     <ClCompile>
       <Optimization>Disabled</Optimization>
       <AdditionalIncludeDirectories>..\..\..\encoder\core\inc;..\..\..\api\svc;..\..\..\WelsThreadLib\api;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>WIN64;_DEBUG;_LIB;WELS_SVC;ENCODER_CORE;HAVE_CACHE_LINE_ALIGN;MT_ENABLED;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>WIN64;_DEBUG;X86_ASM;_LIB;WELS_SVC;ENCODER_CORE;HAVE_CACHE_LINE_ALIGN;MT_ENABLED;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
       <PrecompiledHeaderOutputFile>.\..\..\..\obj\encoder\core\Debug/WelsEncCore.pch</PrecompiledHeaderOutputFile>
@@ -197,7 +197,7 @@
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
       <WholeProgramOptimization>true</WholeProgramOptimization>
       <AdditionalIncludeDirectories>..\..\..\encoder\core\inc;..\..\..\api\svc;..\..\..\WelsThreadLib\api;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>WIN64;NDEBUG;_LIB;WELS_SVC;ENCODER_CORE;HAVE_CACHE_LINE_ALIGN;MT_ENABLED;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>WIN64;NDEBUG;X86_ASM;_LIB;WELS_SVC;ENCODER_CORE;HAVE_CACHE_LINE_ALIGN;MT_ENABLED;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <StringPooling>true</StringPooling>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <FunctionLevelLinking>true</FunctionLevelLinking>
@@ -565,255 +565,154 @@
     <ClInclude Include="..\..\..\encoder\core\inc\wels_preprocess.h" />
   </ItemGroup>
   <ItemGroup>
-    <CustomBuild Include="..\..\..\encoder\core\asm\asm_inc.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
-    </CustomBuild>
     <CustomBuild Include="..\..\..\encoder\core\asm\coeff.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\cpuid.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
-    </CustomBuild>
     <CustomBuild Include="..\..\..\encoder\core\asm\dct.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\deblock.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+    <CustomBuild Include="..\..\..\encoder\core\asm\intra_pred.asm">
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\expand_picture.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+    <CustomBuild Include="..\..\..\encoder\core\asm\memzero.asm">
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\intra_pred.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+    <CustomBuild Include="..\..\..\encoder\core\asm\quant.asm">
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\intra_pred_util.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+    <CustomBuild Include="..\..\..\encoder\core\asm\satd_sad.asm">
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\mb_copy.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+    <CustomBuild Include="..\..\..\encoder\core\asm\score.asm">
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\mc_chroma.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="..\..\..\common\cpuid.asm">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\mc_luma.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+    <CustomBuild Include="..\..\..\common\deblock.asm">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\memzero.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+    <CustomBuild Include="..\..\..\common\expand_picture.asm">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\quant.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+    <CustomBuild Include="..\..\..\common\mb_copy.asm">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\satd_sad.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+    <CustomBuild Include="..\..\..\common\mc_chroma.asm">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\score.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+    <CustomBuild Include="..\..\..\common\mc_luma.asm">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\vaa.asm">
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+    <CustomBuild Include="..\..\..\common\vaa.asm">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
-      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm  -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
-</Command>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
       <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
-      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\  -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
     </CustomBuild>
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
--- a/codec/build/win32/enc/WelsEncCore_2010.vcxproj.filters
+++ b/codec/build/win32/enc/WelsEncCore_2010.vcxproj.filters
@@ -278,52 +278,46 @@
     </ClInclude>
   </ItemGroup>
   <ItemGroup>
-    <CustomBuild Include="..\..\..\encoder\core\asm\asm_inc.asm">
-      <Filter>ASM</Filter>
-    </CustomBuild>
     <CustomBuild Include="..\..\..\encoder\core\asm\coeff.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\cpuid.asm">
-      <Filter>ASM</Filter>
-    </CustomBuild>
     <CustomBuild Include="..\..\..\encoder\core\asm\dct.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\deblock.asm">
+    <CustomBuild Include="..\..\..\encoder\core\asm\intra_pred.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\expand_picture.asm">
+    <CustomBuild Include="..\..\..\encoder\core\asm\memzero.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\intra_pred.asm">
+    <CustomBuild Include="..\..\..\encoder\core\asm\quant.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\intra_pred_util.asm">
+    <CustomBuild Include="..\..\..\encoder\core\asm\satd_sad.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\mb_copy.asm">
+    <CustomBuild Include="..\..\..\encoder\core\asm\score.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\mc_chroma.asm">
+    <CustomBuild Include="..\..\..\common\mc_luma.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\mc_luma.asm">
+    <CustomBuild Include="..\..\..\common\mc_chroma.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\memzero.asm">
+    <CustomBuild Include="..\..\..\common\mb_copy.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\quant.asm">
+    <CustomBuild Include="..\..\..\common\expand_picture.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\satd_sad.asm">
+    <CustomBuild Include="..\..\..\common\deblock.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\score.asm">
+    <CustomBuild Include="..\..\..\common\cpuid.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
-    <CustomBuild Include="..\..\..\encoder\core\asm\vaa.asm">
+    <CustomBuild Include="..\..\..\common\vaa.asm">
       <Filter>ASM</Filter>
     </CustomBuild>
   </ItemGroup>
--- a/codec/build/win32/enc/WelsEncoder_2008.sln
+++ b/codec/build/win32/enc/WelsEncoder_2008.sln
@@ -17,7 +17,7 @@
 		{E8DFAFA1-8DAC-4127-8D27-FBD5819EE562} = {E8DFAFA1-8DAC-4127-8D27-FBD5819EE562}
 	EndProjectSection
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "WelsVP", "..\..\..\..\processing\build\win32\WelsVP_2008.vcproj", "{E8DFAFA1-8DAC-4127-8D27-FBD5819EE562}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "WelsVP", "..\..\..\processing\build\win32\WelsVP_2008.vcproj", "{E8DFAFA1-8DAC-4127-8D27-FBD5819EE562}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
--- a/codec/build/win32/enc/WelsEncoder_2010.sln
+++ b/codec/build/win32/enc/WelsEncoder_2010.sln
@@ -10,7 +10,7 @@
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "encConsole_2010", "encConsole_2010.vcxproj", "{8509E2A8-2CBD-49E2-B564-3EFF1E927459}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "WelsVP_2010", "..\..\..\..\processing\build\win32\WelsVP_2010.vcxproj", "{E8DFAFA1-8DAC-4127-8D27-FBD5819EE562}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "WelsVP_2010", "..\..\..\processing\build\win32\WelsVP_2010.vcxproj", "{E8DFAFA1-8DAC-4127-8D27-FBD5819EE562}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
--- /dev/null
+++ b/codec/common/asm_inc.asm
@@ -1,0 +1,509 @@
+;*!
+;* \copy
+;*     Copyright (c)  2009-2013, Cisco Systems
+;*     All rights reserved.
+;*
+;*     Redistribution and use in source and binary forms, with or without
+;*     modification, are permitted provided that the following conditions
+;*     are met:
+;*
+;*        * Redistributions of source code must retain the above copyright
+;*          notice, this list of conditions and the following disclaimer.
+;*
+;*        * Redistributions in binary form must reproduce the above copyright
+;*          notice, this list of conditions and the following disclaimer in
+;*          the documentation and/or other materials provided with the
+;*          distribution.
+;*
+;*     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+;*     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+;*     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+;*     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+;*     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+;*     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+;*     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+;*     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+;*     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+;*     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+;*     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+;*     POSSIBILITY OF SUCH DAMAGE.
+;*
+;*
+;*  asm_inc.asm
+;*
+;*  Abstract
+;*      macro and constant
+;*
+;*  History
+;*      8/5/2009 Created
+;*
+;*
+;*************************************************************************/
+;***********************************************************************
+; Options, for DEBUG
+;***********************************************************************
+
+%if 1 ; 1: MOVDQ expands to movdqa (aligned move); 0: MOVDQ expands to movdqu (unaligned move)
+	%define MOVDQ movdqa
+%else
+	%define MOVDQ movdqu
+%endif
+
+%if 1 ; 1: WELSEMMS expands to emms; 0: WELSEMMS expands to nothing
+	%define WELSEMMS	emms
+%else
+	%define WELSEMMS
+%endif
+
+
+;***********************************************************************
+; Macros
+;***********************************************************************
+
+DEFAULT REL
+
+%ifdef WIN64 ; Windows x64 ;************************************
+
+BITS 64
+
+%define arg1 rcx
+%define arg2 rdx
+%define arg3 r8
+%define arg4 r9
+%define arg5 [rsp + push_num*8 + 40] ; stack arguments; push_num = number of pushes done so far in the function
+%define arg6 [rsp + push_num*8 + 48]
+%define arg7 [rsp + push_num*8 + 56]
+%define arg8 [rsp + push_num*8 + 64]
+%define arg9 [rsp + push_num*8 + 72]
+%define arg10 [rsp + push_num*8 + 80]
+
+%define r0 rcx ; r0..r3 are the same registers as arg1..arg4
+%define r1 rdx
+%define r2 r8
+%define r3 r9
+%define r4 rax
+%define r5 r10
+%define r6 r11
+%define r7 rsp ; r7 is always the stack pointer
+
+%define r0d ecx
+%define r1d edx
+%define r2d r8d
+%define r3d r9d
+%define r4d eax
+%define r5d r10d
+%define r6d r11d
+
+%define r0w  cx
+%define r1w  dx
+%define r2w  r8w
+%define r3w  r9w
+
+%define r0b  cl
+%define r1b  dl
+%define r2b  r8l
+%define r3b  r9l
+
+%define  PUSHRFLAGS     pushfq
+%define  POPRFLAGS      popfq
+%define  retrq          rax
+%define  retrd          eax
+
+%elifdef UNIX64 ; Unix x64 ;************************************
+
+BITS 64
+
+%define arg1 rdi
+%define arg2 rsi
+%define arg3 rdx
+%define arg4 rcx
+%define arg5 r8
+%define arg6 r9
+%define arg7 [rsp + push_num*8 + 8] ; stack arguments; push_num = number of pushes done so far in the function
+%define arg8 [rsp + push_num*8 + 16]
+%define arg9 [rsp + push_num*8 + 24]
+%define arg10 [rsp + push_num*8 + 32]
+
+%define r0 rdi ; r0..r5 are the same registers as arg1..arg6
+%define r1 rsi
+%define r2 rdx
+%define r3 rcx
+%define r4 r8
+%define r5 r9
+%define r6 r10
+%define r7 rsp ; r7 is always the stack pointer
+
+%define r0d edi
+%define r1d esi
+%define r2d edx
+%define r3d ecx
+%define r4d r8d
+%define r5d r9d
+%define r6d r10d
+
+%define r0w  di
+%define r1w  si
+%define r2w  dx
+%define r3w  cx
+
+%define r0b  dil
+%define r1b  sil
+%define r2b  dl
+%define r3b  cl
+
+%define  PUSHRFLAGS     pushfq
+%define  POPRFLAGS      popfq
+%define  retrq          rax
+%define  retrd          eax ; 32-bit view of retrq
+
+%elifdef X86_32 ; X86_32 ;************************************
+
+BITS 32
+
+%define arg1 [esp + push_num*4 + 4] ; all arguments are on the stack; push_num = number of pushes done so far in the function
+%define arg2 [esp + push_num*4 + 8]
+%define arg3 [esp + push_num*4 + 12]
+%define arg4 [esp + push_num*4 + 16]
+%define arg5 [esp + push_num*4 + 20]
+%define arg6 [esp + push_num*4 + 24]
+%define arg7 [esp + push_num*4 + 28]
+%define arg8 [esp + push_num*4 + 32]
+%define arg9 [esp + push_num*4 + 36]
+%define arg10 [esp + push_num*4 + 40]
+
+%define r0 eax
+%define r1 ecx
+%define r2 edx
+%define r3 ebx ; r3..r6 are pushed by the LOAD_n_PARA macros before they are loaded
+%define r4 esi
+%define r5 edi
+%define r6 ebp
+%define r7 esp ; r7 is always the stack pointer
+
+%define r0d eax
+%define r1d ecx
+%define r2d edx
+%define r3d ebx
+%define r4d esi
+%define r5d edi
+%define r6d ebp
+
+%define r0w ax
+%define r1w cx
+%define r2w dx
+%define r3w bx
+
+%define r0b al
+%define r1b cl
+%define r2b dl
+%define r3b bl
+
+%define  PUSHRFLAGS     pushfd
+%define  POPRFLAGS      popfd
+%define  retrq          eax      ; 32-bit mode does not support 64-bit registers
+%define  retrd          eax
+
+%endif
+
+%macro LOAD_PARA 2 ; plain mov wrapper: first macro argument = destination, second = source
+    mov %1, %2
+%endmacro
+
+%macro LOAD_1_PARA 0 ; X86_32: load stack argument 1 into r0; expands to nothing on the 64-bit targets
+    %ifdef X86_32
+	mov r0, [esp + push_num*4 + 4]
+    %endif
+%endmacro
+
+%macro LOAD_2_PARA 0 ; X86_32: load stack arguments 1..2 into r0..r1; expands to nothing on the 64-bit targets
+    %ifdef X86_32
+        mov r0, [esp + push_num*4 + 4]
+        mov r1, [esp + push_num*4 + 8]
+    %endif
+%endmacro
+
+%macro LOAD_3_PARA 0 ; X86_32: load stack arguments 1..3 into r0..r2; expands to nothing on the 64-bit targets
+    %ifdef X86_32
+        mov r0, [esp + push_num*4 + 4]
+	mov r1, [esp + push_num*4 + 8]
+	mov r2, [esp + push_num*4 + 12]
+    %endif
+%endmacro
+
+%macro LOAD_4_PARA 0 ; X86_32: save r3, bump push_num by 1, load stack arguments 1..4 into r0..r3; nothing on 64-bit targets
+    %ifdef X86_32
+        push r3 ; r3 is about to be overwritten; LOAD_4_PARA_POP restores it
+        %assign  push_num push_num+1	
+        mov r0, [esp + push_num*4 + 4] ; push_num must already be defined by the caller before this macro is used
+        mov r1, [esp + push_num*4 + 8]
+        mov r2, [esp + push_num*4 + 12]
+        mov r3, [esp + push_num*4 + 16]
+    %endif
+%endmacro
+
+%macro LOAD_5_PARA 0 ; make arguments 1..5 available in r0..r4; UNIX64 needs no code (r0..r4 already are arg1..arg5)
+    %ifdef X86_32
+        push r3 ; r3 and r4 are about to be overwritten; LOAD_5_PARA_POP restores them
+        push r4
+        %assign  push_num push_num+2	
+        mov r0, [esp + push_num*4 + 4]
+        mov r1, [esp + push_num*4 + 8]
+        mov r2, [esp + push_num*4 + 12]
+        mov r3, [esp + push_num*4 + 16]
+        mov r4, [esp + push_num*4 + 20]
+    %elifdef WIN64
+        mov r4, [rsp + push_num*8 + 40] ; same address as the WIN64 arg5 define; arguments 1..4 are already in r0..r3
+    %endif
+%endmacro
+
+%macro LOAD_6_PARA 0 ; make arguments 1..6 available in r0..r5; UNIX64 needs no code (r0..r5 already are arg1..arg6)
+    %ifdef X86_32
+	push r3 ; r3..r5 are about to be overwritten; LOAD_6_PARA_POP restores them
+        push r4
+        push r5
+        %assign  push_num push_num+3	
+        mov r0, [esp + push_num*4 + 4]
+        mov r1, [esp + push_num*4 + 8]
+        mov r2, [esp + push_num*4 + 12]
+        mov r3, [esp + push_num*4 + 16]
+        mov r4, [esp + push_num*4 + 20]
+        mov r5, [esp + push_num*4 + 24]
+    %elifdef WIN64
+        mov r4, [rsp + push_num*8 + 40] ; same addresses as the WIN64 arg5/arg6 defines
+        mov r5, [rsp + push_num*8 + 48]
+    %endif
+%endmacro
+
+%macro LOAD_7_PARA 0 ; make arguments 1..7 available in r0..r6 on every target
+    %ifdef X86_32
+        push r3 ; r3..r6 are about to be overwritten; LOAD_7_PARA_POP restores them
+        push r4
+        push r5
+        push r6
+        %assign  push_num push_num+4	
+        mov r0, [esp + push_num*4 + 4]
+        mov r1, [esp + push_num*4 + 8]
+        mov r2, [esp + push_num*4 + 12]
+        mov r3, [esp + push_num*4 + 16]
+        mov r4, [esp + push_num*4 + 20]
+        mov r5, [esp + push_num*4 + 24]
+        mov r6, [esp + push_num*4 + 28]
+    %elifdef WIN64
+        mov r4, [rsp + push_num*8 + 40] ; same addresses as the WIN64 arg5..arg7 defines
+        mov r5, [rsp + push_num*8 + 48]
+        mov r6, [rsp + push_num*8 + 56]
+    %elifdef UNIX64
+        mov r6, [rsp + push_num*8 + 8] ; same address as the UNIX64 arg7 define; arguments 1..6 are already in r0..r5
+    %endif
+%endmacro
+
+
+
+%macro LOAD_4_PARA_POP 0 ; X86_32: restore r3 pushed by LOAD_4_PARA (push_num is not changed); nothing on 64-bit targets
+    %ifdef X86_32
+	pop r3
+    %endif
+%endmacro
+
+%macro LOAD_5_PARA_POP 0 ; X86_32: restore r4, r3 pushed by LOAD_5_PARA, in reverse push order (push_num is not changed)
+    %ifdef X86_32
+        pop r4
+	pop r3
+    %endif
+%endmacro
+
+%macro LOAD_6_PARA_POP 0 ; X86_32: restore r5, r4, r3 pushed by LOAD_6_PARA, in reverse push order (push_num is not changed)
+    %ifdef X86_32
+        pop r5
+  	pop r4
+ 	pop r3
+    %endif
+%endmacro
+
+%macro LOAD_7_PARA_POP 0 ; X86_32: restore r6, r5, r4, r3 pushed by LOAD_7_PARA, in reverse push order (push_num is not changed)
+    %ifdef X86_32
+        pop r6
+        pop r5
+        pop r4
+        pop r3
+    %endif
+%endmacro
+
+%macro SIGN_EXTENTION 2 ; sign-extend the second operand into the first; emits nothing when X86_32 is defined
+    %ifndef X86_32
+            movsx %1, %2 ; NOTE(review): for a 64-bit destination with a 32-bit source NASM's mnemonic is movsxd -- confirm this assembles for the operands callers pass
+    %endif
+%endmacro
+ 
+%macro WELS_EXTERN 1 ; export the given symbol; with PREFIX defined it is exported with a leading underscore
+    %ifdef PREFIX
+        global _%1
+        %define %1 _%1 ; later uses of the plain name (including its label) now expand to the underscored name
+    %else
+        global %1
+    %endif
+%endmacro
+
+%macro WELS_AbsW 2 ; per-word absolute value of the first operand, in place; the second operand is scratch
+	pxor        %2, %2 ; scratch = 0
+    psubw       %2, %1 ; scratch = -value (per 16-bit lane)
+    pmaxsw      %1, %2 ; value = signed max(value, -value)
+%endmacro
+
+%macro MMX_XSwap  4 ; args: unpack suffix (wd, dq, ...), a, b, tmp -> a = punpckl(a, b), tmp = punpckh(old a, b)
+    movq		%4, %2 ; keep a copy of a before it is overwritten
+    punpckh%1   %4, %3
+    punpckl%1   %2, %3
+%endmacro
+
+; in: %1..%4, %5 = scratch; results end up in %1, %4, %5, %3 (e.g. in mm1..mm4 + mm5 -> pOut mm1, mm4, mm5, mm3)
+%macro MMX_Trans4x4W 5
+    MMX_XSwap wd, %1, %2, %5
+    MMX_XSwap wd, %3, %4, %2
+    MMX_XSwap dq, %1, %3, %4
+    MMX_XSwap dq, %5, %2, %3
+%endmacro
+
+;for TRANSPOSE; args: unpack suffix (bw, wd, dq, qdq), a, b, tmp -> a = punpckl(a, b), tmp = punpckh(old a, b)
+%macro SSE2_XSawp 4
+    movdqa      %4, %2 ; keep a copy of a before it is overwritten
+    punpckl%1   %2, %3
+    punpckh%1   %4, %3
+%endmacro
+
+; in: xmm1, xmm2, xmm3, xmm4 (%1..%4), %5 = scratch  pOut:  xmm1, xmm4, xmm5, xmm3 (%1, %4, %5, %3)
+%macro SSE2_Trans4x4D 5
+    SSE2_XSawp dq,  %1, %2, %5
+    SSE2_XSawp dq,  %3, %4, %2
+    SSE2_XSawp qdq, %1, %3, %4
+    SSE2_XSawp qdq, %5, %2, %3
+%endmacro
+
+;in: xmm0, xmm1, xmm2, xmm3 (%1..%4), %5 = scratch  pOut:  xmm0, xmm1, xmm3, xmm4 (%1, %2, %4, %5)
+%macro SSE2_TransTwo4x4W 5
+    SSE2_XSawp wd,  %1, %2, %5
+    SSE2_XSawp wd,  %3, %4, %2
+    SSE2_XSawp dq,  %1, %3, %4
+    SSE2_XSawp dq,  %5, %2, %3
+    SSE2_XSawp qdq, %1, %5, %2
+    SSE2_XSawp qdq, %4, %3, %5
+%endmacro
+
+;in:  m1, m2, m3, m4, m5, m6, m7, m8 (%1..%8); %9 is only used as a spill slot for intermediate values
+;pOut: m5, m3, m4, m8, m6, m2, m7, m1
+%macro SSE2_TransTwo8x8B 9
+	movdqa	%9,	%8 ; save %8: the next SSE2_XSawp uses it as its temporary
+	SSE2_XSawp bw,  %1, %2, %8
+	SSE2_XSawp bw,  %3, %4, %2
+	SSE2_XSawp bw,  %5, %6, %4
+	movdqa	%6, %9 ; bring the saved %8 back as the partner of %7
+	movdqa	%9, %4
+	SSE2_XSawp bw,  %7, %6, %4
+
+	SSE2_XSawp wd,  %1, %3, %6
+	SSE2_XSawp wd,  %8, %2, %3
+	SSE2_XSawp wd,  %5, %7, %2
+	movdqa	%7, %9
+	movdqa	%9, %3
+	SSE2_XSawp wd,  %7, %4, %3
+
+	SSE2_XSawp dq,  %1, %5, %4
+	SSE2_XSawp dq,  %6, %2, %5
+	SSE2_XSawp dq,  %8, %7, %2
+	movdqa	%7, %9
+	movdqa	%9, %5
+	SSE2_XSawp dq,  %7, %3, %5
+
+	SSE2_XSawp qdq,  %1, %8, %3
+	SSE2_XSawp qdq,  %4, %2, %8
+	SSE2_XSawp qdq,  %6, %7, %2
+	movdqa	%7, %9
+	movdqa	%9, %1
+	SSE2_XSawp qdq,  %7, %5, %1
+	movdqa	%5, %9
+%endmacro
+
+;example operands: xmm0, xmm6, xmm7, [eax], [ecx]  (%1 = result, %2 = scratch, %3 = zero register, %4/%5 = the two 8-byte sources)
+;xmm7 = 0, eax = pix1, ecx = pix2, xmm0 save the result (8 word differences pix1 - pix2)
+%macro SSE2_LoadDiff8P 5
+    movq         %1, %4
+    punpcklbw    %1, %3 ; interleave with %3 (expected to be zero) to widen bytes to words
+    movq         %2, %5
+    punpcklbw    %2, %3
+    psubw        %1, %2
+%endmacro
+
+; m2 = m1 + m2, m1 = m1 - m2 (m1 = %1, m2 = %2; per-word, using the old m2 for the difference); %3 is scratch
+%macro SSE2_SumSub 3
+	movdqa  %3, %2 ; keep the old m2 for the subtraction
+    paddw   %2, %1
+    psubw   %1, %3
+%endmacro
+
+
+%macro butterfly_1to16_sse	3	; xmm? for dst, xmm? for tmp, one byte for pSrc [generic register name: a/b/c/d]
+	mov %3h, %3l ; duplicate the low byte into the high byte, so the low word is b0 b0
+	movd %1, e%3x		; i.e., dst low dword = e?x of the named register (low word = b0 b0)
+	pshuflw %2, %1, 00h	; ..., b0 b0 b0 b0 b0 b0 b0 b0
+	pshufd %1, %2, 00h	; b0 b0 b0 b0, b0 b0 b0 b0, b0 b0 b0 b0, b0 b0 b0 b0
+%endmacro
+
+;copy a word (the low 16 bits of the second operand) into all 8 word lanes of the xmm given as first operand
+%macro  SSE2_Copy8Times 2
+		movd	%1, %2
+		punpcklwd %1, %1 ; low dword becomes w0 w0
+		pshufd	%1,	%1,	0 ; broadcast that dword to all four dword lanes
+%endmacro
+
+;copy a byte into all 16 byte lanes of the xmm given as first operand (the low word of the source is packed with unsigned saturation)
+%macro  SSE2_Copy16Times 2
+		movd		%1, %2
+		pshuflw		%1, %1, 0 ; low four words = w0
+		punpcklqdq	%1, %1 ; all eight words = w0
+		packuswb	%1,	%1 ; words -> bytes with unsigned saturation, giving 16 identical bytes
+%endmacro
+
+
+
+;***********************************************************************
+;preprocessor constants
+;***********************************************************************
+;dw 32,32,32,32,32,32,32,32 for xmm
+;dw 32,32,32,32 for mm
+%macro WELS_DW32 1
+	pcmpeqw %1,%1 ; all bits set
+	psrlw %1,15 ; each word = 1
+	psllw %1,5 ; each word = 32
+%endmacro
+
+;dw 1, 1, 1, 1, 1, 1, 1, 1 for xmm
+;dw 1, 1, 1, 1 for mm
+%macro WELS_DW1 1
+	pcmpeqw %1,%1 ; all bits set
+	psrlw %1,15 ; each word = 1
+%endmacro
+
+;all 0 for xmm and mm (register xor'ed with itself)
+%macro	WELS_Zero 1
+	pxor %1, %1
+%endmacro
+
+;dd 1, 1, 1, 1 for xmm
+;dd 1, 1 for mm
+%macro WELS_DD1 1
+	pcmpeqw %1,%1 ; all bits set
+	psrld %1,31 ; each dword = 1
+%endmacro
+
+;db 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+%macro WELS_DB1 1
+	pcmpeqw %1,%1 ; all bits set
+	psrlw %1,15 ; each word = 1
+	packuswb %1,%1 ; pack the words down to bytes: each byte = 1
+%endmacro
+
+
+
+
+
+
--- /dev/null
+++ b/codec/common/cpuid.asm
@@ -1,0 +1,220 @@
+;*!
+;* \copy
+;*     Copyright (c)  2009-2013, Cisco Systems
+;*     All rights reserved.
+;*
+;*     Redistribution and use in source and binary forms, with or without
+;*     modification, are permitted provided that the following conditions
+;*     are met:
+;*
+;*        * Redistributions of source code must retain the above copyright
+;*          notice, this list of conditions and the following disclaimer.
+;*
+;*        * Redistributions in binary form must reproduce the above copyright
+;*          notice, this list of conditions and the following disclaimer in
+;*          the documentation and/or other materials provided with the
+;*          distribution.
+;*
+;*     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+;*     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+;*     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+;*     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+;*     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+;*     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+;*     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+;*     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+;*     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+;*     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+;*     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+;*     POSSIBILITY OF SUCH DAMAGE.
+;*
+;*
+;*	cpuid.asm
+;*
+;*  Abstract
+;*		verify cpuid feature support and cpuid detection
+;*
+;*  History
+;*      04/29/2009	Created
+;*
+;*************************************************************************/
+
+%include "asm_inc.asm"
+
+;******************************************************************************************
+; Macros
+;******************************************************************************************
+
+
+;******************************************************************************************
+; Code
+;******************************************************************************************
+
+SECTION .text
+
+; refer to "The IA-32 Intel(R) Architecture Software Developers Manual, Volume 2A A-M"
+; section CPUID - CPU Identification
+
+WELS_EXTERN WelsCPUIdVerify
+ALIGN 16
+;******************************************************************************************
+;   int32_t WelsCPUIdVerify()   -- result is returned in eax; r1 and the flags register are preserved
+;******************************************************************************************
+WelsCPUIdVerify:
+    push    r1 ; r1 is used as scratch below and restored before ret
+    PUSHRFLAGS ; first copy of the flags: popped back by POPRFLAGS below
+    PUSHRFLAGS ; second copy of the flags: popped into r1 for inspection
+
+    pop      r1 ; r1 = current flags
+    mov      eax, r1d
+    xor      eax, 00200000h ; toggle bit 21 in the copy held in eax
+    xor      eax, r1d ; NOTE(review): eax = (flags ^ 00200000h) ^ flags, i.e. always 00200000h; the toggled value is never written back to the flags register, so bit 21 is not actually probed -- confirm intent
+    POPRFLAGS
+    pop      r1
+    ret
+
+WELS_EXTERN WelsCPUId
+ALIGN 16
+;****************************************************************************************************
+;   void WelsCPUId( int32_t uiIndex, int32_t *pFeatureA, int32_t *pFeatureB, int32_t *pFeatureC, int32_t *pFeatureD )
+;   Runs cpuid with eax = uiIndex and stores eax/ebx/ecx/edx to *pFeatureA/*pFeatureB/*pFeatureC/*pFeatureD.
+%ifdef       WIN64
+
+WelsCPUId:
+    push     rbx        ; ebx is overwritten by cpuid; restored before ret
+    push     rdx    ; keep arg2 (pFeatureA): edx is overwritten by cpuid
+ 
+    mov      eax,     ecx ; eax = uiIndex (arg1)
+    cpuid  ; NOTE(review): ecx still holds uiIndex here and is not cleared -- confirm no caller uses a leaf that reads a sub-leaf from ecx
+    mov      [r9],    ecx ; *pFeatureC (arg4)
+    mov      [r8],    ebx ; *pFeatureB (arg3)
+    mov      rcx,    [rsp + 2*8 + 40]        ; rcx = pFeatureD (arg5 on the stack; 2*8 accounts for the two pushes above)
+    mov      [rcx],   edx ; *pFeatureD
+    pop      rdx ; rdx = pFeatureA again
+    mov      [rdx],   eax ; *pFeatureA
+
+    pop      rbx
+    ret
+
+%elifdef     UNIX64
+WelsCPUId:
+    push     rbx ; ebx is overwritten by cpuid; restored before ret
+    push     rcx ; keep arg4 (pFeatureC): ecx is overwritten by cpuid
+    push     rdx ; keep arg3 (pFeatureB): edx is overwritten by cpuid
+
+    mov      eax,     edi    ; eax = uiIndex (arg1)
+    cpuid ; NOTE(review): ecx holds the low half of pFeatureC here and is not cleared -- confirm no caller uses a leaf that reads a sub-leaf from ecx
+    mov      [r8],    edx ; *pFeatureD (arg5 is in r8)
+    pop      rdx    ; rdx = pFeatureB
+    pop      r8 ; r8 = pFeatureC (the value pushed from rcx)
+    mov      [r8],   ecx ; *pFeatureC
+    mov      [rdx],   ebx ; *pFeatureB
+    mov      [rsi],   eax ; *pFeatureA (arg2 is in rsi)
+
+    pop      rbx
+    ret
+
+%elifdef     X86_32
+
+WelsCPUId:
+    push	ebx
+    push	edi
+
+    mov     eax, [esp+12]	; operating index (arg1: 2 pushes + return address = 12 bytes)
+    cpuid					; cpuid; NOTE(review): ecx is not initialised before this instruction -- confirm no caller uses a leaf that reads a sub-leaf from ecx
+
+    ; store each returned register through the matching pointer argument
+    mov     edi, [esp+16]
+    mov     [edi], eax
+    mov     edi, [esp+20]
+    mov     [edi], ebx
+    mov     edi, [esp+24]
+    mov     [edi], ecx
+    mov     edi, [esp+28]
+    mov     [edi], edx
+
+    pop	    edi
+    pop     ebx
+    ret
+
+%endif
+
+WELS_EXTERN WelsCPUSupportAVX
+; must be called after cpuid with index 1: pass in the eax and ecx values it returned
+ALIGN 16
+;****************************************************************************************************
+;   int32_t WelsCPUSupportAVX( uint32_t eax, uint32_t ecx )   -- returns 1 in eax if supported, else 0
+;****************************************************************************************************
+WelsCPUSupportAVX:
+%ifdef     WIN64
+        mov   eax,    ecx ; arg1 -> eax
+        mov   ecx,    edx ; arg2 -> ecx
+%elifdef   UNIX64
+        mov eax, edi ; arg1 -> eax
+        mov ecx, esi ; arg2 -> ecx
+%else 
+        mov eax, [esp+4] ; arg1 -> eax (no pushes have been done, so arguments start at esp+4)
+        mov ecx, [esp+8]  ; arg2 -> ecx
+%endif
+
+        ; refer to detection of AVX addressed in INTEL AVX manual document
+        and ecx, 018000000H
+        cmp ecx, 018000000H             ; check both OSXSAVE and AVX feature flags
+        jne avx_not_supported
+        ; processor supports AVX instructions and XGETBV is enabled by OS
+        mov ecx, 0                              ; specify 0 for XFEATURE_ENABLED_MASK register
+        XGETBV                                  ; result in EDX:EAX
+        and eax, 06H
+        cmp eax, 06H                    ; check OS has enabled both XMM and YMM state support
+        jne avx_not_supported
+        mov eax, 1
+        ret
+avx_not_supported:
+        mov eax, 0
+        ret
+
+
+WELS_EXTERN  WelsCPUSupportFMA
+; must be called after cpuid with index 1: pass in the eax and ecx values it returned
+ALIGN 16
+;****************************************************************************************************
+;   int32_t WelsCPUSupportFMA( uint32_t eax, uint32_t ecx )   -- returns 1 in eax if supported, else 0
+;****************************************************************************************************
+WelsCPUSupportFMA:
+%ifdef     WIN64
+        mov   eax,   ecx ; arg1 -> eax
+        mov   ecx,   edx ; arg2 -> ecx
+%elifdef   UNIX64
+        mov   eax,   edi ; arg1 -> eax
+        mov   ecx,   esi ; arg2 -> ecx
+%else
+	mov eax, [esp+4] ; arg1 -> eax (no pushes have been done, so arguments start at esp+4)
+	mov ecx, [esp+8] ; arg2 -> ecx
+%endif
+	; refer to detection of FMA addressed in INTEL AVX manual document
+	and ecx, 018001000H
+	cmp ecx, 018001000H		; check OSXSAVE, AVX, FMA feature flags
+	jne fma_not_supported
+	; processor supports AVX,FMA instructions and XGETBV is enabled by OS
+	mov ecx, 0				; specify 0 for XFEATURE_ENABLED_MASK register
+	XGETBV					; result in EDX:EAX
+	and eax, 06H
+	cmp eax, 06H			; check OS has enabled both XMM and YMM state support
+	jne fma_not_supported
+	mov eax, 1
+	ret
+fma_not_supported:
+	mov eax, 0
+	ret
+
+WELS_EXTERN WelsEmms
+ALIGN 16
+;******************************************************************************************
+;   void WelsEmms()   -- takes no arguments; executes a single emms and returns
+;******************************************************************************************
+WelsEmms:
+	emms	; empty mmx technology states
+	ret
+
+
+
--- /dev/null
+++ b/codec/common/deblock.asm
@@ -1,0 +1,5325 @@
+;*!
+;* \copy
+;*     Copyright (c)  2009-2013, Cisco Systems
+;*     All rights reserved.
+;*
+;*     Redistribution and use in source and binary forms, with or without
+;*     modification, are permitted provided that the following conditions
+;*     are met:
+;*
+;*        * Redistributions of source code must retain the above copyright
+;*          notice, this list of conditions and the following disclaimer.
+;*
+;*        * Redistributions in binary form must reproduce the above copyright
+;*          notice, this list of conditions and the following disclaimer in
+;*          the documentation and/or other materials provided with the
+;*          distribution.
+;*
+;*     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+;*     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+;*     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+;*     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+;*     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+;*     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+;*     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+;*     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+;*     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+;*     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+;*     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+;*     POSSIBILITY OF SUCH DAMAGE.
+;*
+;*
+;*  deblock.asm
+;*
+;*  Abstract
+;*      edge loop
+;*
+;*  History
+;*      08/07/2009 Created
+;*
+;*
+;*************************************************************************/
+%include "asm_inc.asm"
+
+;*******************************************************************************
+; Macros and other preprocessor constants
+;*******************************************************************************
+
+; Read-only constants; the section is declared differently for COFF output.
+%ifdef FORMAT_COFF
+SECTION .rodata pData
+%else
+SECTION .rodata align=16
+%endif
+
+; eight 16-bit words, each holding 4 (rounding term added before the >>3 in the luma filters)
+ALIGN   16
+FOUR_16B_SSE2:   times 8 dw 4
+
+
+SECTION .text
+
+%ifdef  WIN64 
+
+
+WELS_EXTERN   DeblockLumaLt4V_sse2
+
+;*******************************************************************************
+;   DeblockLumaLt4V_sse2  (Win64 calling convention)
+;
+;   Inputs as consumed by the code below:
+;     rcx        pixel row pointer; rows are accessed with movdqa, so every
+;                touched row must be 16-byte aligned
+;     edx        row stride in bytes (sign-extended)
+;     r8d, r9d   two 16-bit thresholds, each broadcast to eight words
+;                (presumably alpha and beta - confirm against the C prototype)
+;     5th arg    pTC: pointer to four signed bytes; each byte is applied to
+;                four consecutive pixels of the 16-pixel row
+;
+;   Reads the six rows rcx-3*stride .. rcx+2*stride and rewrites the four rows
+;   rcx-2*stride .. rcx+stride.
+;
+;   r12 and xmm6-xmm15 are callee-saved on Win64; they are spilled in the
+;   prologue and reloaded before returning.
+;   NOTE: pabsw is an SSSE3 instruction, despite the _sse2 suffix of the name.
+;*******************************************************************************
+ALIGN  16
+DeblockLumaLt4V_sse2:
+  push        rbp
+  ; 5th argument: return address + pushed rbp (16 bytes) + 20h bytes of shadow space.
+  ; Must be addressed through rsp: an esp-based address is truncated to 32 bits.
+  mov         r11,[rsp + 16 + 20h]  ; pTC
+  sub         rsp,1B0h
+  lea         rbp,[rsp+20h]         ; frame base; [rbp+0..90h] hold the saved xmm6-xmm15
+  movd        xmm4,r8d
+  movd        xmm2,r9d
+  mov         qword [rbp+180h],r12  ; save callee-saved r12
+  mov         r10,rcx
+  movsxd      r12,edx               ; r12 = stride
+  add         edx,edx
+  movsxd      rdx,edx               ; rdx = 2*stride
+  sub         r10,r12               ; r10 = rcx - stride
+  movsx       r8d,byte [r11]        ; pTC[0]
+  pxor        xmm3,xmm3
+  punpcklwd   xmm2,xmm2
+  movaps      [rbp+50h],xmm14       ; save xmm14
+  lea         rax,[r12+r12*2]
+  movdqa      xmm14,[rdx+rcx]       ; row rcx + 2*stride
+  neg         rax                   ; rax = -3*stride
+  pshufd      xmm0,xmm2,0           ; xmm0 = r9d threshold in every word
+  movd        xmm2,r8d
+  movsx       edx,byte [r11+1]      ; pTC[1]
+  movsx       r8d,byte [r11+2]      ; pTC[2]
+  movsx       r11d,byte [r11+3]     ; pTC[3]
+  movaps      [rbp+70h],xmm12       ; save xmm12
+  movd        xmm1,edx
+  movaps      [rbp+80h],xmm11       ; save xmm11
+  movd        xmm12,r8d
+  movd        xmm11,r11d
+  movdqa      xmm5, [rax+rcx]       ; row rcx - 3*stride
+  lea         rax,[r12+r12]
+  punpcklwd   xmm12,xmm12
+  neg         rax                   ; rax = -2*stride (kept until the final stores)
+  punpcklwd   xmm11,xmm11
+  movaps      [rbp],xmm8            ; save xmm8
+  movdqa      xmm8, [r10]           ; row rcx - stride
+  punpcklwd   xmm2,xmm2
+  punpcklwd   xmm1,xmm1
+  punpcklqdq  xmm12,xmm12
+  punpcklqdq  xmm11,xmm11
+  punpcklqdq  xmm2,xmm2
+  punpcklqdq  xmm1,xmm1
+  shufps      xmm12,xmm11,88h       ; xmm12 = pTC[2] x4 words, pTC[3] x4 words
+  movdqa      xmm11,xmm8
+  movaps      [rbp+30h],xmm9        ; save xmm9
+  movdqa      xmm9,[rcx]            ; row rcx
+  shufps      xmm2,xmm1,88h         ; xmm2 = pTC[0] x4 words, pTC[1] x4 words
+  movdqa      xmm1,xmm5
+  punpcklbw   xmm11,xmm3
+  movaps      [rbp+20h],xmm6        ; save xmm6
+  movaps      [rbp+60h],xmm13       ; save xmm13
+  movdqa      xmm13,xmm11
+  movaps      [rbp+90h],xmm10       ; save xmm10
+  movdqa      xmm10,xmm9
+  movdqa      xmm6,[rax+rcx]        ; row rcx - 2*stride
+  punpcklbw   xmm1,xmm3
+  movaps      [rbp+0A0h],xmm12      ; local: tc words for the high 8 pixels
+  psubw       xmm13,xmm1
+  movaps      [rbp+40h],xmm15       ; save xmm15
+  movdqa      xmm15,xmm14
+  movaps      [rbp+10h],xmm7        ; save xmm7
+  movdqa      xmm7,xmm6
+  punpcklbw   xmm10,xmm3
+  movdqa      xmm12,[r12+rcx]       ; row rcx + stride
+  punpcklbw   xmm7,xmm3
+  punpcklbw   xmm12,xmm3
+  punpcklbw   xmm15,xmm3
+  pabsw       xmm3,xmm13
+  movdqa      xmm13,xmm10
+  psubw       xmm13,xmm15
+  movdqa      [rbp+0F0h],xmm15
+  pabsw       xmm15,xmm13
+  movdqa      xmm13,xmm11
+  movdqa      [rbp+0B0h],xmm1
+  movdqa      xmm1,xmm0
+  pavgw       xmm13,xmm10
+  pcmpgtw     xmm1,xmm3
+  movdqa      [rbp+120h],xmm13
+  movaps      xmm13,xmm2
+  punpcklwd   xmm4,xmm4
+  movdqa      xmm3,xmm0
+  movdqa      [rbp+100h],xmm1
+  psubw       xmm13,xmm1
+  movdqa      xmm1,xmm10
+  pcmpgtw     xmm3,xmm15
+  pshufd      xmm4,xmm4,0           ; xmm4 = r8d threshold in every word
+  psubw       xmm1,xmm11
+  movdqa      [rbp+0D0h],xmm10
+  psubw       xmm13,xmm3
+  movdqa      [rbp+110h],xmm3
+  pabsw       xmm15,xmm1
+  movdqa      xmm3,xmm4
+  psubw       xmm10,xmm12
+  pcmpgtw     xmm3,xmm15
+  pabsw       xmm15,xmm10
+  movdqa      xmm10,xmm0
+  psllw       xmm1,2
+  movdqa      [rbp+0C0h],xmm11
+  psubw       xmm11,xmm7
+  pcmpgtw     xmm10,xmm15
+  pabsw       xmm11,xmm11
+  movdqa      xmm15,xmm0
+  pand        xmm3,xmm10
+  pcmpgtw     xmm15,xmm11
+  movaps      xmm11,xmm2
+  pxor        xmm10,xmm10
+  pand        xmm3,xmm15
+  pcmpgtw     xmm11,xmm10
+  pcmpeqw     xmm10,xmm2
+  por         xmm11,xmm10           ; mask of lanes whose tc >= 0
+  pand        xmm3,xmm11            ; xmm3 = filter-enable mask, low 8 pixels
+  movdqa      xmm11,xmm7
+  psubw       xmm11,xmm12
+  pxor        xmm15,xmm15
+  paddw       xmm11,xmm1
+  psubw       xmm15,xmm13
+  movdqa      [rbp+0E0h],xmm12
+  paddw       xmm11,[FOUR_16B_SSE2]
+  pxor        xmm12,xmm12
+  psraw       xmm11,3
+  punpckhbw   xmm8,xmm12            ; from here on: high 8 pixels of each row
+  pmaxsw      xmm15,xmm11
+  punpckhbw   xmm5,xmm12
+  movdqa      xmm11,xmm8
+  pminsw      xmm13,xmm15
+  psubw       xmm11,xmm5
+  punpckhbw   xmm9,xmm12
+  pand        xmm13,xmm3
+  movdqa      [rbp+130h],xmm13
+  pabsw       xmm13,xmm11
+  punpckhbw   xmm14,xmm12
+  movdqa      xmm11,xmm9
+  psubw       xmm11,xmm14
+  movdqa      xmm15,xmm0
+  movdqa      [rbp+140h],xmm14
+  pabsw       xmm14,xmm11
+  movdqa      xmm11,xmm8
+  pcmpgtw     xmm15,xmm14
+  movdqa      xmm1,[r12+rcx]
+  pavgw       xmm11,xmm9
+  movdqa      [rbp+170h],xmm11
+  movdqa      xmm10,xmm9
+  punpckhbw   xmm6,xmm12
+  psubw       xmm10,xmm8
+  punpckhbw   xmm1,xmm12
+  movdqa      xmm12,xmm0
+  movaps      xmm11,[rbp+0A0h]      ; tc words for the high 8 pixels
+  pcmpgtw     xmm12,xmm13
+  movaps      xmm13,xmm11
+  psubw       xmm13,xmm12
+  movdqa      [rbp+160h],xmm15
+  psubw       xmm13,xmm15
+  movdqa      xmm15,xmm9
+  psubw       xmm15,xmm1
+  movdqa      [rbp+150h],xmm12
+  pabsw       xmm12,xmm10
+  pabsw       xmm14,xmm15
+  movdqa      xmm15,xmm8
+  pcmpgtw     xmm4,xmm12
+  movdqa      xmm12,xmm0
+  psubw       xmm15,xmm6
+  pcmpgtw     xmm12,xmm14
+  pabsw       xmm14,xmm15
+  psllw       xmm10,2
+  pcmpgtw     xmm0,xmm14
+  movdqa      xmm14,xmm6
+  psubw       xmm14,xmm1
+  pand        xmm4,xmm12
+  paddw       xmm14,xmm10
+  pand        xmm4,xmm0
+  paddw       xmm14,[FOUR_16B_SSE2]
+  pxor        xmm15,xmm15
+  movaps      xmm12,xmm11
+  psubw       xmm15,xmm13
+  pxor        xmm0,xmm0
+  psraw       xmm14,3
+  pcmpgtw     xmm12,xmm0
+  pcmpeqw     xmm0,xmm11
+  pmaxsw      xmm15,xmm14
+  por         xmm12,xmm0
+  movdqa      xmm0,[rbp+120h]
+  pminsw      xmm13,xmm15
+  movdqa      xmm15,[rbp+0B0h]
+  movdqa      xmm10,xmm7
+  pand        xmm4,xmm12            ; xmm4 = filter-enable mask, high 8 pixels
+  paddw       xmm15,xmm0
+  pxor        xmm12,xmm12
+  paddw       xmm10,xmm7
+  movdqa      xmm14,xmm12
+  psubw       xmm15,xmm10
+  psubw       xmm14,xmm2
+  psraw       xmm15,1
+  pmaxsw      xmm15,xmm14
+  movdqa      xmm10,xmm6
+  pminsw      xmm15,xmm2
+  paddw       xmm10,xmm6
+  pand        xmm15,xmm3
+  psubw       xmm12,xmm11
+  pand        xmm15,[rbp+100h]
+  pand        xmm13,xmm4
+  paddw       xmm7,xmm15
+  paddw       xmm8,xmm13
+  movdqa      xmm15,[rbp+170h]
+  psubw       xmm9,xmm13
+  paddw       xmm5,xmm15
+  psubw       xmm5,xmm10
+  psraw       xmm5,1
+  pmaxsw      xmm5,xmm12
+  pminsw      xmm5,xmm11
+  pand        xmm5,xmm4
+  pand        xmm5,[rbp+150h]
+  paddw       xmm6,xmm5
+  movdqa      xmm5,[rbp+0C0h]
+  packuswb    xmm7,xmm6             ; new row rcx - 2*stride
+  movdqa      xmm6,[rbp+130h]
+  paddw       xmm5,xmm6
+  packuswb    xmm5,xmm8             ; new row rcx - stride
+  movdqa      xmm8,[rbp+0D0h]
+  psubw       xmm8,xmm6
+  movdqa      xmm6,[rbp+0F0h]
+  paddw       xmm6,xmm0
+  movdqa      xmm0,[rbp+0E0h]
+  packuswb    xmm8,xmm9             ; new row rcx
+  movdqa      xmm9,xmm0
+  paddw       xmm9,xmm0
+  psubw       xmm6,xmm9
+  psraw       xmm6,1
+  pmaxsw      xmm14,xmm6
+  pminsw      xmm2,xmm14
+  pand        xmm2,xmm3
+  pand        xmm2,[rbp+110h]
+  paddw       xmm0,xmm2
+  movdqa      xmm2,[rbp+140h]
+  paddw       xmm2,xmm15
+  movdqa      xmm15,xmm1
+  paddw       xmm15,xmm1
+  psubw       xmm2,xmm15
+  psraw       xmm2,1
+  pmaxsw      xmm12,xmm2
+  pminsw      xmm11,xmm12
+  pand        xmm11,xmm4
+  pand        xmm11,[rbp+160h]
+  paddw       xmm1,xmm11
+  movdqa      [rax+rcx],xmm7
+  movdqa      [r10],xmm5
+  packuswb    xmm0,xmm1             ; new row rcx + stride
+  movdqa      [rcx],xmm8
+  movdqa      [r12+rcx],xmm0
+  ; reload the callee-saved xmm registers from the slots filled in the prologue
+  movaps      xmm6,[rbp+20h]
+  movaps      xmm7,[rbp+10h]
+  movaps      xmm8,[rbp]
+  movaps      xmm9,[rbp+30h]
+  movaps      xmm10,[rbp+90h]
+  movaps      xmm11,[rbp+80h]
+  movaps      xmm12,[rbp+70h]
+  movaps      xmm13,[rbp+60h]
+  movaps      xmm14,[rbp+50h]
+  movaps      xmm15,[rbp+40h]
+  mov         r12,qword [rbp+180h]
+  lea         rsp,[rbp+190h]        ; rbp+190h == rsp value right after "push rbp"
+  pop         rbp
+  ret
+
+
+WELS_EXTERN   DeblockLumaEq4V_sse2
+
+;*******************************************************************************
+;   DeblockLumaEq4V_sse2  (Win64 calling convention)
+;
+;   Inputs as consumed by the code below:
+;     rcx   pixel row pointer; rows are accessed with movdqa (16-byte aligned)
+;     edx   row stride in bytes (sign-extended)
+;     r8w   first 16-bit threshold, r9w second 16-bit threshold
+;           (presumably alpha and beta - confirm against the C prototype)
+;
+;   Reads the eight rows rcx-4*stride .. rcx+3*stride and rewrites the six rows
+;   rcx-3*stride .. rcx+2*stride. The low 8 pixels of each row are processed
+;   first, then the high 8 pixels; results are merged with packuswb.
+;
+;   rbx, rbp, rsi, rdi and xmm6-xmm15 are saved in the prologue and restored
+;   (interleaved with the final stores) before returning.
+;   NOTE: pabsw is an SSSE3 instruction, despite the _sse2 suffix of the name.
+;*******************************************************************************
+ALIGN  16
+DeblockLumaEq4V_sse2:
+  mov         rax,rsp               ; rax = rsp at entry, base for the xmm save slots
+  push        rbx  
+  push        rbp  
+  push        rsi  
+  push        rdi  
+  sub         rsp,1D8h              ; locals live at [rsp] .. [rsp+12Fh]
+  movaps      [rax-38h],xmm6        ; save callee-saved xmm6-xmm15 (continues below)
+  movaps      [rax-48h],xmm7 
+  movaps      [rax-58h],xmm8 
+  pxor        xmm1,xmm1 
+  movsxd      r10,edx               ; r10 = stride
+  mov         rbp,rcx               ; rbp = pixel row pointer
+  mov         r11d,r8d              ; r11w = first threshold
+  mov         rdx,rcx 
+  mov         rdi,rbp 
+  mov         rbx,rbp 
+  movdqa      xmm5,[rbp]            ; row rbp
+  movaps      [rax-68h],xmm9 
+  movaps      [rax-78h],xmm10 
+  punpcklbw   xmm5,xmm1 
+  movaps      [rax-88h],xmm11 
+  movaps      [rax-98h],xmm12 
+  movaps      [rax-0A8h],xmm13 
+  movaps      [rax-0B8h],xmm14 
+  movdqa      xmm14,[r10+rbp]       ; row rbp + stride
+  movaps      [rax-0C8h],xmm15 
+  lea         eax,[r10*4] 
+  movsxd      r8,eax                ; r8 = 4*stride (temporary)
+  lea         eax,[r10+r10*2] 
+  movsxd      rcx,eax               ; rcx = 3*stride
+  lea         eax,[r10+r10] 
+  sub         rdx,r8                ; rdx = rbp - 4*stride
+  punpcklbw   xmm14,xmm1 
+  movdqa      [rsp+90h],xmm5 
+  movdqa      [rsp+30h],xmm14 
+  movsxd      rsi,eax               ; rsi = 2*stride
+  movsx       eax,r11w 
+  sub         rdi,rcx               ; rdi = rbp - 3*stride
+  sub         rbx,rsi               ; rbx = rbp - 2*stride
+  mov         r8,rbp 
+  sub         r8,r10                ; r8 = rbp - stride
+  movd        xmm0,eax 
+  movsx       eax,r9w 
+  movdqa      xmm12,[rdi] 
+  movdqa      xmm6, [rsi+rbp] 
+  movdqa      xmm13,[rbx] 
+  punpcklwd   xmm0,xmm0 
+  pshufd      xmm11,xmm0,0          ; xmm11 = first threshold in every word
+  punpcklbw   xmm13,xmm1 
+  punpcklbw   xmm6,xmm1 
+  movdqa      xmm8,[r8] 
+  movd        xmm0,eax 
+  movdqa      xmm10,xmm11 
+  mov         eax,2 
+  punpcklbw   xmm8,xmm1 
+  punpcklbw   xmm12,xmm1 
+  cwde                              ; sign-extends ax into eax; eax stays 2
+  punpcklwd   xmm0,xmm0 
+  psraw       xmm10,2 
+  movdqa      xmm1,xmm8 
+  movdqa      [rsp+0F0h],xmm13 
+  movdqa      [rsp+0B0h],xmm8 
+  pshufd      xmm7,xmm0,0           ; xmm7 = second threshold in every word
+  psubw       xmm1,xmm13 
+  movdqa      xmm0,xmm5 
+  movdqa      xmm4,xmm7 
+  movdqa      xmm2,xmm7 
+  psubw       xmm0,xmm8 
+  pabsw       xmm3,xmm0 
+  pabsw       xmm0,xmm1 
+  movdqa      xmm1,xmm5 
+  movdqa      [rsp+40h],xmm7 
+  movdqa      [rsp+60h],xmm6 
+  pcmpgtw     xmm4,xmm0 
+  psubw       xmm1,xmm14 
+  pabsw       xmm0,xmm1 
+  pcmpgtw     xmm2,xmm0 
+  pand        xmm4,xmm2 
+  movdqa      xmm0,xmm11 
+  pcmpgtw     xmm0,xmm3 
+  pand        xmm4,xmm0 
+  movd        xmm0,eax 
+  movdqa      [rsp+20h],xmm4        ; combined threshold mask, low 8 pixels
+  punpcklwd   xmm0,xmm0 
+  pshufd      xmm2,xmm0,0           ; xmm2 = 2 in every word
+  paddw       xmm10,xmm2            ; xmm10 = (first threshold >> 2) + 2
+  movdqa      [rsp+0A0h],xmm2 
+  movdqa      xmm15,xmm7 
+  pxor        xmm4,xmm4 
+  movdqa      xmm0,xmm8 
+  psubw       xmm0,xmm12 
+  mov         eax,4 
+  pabsw       xmm0,xmm0 
+  movdqa      xmm1,xmm10 
+  cwde                              ; sign-extends ax into eax; eax stays 4
+  pcmpgtw     xmm15,xmm0 
+  pcmpgtw     xmm1,xmm3 
+  movdqa      xmm3,xmm7 
+  movdqa      xmm7,[rdx]            ; row rbp - 4*stride
+  movdqa      xmm0,xmm5 
+  psubw       xmm0,xmm6 
+  pand        xmm15,xmm1 
+  punpcklbw   xmm7,xmm4 
+  movdqa      xmm9,xmm15 
+  pabsw       xmm0,xmm0 
+  psllw       xmm7,1 
+  pandn       xmm9,xmm12 
+  pcmpgtw     xmm3,xmm0 
+  paddw       xmm7,xmm12 
+  movd        xmm0,eax 
+  pand        xmm3,xmm1 
+  paddw       xmm7,xmm12 
+  punpcklwd   xmm0,xmm0 
+  paddw       xmm7,xmm12 
+  pshufd      xmm1,xmm0,0           ; xmm1 = 4 in every word
+  paddw       xmm7,xmm13 
+  movdqa      xmm0,xmm3 
+  pandn       xmm0,xmm6 
+  paddw       xmm7,xmm8 
+  movdqa      [rsp+70h],xmm1 
+  paddw       xmm7,xmm5 
+  movdqa      [rsp+120h],xmm0 
+  movdqa      xmm0,[rcx+rbp]        ; row rbp + 3*stride
+  punpcklbw   xmm0,xmm4 
+  paddw       xmm7,xmm1 
+  movdqa      xmm4,xmm15 
+  psllw       xmm0,1 
+  psraw       xmm7,3 
+  paddw       xmm0,xmm6 
+  pand        xmm7,xmm15 
+  paddw       xmm0,xmm6 
+  paddw       xmm0,xmm6 
+  paddw       xmm0,xmm14 
+  movdqa      xmm6,xmm15 
+  paddw       xmm0,xmm5 
+  pandn       xmm6,xmm13 
+  paddw       xmm0,xmm8 
+  paddw       xmm0,xmm1 
+  psraw       xmm0,3 
+  movdqa      xmm1,xmm12 
+  paddw       xmm1,xmm13 
+  pand        xmm0,xmm3 
+  movdqa      [rsp+100h],xmm0 
+  movdqa      xmm0,xmm8 
+  paddw       xmm0,xmm5 
+  paddw       xmm1,xmm0 
+  movdqa      xmm0,xmm3 
+  paddw       xmm1,xmm2 
+  psraw       xmm1,2 
+  pandn       xmm0,xmm14 
+  pand        xmm4,xmm1 
+  movdqa      [rsp+0E0h],xmm0 
+  movdqa      xmm0,xmm5 
+  paddw       xmm0,xmm8 
+  movdqa      xmm1,[rsp+60h] 
+  paddw       xmm1,xmm14 
+  movdqa      xmm14,xmm3 
+  paddw       xmm1,xmm0 
+  movdqa      xmm0,xmm8 
+  paddw       xmm0,[rsp+30h] 
+  paddw       xmm1,xmm2 
+  psraw       xmm1,2 
+  pand        xmm14,xmm1 
+  movdqa      xmm1,xmm13 
+  paddw       xmm1,xmm13 
+  paddw       xmm1,xmm0 
+  paddw       xmm1,xmm2 
+  psraw       xmm1,2 
+  movdqa      xmm0,[rsp+30h] 
+  movdqa      xmm2,xmm13 
+  movdqa      xmm5,xmm15 
+  paddw       xmm0,[rsp+70h] 
+  pandn       xmm5,xmm1 
+  paddw       xmm2,xmm8 
+  movdqa      xmm8,[rsp+90h] 
+  movdqa      xmm1,xmm12 
+  paddw       xmm2,xmm8 
+  psllw       xmm2,1 
+  paddw       xmm2,xmm0 
+  paddw       xmm1,xmm2 
+  movdqa      xmm0,xmm8 
+  movdqa      xmm8,xmm3 
+  movdqa      xmm2,[rsp+30h] 
+  paddw       xmm0,xmm13 
+  psraw       xmm1,3 
+  pand        xmm15,xmm1 
+  movdqa      xmm1,xmm2 
+  paddw       xmm1,xmm2 
+  paddw       xmm2,[rsp+90h] 
+  paddw       xmm2,[rsp+0B0h] 
+  paddw       xmm1,xmm0 
+  movdqa      xmm0,xmm13 
+  movdqa      xmm13,[r8]            ; reload row rbp - stride for the high 8 pixels
+  paddw       xmm0, [rsp+70h] 
+  paddw       xmm1, [rsp+0A0h] 
+  psllw       xmm2,1 
+  paddw       xmm2,xmm0 
+  psraw       xmm1,2 
+  movdqa      xmm0, [rdi] 
+  pandn       xmm8,xmm1 
+  movdqa      xmm1, [rsp+60h] 
+  paddw       xmm1,xmm2 
+  movdqa      xmm2, [rbx] 
+  psraw       xmm1,3 
+  pand        xmm3,xmm1 
+  movdqa      xmm1, [rbp] 
+  movdqa      [rsp+0D0h],xmm3 
+  pxor        xmm3,xmm3 
+  punpckhbw   xmm0,xmm3             ; from here on: high 8 pixels of each row
+  punpckhbw   xmm1,xmm3 
+  punpckhbw   xmm13,xmm3 
+  movdqa      [rsp+0C0h],xmm0 
+  movdqa      xmm0,[r10+rbp] 
+  movdqa      [rsp],xmm1 
+  punpckhbw   xmm0,xmm3 
+  punpckhbw   xmm2,xmm3 
+  movdqa      [rsp+80h],xmm0 
+  movdqa      xmm0,[rsi+rbp] 
+  movdqa      [rsp+10h],xmm13 
+  punpckhbw   xmm0,xmm3 
+  movdqa      [rsp+50h],xmm0 
+  movdqa      xmm0,xmm1 
+  movdqa      xmm1,xmm13 
+  psubw       xmm0,xmm13 
+  psubw       xmm1,xmm2 
+  pabsw       xmm3,xmm0 
+  pabsw       xmm0,xmm1 
+  movdqa      xmm1,[rsp] 
+  movdqa      xmm13,[rsp+40h] 
+  movdqa      [rsp+110h],xmm2 
+  psubw       xmm1, [rsp+80h] 
+  pcmpgtw     xmm13,xmm0 
+  pcmpgtw     xmm11,xmm3 
+  pabsw       xmm0,xmm1 
+  pcmpgtw     xmm10,xmm3 
+  movdqa      xmm1, [rsp+40h] 
+  movdqa      xmm2,xmm1 
+  movdqa      xmm3,xmm1 
+  pcmpgtw     xmm2,xmm0 
+  movdqa      xmm0, [rsp+10h] 
+  pand        xmm13,xmm2 
+  pand        xmm13,xmm11           ; combined threshold mask, high 8 pixels
+  movdqa      xmm11,[rsp+0C0h] 
+  psubw       xmm0,xmm11 
+  pabsw       xmm0,xmm0 
+  pcmpgtw     xmm3,xmm0 
+  pand        xmm3,xmm10 
+  movdqa      xmm0,[rsp] 
+  psubw       xmm0,[rsp+50h] 
+  movdqa      xmm2,[rdx] 
+  pabsw       xmm0,xmm0 
+  por         xmm7,xmm9 
+  movdqa      xmm9,[rsp+20h] 
+  pcmpgtw     xmm1,xmm0 
+  pand        xmm9,xmm7 
+  movdqa      xmm7,[rsp+20h] 
+  movdqa      xmm0,xmm7 
+  pandn       xmm0,xmm12 
+  movdqa      xmm12,[rsp+110h] 
+  pand        xmm1,xmm10 
+  movdqa      xmm10,[rsp+70h] 
+  movdqa      [rsp+40h],xmm1 
+  movdqa      xmm1,xmm13 
+  por         xmm9,xmm0 
+  pxor        xmm0,xmm0 
+  por         xmm4,xmm6 
+  movdqa      xmm6,xmm7 
+  punpckhbw   xmm2,xmm0 
+  por         xmm15,xmm5 
+  movdqa      xmm5,[rsp+20h] 
+  movdqa      xmm0,xmm3 
+  psllw       xmm2,1 
+  pandn       xmm0,xmm11 
+  pand        xmm6,xmm4 
+  movdqa      xmm4,[rsp] 
+  paddw       xmm2,xmm11 
+  pand        xmm5,xmm15 
+  movdqa      xmm15,[rsp+20h] 
+  paddw       xmm2,xmm11 
+  paddw       xmm2,xmm11 
+  paddw       xmm2,xmm12 
+  paddw       xmm2,[rsp+10h] 
+  paddw       xmm2,[rsp] 
+  paddw       xmm2,xmm10 
+  psraw       xmm2,3 
+  pand        xmm2,xmm3 
+  por         xmm2,xmm0 
+  pand        xmm1,xmm2 
+  movdqa      xmm0,xmm13 
+  movdqa      xmm2,xmm11 
+  pandn       xmm0,xmm11 
+  paddw       xmm2,xmm12 
+  por         xmm1,xmm0 
+  packuswb    xmm9,xmm1             ; new row rbp - 3*stride
+  movdqa      xmm0,xmm7 
+  movdqa      xmm7,[rsp+0A0h] 
+  pandn       xmm0,[rsp+0F0h] 
+  movdqa      xmm1,xmm3 
+  por         xmm6,xmm0 
+  movdqa      xmm0,[rsp+10h] 
+  paddw       xmm0,xmm4 
+  paddw       xmm2,xmm0 
+  paddw       xmm2,xmm7 
+  movdqa      xmm0,xmm3 
+  pandn       xmm0,xmm12 
+  psraw       xmm2,2 
+  pand        xmm1,xmm2 
+  por         xmm1,xmm0 
+  movdqa      xmm2,xmm13 
+  movdqa      xmm0,xmm13 
+  pand        xmm2,xmm1 
+  pandn       xmm0,xmm12 
+  movdqa      xmm1,xmm12 
+  paddw       xmm1,[rsp+10h] 
+  por         xmm2,xmm0 
+  movdqa      xmm0,xmm15 
+  pandn       xmm0,[rsp+0B0h] 
+  paddw       xmm1,xmm4 
+  packuswb    xmm6,xmm2             ; new row rbp - 2*stride
+  movdqa      xmm2,xmm3 
+  psllw       xmm1,1 
+  por         xmm5,xmm0 
+  movdqa      xmm0,[rsp+80h] 
+  paddw       xmm0,xmm10 
+  paddw       xmm1,xmm0 
+  paddw       xmm11,xmm1 
+  psraw       xmm11,3 
+  movdqa      xmm1,xmm12 
+  pand        xmm2,xmm11 
+  paddw       xmm1,xmm12 
+  movdqa      xmm11,[rsp+80h] 
+  movdqa      xmm0, [rsp+10h] 
+  por         xmm14,[rsp+0E0h] 
+  paddw       xmm0,xmm11 
+  movdqa      xmm4,xmm15 
+  paddw       xmm1,xmm0 
+  movdqa      xmm0,xmm13 
+  paddw       xmm1,xmm7 
+  psraw       xmm1,2 
+  pandn       xmm3,xmm1 
+  por         xmm2,xmm3 
+  movdqa      xmm1,xmm13 
+  movdqa      xmm3,[rsp+10h] 
+  pandn       xmm0,xmm3 
+  pand        xmm1,xmm2 
+  movdqa      xmm2,xmm11 
+  paddw       xmm2,[rsp] 
+  por         xmm1,xmm0 
+  movdqa      xmm0,[rsp+0D0h] 
+  por         xmm0,xmm8 
+  paddw       xmm2,xmm3 
+  packuswb    xmm5,xmm1             ; new row rbp - stride
+  movdqa      xmm8,[rsp+40h] 
+  movdqa      xmm1,[rsp+50h] 
+  movdqa      xmm3,xmm8 
+  pand        xmm4,xmm0 
+  psllw       xmm2,1 
+  movdqa      xmm0,xmm15 
+  pandn       xmm0,[rsp+90h] 
+  por         xmm4,xmm0 
+  movdqa      xmm0,xmm12 
+  paddw       xmm0,xmm10 
+  paddw       xmm2,xmm0 
+  paddw       xmm1,xmm2 
+  movdqa      xmm0,[rsp] 
+  movdqa      xmm2,xmm11 
+  paddw       xmm0,xmm12 
+  movdqa      xmm12,[rsp] 
+  paddw       xmm2,xmm11 
+  paddw       xmm2,xmm0 
+  psraw       xmm1,3 
+  movdqa      xmm0,xmm8 
+  pand        xmm3,xmm1 
+  paddw       xmm2,xmm7 
+  movdqa      xmm1,xmm13 
+  psraw       xmm2,2 
+  pandn       xmm0,xmm2 
+  por         xmm3,xmm0 
+  movdqa      xmm2,[rsp+50h] 
+  movdqa      xmm0,xmm13 
+  pandn       xmm0,xmm12 
+  pand        xmm1,xmm3 
+  paddw       xmm2,xmm11 
+  movdqa      xmm3,xmm15 
+  por         xmm1,xmm0 
+  pand        xmm3,xmm14 
+  movdqa      xmm14,[rsp+10h] 
+  movdqa      xmm0,xmm15 
+  pandn       xmm0,[rsp+30h] 
+  packuswb    xmm4,xmm1             ; new row rbp
+  movdqa      xmm1,xmm8 
+  por         xmm3,xmm0 
+  movdqa      xmm0,xmm12 
+  paddw       xmm0,xmm14 
+  paddw       xmm2,xmm0 
+  paddw       xmm2,xmm7 
+  movdqa      xmm0,xmm8 
+  pandn       xmm0,xmm11 
+  psraw       xmm2,2 
+  pand        xmm1,xmm2 
+  por         xmm1,xmm0 
+  movdqa      xmm2,xmm13 
+  movdqa      xmm0,xmm13 
+  pandn       xmm0,xmm11 
+  pand        xmm2,xmm1 
+  movdqa      xmm1,xmm15 
+  por         xmm2,xmm0 
+  packuswb    xmm3,xmm2             ; new row rbp + stride
+  movdqa      xmm0,[rsp+100h] 
+  por         xmm0,[rsp+120h] 
+  pand        xmm1,xmm0 
+  movdqa      xmm2,[rcx+rbp] 
+  movdqa      xmm7,[rsp+50h] 
+  pandn       xmm15,[rsp+60h] 
+  lea         r11,[rsp+1D8h]        ; r11 = entry rsp - 20h, base for the xmm restores
+  pxor        xmm0,xmm0 
+  por         xmm1,xmm15 
+  movaps      xmm15,[r11-0A8h]      ; restore xmm15 (restores are interleaved from here on)
+  movdqa      [rdi],xmm9            ; store row rbp - 3*stride
+  movaps      xmm9,[r11-48h]        ; restore xmm9
+  punpckhbw   xmm2,xmm0 
+  psllw       xmm2,1 
+  paddw       xmm2,xmm7 
+  paddw       xmm2,xmm7 
+  movdqa      [rbx],xmm6            ; store row rbp - 2*stride
+  movaps      xmm6,[r11-18h]        ; restore xmm6
+  paddw       xmm2,xmm7 
+  paddw       xmm2,xmm11 
+  movaps      xmm11,[r11-68h]       ; restore xmm11
+  paddw       xmm2,xmm12 
+  movaps      xmm12,[r11-78h]       ; restore xmm12
+  paddw       xmm2,xmm14 
+  paddw       xmm2,xmm10 
+  psraw       xmm2,3 
+  movaps      xmm10,[r11-58h]       ; restore xmm10
+  movaps      xmm14,[r11-98h]       ; restore xmm14
+  movdqa      xmm0,xmm13 
+  pand        xmm2,xmm8 
+  pandn       xmm8,xmm7 
+  pandn       xmm13,xmm7 
+  por         xmm2,xmm8 
+  movaps      xmm7,[r11-28h]        ; restore xmm7
+  movaps      xmm8,[r11-38h]        ; restore xmm8
+  movdqa      [r8],xmm5             ; store row rbp - stride
+  pand        xmm0,xmm2 
+  por         xmm0,xmm13 
+  packuswb    xmm1,xmm0             ; new row rbp + 2*stride
+  movaps      xmm13,[r11-88h]       ; restore xmm13
+  movdqa      [rbp],xmm4            ; store row rbp
+  movdqa      [r10+rbp],xmm3        ; store row rbp + stride
+  movdqa      [rsi+rbp],xmm1        ; store row rbp + 2*stride
+  mov         rsp,r11               ; drop the local frame
+  pop         rdi  
+  pop         rsi  
+  pop         rbp  
+  pop         rbx  
+  ret
+
+
+WELS_EXTERN  DeblockChromaLt4V_sse2
+
+;-----------------------------------------------------------------------
+; DeblockChromaLt4V_sse2  (Microsoft x64 calling convention)
+;
+;   rcx              first pixel pointer  (presumably the Cb plane - confirm)
+;   rdx              second pixel pointer (presumably the Cr plane - confirm)
+;   r8d              row stride in bytes (sign-extended before use)
+;   r9w              threshold compared against |p0 - q0| (presumably iAlpha)
+;   [entry rsp+28h]  iBeta, low 16 bits used
+;   [entry rsp+30h]  pTC, pointer to four signed bytes tc[0..3]
+;
+; For each pointer, 8 pixels are read from the rows at -2*stride (p1),
+; -stride (p0), 0 (q0) and +stride (q1).  The two planes are packed into
+; one register (first plane in the low qword, second in the high qword).
+;   delta = clip(-tc, tc, ((q0 - p0) * 4 + (p1 - q1) + 4) >> 3)
+; is applied as p0 += delta, q0 -= delta wherever
+;   |p0 - q0| < alpha, |p1 - p0| < beta, |q1 - q0| < beta and tc > 0.
+; Each tc[i] is duplicated into two adjacent 16-bit lanes and the same tc
+; vector is used for both planes.  Only the p0 and q0 rows are written.
+;
+; Stack frame (0C8h bytes, 16-byte aligned after the two pushes):
+;   [rsp+00h]          tc vector (8 words)
+;   [rsp+10h]          q0 words of the second plane
+;   [rsp+20h..0BFh]    save area for xmm15..xmm6
+;
+; xmm6-xmm15 are callee-saved in this calling convention.  The restores
+; that are interleaved with the filter code below read this save area, so
+; the registers must be stored here first; without the stores the
+; restores would load uninitialised stack contents into them.
+;
+; NOTE(review): pabsw is an SSSE3 instruction although the symbol carries
+; an _sse2 suffix - confirm the CPU-feature dispatch of the caller.
+;-----------------------------------------------------------------------
+ALIGN  16
+DeblockChromaLt4V_sse2:
+  mov         rax,rsp
+  push        rbx
+  push        rdi
+  sub         rsp,0C8h
+  ; save the callee-saved xmm registers into the slots the epilogue reads
+  movaps      [rsp+0B0h],xmm6
+  movaps      [rsp+0A0h],xmm7
+  movaps      [rsp+90h],xmm8
+  movaps      [rsp+80h],xmm9
+  movaps      [rsp+70h],xmm10
+  movaps      [rsp+60h],xmm11
+  movaps      [rsp+50h],xmm12
+  movaps      [rsp+40h],xmm13
+  movaps      [rsp+30h],xmm14
+  movaps      [rsp+20h],xmm15
+  mov         r10,qword [rax + 30h]  ; pTC
+  pxor        xmm1,xmm1              ; xmm1 = 0 (unpack helper / compare base)
+  mov         rbx,rcx                ; rbx = first plane, q0 row
+  movsxd      r11,r8d                ; r11 = stride
+  movsx       ecx,byte [r10]         ; tc[0]
+  movsx       r8d,byte [r10+2]       ; tc[2]
+  mov         rdi,rdx                ; rdi = second plane, q0 row
+  movq        xmm2,[rbx]             ; q0, first plane
+  movq        xmm9,[r11+rbx]         ; q1, first plane
+  movsx       edx,byte [r10+1]       ; tc[1]
+  mov         word [rsp+2],cx
+  mov         word [rsp],cx
+  movsx       eax,byte [r10+3]       ; tc[3]
+  mov         word [rsp+6],dx
+  mov         word [rsp+4],dx
+  movdqa      xmm11,xmm1
+  mov         word [rsp+0Eh],ax
+  mov         word [rsp+0Ch],ax
+  lea         eax,[r11+r11]
+  movsxd      rcx,eax                ; rcx = 2 * stride
+  mov         rax,rbx
+  mov         rdx,rdi
+  sub         rax,rcx                ; rax = first plane, p1 row
+  mov         word [rsp+0Ah],r8w
+  mov         word [rsp+8],r8w
+  movdqa      xmm6,[rsp]             ; xmm6 = tc vector
+  movdqa      xmm7,xmm6
+  movq        xmm13, [rax]           ; p1, first plane
+  mov         rax,rdi
+  sub         rax,rcx                ; rax = second plane, p1 row
+  mov         rcx,rbx
+  pcmpgtw     xmm7,xmm1              ; xmm7 = (tc > 0) mask
+  psubw       xmm11,xmm6             ; xmm11 = -tc
+  sub         rcx,r11                ; rcx = first plane, p0 row
+  sub         rdx,r11                ; rdx = second plane, p0 row
+  movq        xmm0,[rax]
+  movsx       eax,r9w                ; threshold for |p0 - q0|
+  movq        xmm15,[rcx]            ; p0, first plane
+  punpcklqdq  xmm13,xmm0             ; xmm13 = p1 (both planes)
+  movq        xmm0, [rdx]
+  movdqa      xmm4,xmm13
+  punpcklqdq  xmm15,xmm0             ; xmm15 = p0 (both planes)
+  movq        xmm0, [rdi]
+  punpcklbw   xmm4,xmm1              ; xmm4 = p1 words, first plane
+  movdqa      xmm12,xmm15
+  punpcklqdq  xmm2,xmm0              ; xmm2 = q0 (both planes)
+  movq        xmm0, [r11+rdi]
+  punpcklbw   xmm12,xmm1             ; xmm12 = p0 words, first plane
+  movdqa      xmm14,xmm2
+  punpcklqdq  xmm9,xmm0              ; xmm9 = q1 (both planes)
+  punpckhbw   xmm2,xmm1              ; xmm2 = q0 words, second plane
+  punpcklbw   xmm14,xmm1             ; xmm14 = q0 words, first plane
+  movd        xmm0,eax
+  movsx       eax,word [rsp + 0C8h + 38h] ; iBeta
+  punpckhbw   xmm13,xmm1             ; xmm13 = p1 words, second plane
+  punpckhbw   xmm15,xmm1             ; xmm15 = p0 words, second plane
+  movdqa      xmm3,xmm9
+  movdqa      [rsp+10h],xmm2         ; spill q0 words of the second plane
+  punpcklwd   xmm0,xmm0
+  punpckhbw   xmm9,xmm1              ; xmm9 = q1 words, second plane
+  punpcklbw   xmm3,xmm1              ; xmm3 = q1 words, first plane
+  movdqa      xmm1,xmm14
+  pshufd      xmm10,xmm0,0           ; xmm10 = alpha broadcast
+  movd        xmm0,eax
+  mov         eax,4
+  cwde
+  punpcklwd   xmm0,xmm0
+  pshufd      xmm8,xmm0,0            ; xmm8 = beta broadcast
+  movd        xmm0,eax
+  punpcklwd   xmm0,xmm0
+  pshufd      xmm5,xmm0,0            ; xmm5 = rounding constant 4
+  ; ---- first plane ----
+  psubw       xmm1,xmm12             ; q0 - p0
+  movdqa      xmm2,xmm10
+  lea         r11,[rsp+0C8h]         ; r11 = frame top, base for the restores
+  psllw       xmm1,2
+  movdqa      xmm0,xmm4
+  psubw       xmm4,xmm12             ; p1 - p0
+  psubw       xmm0,xmm3              ; p1 - q1
+  psubw       xmm3,xmm14             ; q1 - q0
+  paddw       xmm1,xmm0
+  paddw       xmm1,xmm5
+  movdqa      xmm0,xmm11
+  psraw       xmm1,3
+  pmaxsw      xmm0,xmm1              ; max(-tc, delta)
+  pminsw      xmm6,xmm0              ; xmm6 = clipped delta
+  movdqa      xmm1,xmm8
+  movdqa      xmm0,xmm12
+  psubw       xmm0,xmm14
+  pabsw       xmm0,xmm0
+  pcmpgtw     xmm2,xmm0              ; alpha > |p0 - q0|
+  pabsw       xmm0,xmm4
+  pcmpgtw     xmm1,xmm0              ; beta > |p1 - p0|
+  pabsw       xmm0,xmm3
+  movdqa      xmm3,[rsp]             ; reload tc for the second plane
+  pand        xmm2,xmm1
+  movdqa      xmm1,xmm8
+  pcmpgtw     xmm1,xmm0              ; beta > |q1 - q0|
+  movdqa      xmm0,xmm13
+  pand        xmm2,xmm1
+  psubw       xmm0,xmm9              ; second plane: p1 - q1
+  psubw       xmm13,xmm15            ; second plane: p1 - p0
+  pand        xmm2,xmm7
+  pand        xmm6,xmm2              ; zero delta where the filter is off
+  paddw       xmm12,xmm6             ; p0 += delta
+  psubw       xmm14,xmm6             ; q0 -= delta
+  ; ---- second plane (restores are interleaved from here on) ----
+  movdqa      xmm2,[rsp+10h]
+  movaps      xmm6,[r11-18h]         ; restore xmm6
+  movdqa      xmm1,xmm2
+  psubw       xmm1,xmm15             ; q0 - p0
+  psubw       xmm9,xmm2              ; q1 - q0
+  psllw       xmm1,2
+  paddw       xmm1,xmm0
+  paddw       xmm1,xmm5
+  movdqa      xmm0,xmm15
+  psubw       xmm0,xmm2
+  psraw       xmm1,3
+  pmaxsw      xmm11,xmm1
+  pabsw       xmm0,xmm0
+  movdqa      xmm1,xmm8
+  pcmpgtw     xmm10,xmm0             ; alpha > |p0 - q0|
+  pabsw       xmm0,xmm13
+  pminsw      xmm3,xmm11             ; xmm3 = clipped delta
+  movaps      xmm11,[r11-68h]        ; restore xmm11
+  movaps      xmm13,[rsp+40h]        ; restore xmm13
+  pcmpgtw     xmm1,xmm0              ; beta > |p1 - p0|
+  pabsw       xmm0,xmm9
+  movaps      xmm9, [r11-48h]        ; restore xmm9
+  pand        xmm10,xmm1
+  pcmpgtw     xmm8,xmm0              ; beta > |q1 - q0|
+  pand        xmm10,xmm8
+  pand        xmm10,xmm7
+  movaps      xmm8,[r11-38h]         ; restore xmm8
+  movaps      xmm7,[r11-28h]         ; restore xmm7
+  pand        xmm3,xmm10
+  paddw       xmm15,xmm3             ; p0 += delta
+  psubw       xmm2,xmm3              ; q0 -= delta
+  movaps      xmm10,[r11-58h]        ; restore xmm10
+  packuswb    xmm12,xmm15            ; p0 bytes: first plane low, second high
+  movaps      xmm15,[rsp+20h]        ; restore xmm15
+  packuswb    xmm14,xmm2             ; q0 bytes: first plane low, second high
+  movq        [rcx],xmm12
+  movq        [rbx],xmm14
+  psrldq      xmm12,8
+  psrldq      xmm14,8
+  movq        [rdx],xmm12
+  movaps      xmm12,[r11-78h]        ; restore xmm12
+  movq        [rdi],xmm14
+  movaps      xmm14,[rsp+30h]        ; restore xmm14
+  mov         rsp,r11
+  pop         rdi
+  pop         rbx
+  ret
+
+
+WELS_EXTERN   DeblockChromaEq4V_sse2
+;-----------------------------------------------------------------------
+; DeblockChromaEq4V_sse2  (Microsoft x64 calling convention)
+;
+;   rcx              first pixel pointer  (presumably the Cb plane - confirm)
+;   rdx              second pixel pointer (presumably the Cr plane - confirm)
+;   r8d              row stride in bytes
+;   r9w              threshold compared against |p0 - q0| (presumably iAlpha)
+;   [entry rsp+28h]  iBeta, low 16 bits used
+;
+; For each pointer, 8 pixels are read from the rows at -2*stride (p1),
+; -stride (p0), 0 (q0) and +stride (q1); both planes share one register.
+; Wherever |p0 - q0| < alpha, |p1 - p0| < beta and |q1 - q0| < beta:
+;   p0' = (2*p1 + p0 + q1 + 2) >> 2
+;   q0' = (2*q1 + q0 + p1 + 2) >> 2
+; Elsewhere the original p0/q0 are kept.  Only the p0 and q0 rows are
+; written back.
+;
+; Stack frame (90h bytes): [rsp+00h..8Fh] is the save area for
+; xmm14..xmm6.  These registers are callee-saved in this calling
+; convention and are reloaded from the save area further down, so they
+; must be stored on entry; otherwise the reloads return uninitialised
+; stack contents to the caller.
+;
+; NOTE(review): pabsw is an SSSE3 instruction although the symbol carries
+; an _sse2 suffix - confirm the CPU-feature dispatch of the caller.
+;-----------------------------------------------------------------------
+ALIGN 16
+DeblockChromaEq4V_sse2:
+  mov         rax,rsp
+  push        rbx
+  sub         rsp,90h
+  ; save the callee-saved xmm registers into the slots the epilogue reads
+  movaps      [rsp+80h],xmm6
+  movaps      [rsp+70h],xmm7
+  movaps      [rsp+60h],xmm8
+  movaps      [rsp+50h],xmm9
+  movaps      [rsp+40h],xmm10
+  movaps      [rsp+30h],xmm11
+  movaps      [rsp+20h],xmm12
+  movaps      [rsp+10h],xmm13
+  movaps      [rsp],xmm14
+  pxor        xmm1,xmm1              ; xmm1 = 0 (unpack helper)
+  mov         r11,rcx                ; r11 = first plane, q0 row
+  mov         rbx,rdx                ; rbx = second plane, q0 row
+  mov         r10d,r9d               ; keep the alpha threshold
+  movq        xmm13,[r11]            ; q0, first plane
+  lea         eax,[r8+r8]
+  movsxd      r9,eax                 ; r9 = 2 * stride
+  mov         rax,rcx
+  sub         rax,r9
+  movq        xmm14,[rax]            ; p1, first plane
+  mov         rax,rdx
+  sub         rax,r9
+  movq        xmm0,[rax]             ; p1, second plane
+  movsxd      rax,r8d                ; rax = stride
+  sub         rcx,rax                ; rcx = first plane, p0 row
+  sub         rdx,rax                ; rdx = second plane, p0 row
+  movq        xmm12,[rax+r11]        ; q1, first plane
+  movq        xmm10,[rcx]            ; p0, first plane
+  punpcklqdq  xmm14,xmm0             ; xmm14 = p1 (both planes)
+  movdqa      xmm8,xmm14
+  movq        xmm0,[rdx]
+  punpcklbw   xmm8,xmm1              ; xmm8 = p1 words, first plane
+  punpckhbw   xmm14,xmm1             ; xmm14 = p1 words, second plane
+  punpcklqdq  xmm10,xmm0             ; xmm10 = p0 (both planes)
+  movq        xmm0,[rbx]
+  movdqa      xmm5,xmm10
+  punpcklqdq  xmm13,xmm0             ; xmm13 = q0 (both planes)
+  movq        xmm0, [rax+rbx]
+  punpcklbw   xmm5,xmm1              ; xmm5 = p0 words, first plane
+  movsx       eax,r10w
+  movdqa      xmm9,xmm13
+  punpcklqdq  xmm12,xmm0             ; xmm12 = q1 (both planes)
+  punpcklbw   xmm9,xmm1              ; xmm9 = q0 words, first plane
+  punpckhbw   xmm10,xmm1             ; xmm10 = p0 words, second plane
+  movd        xmm0,eax
+  movsx       eax,word [rsp + 90h + 8h + 28h]   ; iBeta
+  punpckhbw   xmm13,xmm1             ; xmm13 = q0 words, second plane
+  movdqa      xmm7,xmm12
+  punpcklwd   xmm0,xmm0
+  punpckhbw   xmm12,xmm1             ; xmm12 = q1 words, second plane
+  pshufd      xmm11,xmm0,0           ; xmm11 = alpha broadcast
+  punpcklbw   xmm7,xmm1              ; xmm7 = q1 words, first plane
+  movd        xmm0,eax
+  movdqa      xmm1,xmm8
+  psubw       xmm1,xmm5              ; p1 - p0
+  punpcklwd   xmm0,xmm0
+  movdqa      xmm6,xmm11
+  pshufd      xmm3,xmm0,0            ; xmm3 = beta broadcast
+  movdqa      xmm0,xmm5
+  psubw       xmm0,xmm9
+  movdqa      xmm2,xmm3
+  pabsw       xmm0,xmm0
+  pcmpgtw     xmm6,xmm0              ; alpha > |p0 - q0|
+  pabsw       xmm0,xmm1
+  movdqa      xmm1,xmm3
+  pcmpgtw     xmm2,xmm0              ; beta > |p1 - p0|
+  pand        xmm6,xmm2
+  movdqa      xmm0,xmm7
+  movdqa      xmm2,xmm3
+  psubw       xmm0,xmm9
+  pabsw       xmm0,xmm0
+  pcmpgtw     xmm1,xmm0              ; beta > |q1 - q0|
+  pand        xmm6,xmm1              ; xmm6 = filter mask, first plane
+  movdqa      xmm0,xmm10
+  movdqa      xmm1,xmm14
+  psubw       xmm0,xmm13
+  psubw       xmm1,xmm10
+  pabsw       xmm0,xmm0
+  pcmpgtw     xmm11,xmm0
+  pabsw       xmm0,xmm1
+  pcmpgtw     xmm2,xmm0
+  pand        xmm11,xmm2
+  movdqa      xmm0,xmm12
+  movdqa      xmm4,xmm6
+  movdqa      xmm1,xmm8
+  mov         eax,2
+  cwde
+  paddw       xmm1,xmm8              ; 2 * p1 (first plane)
+  psubw       xmm0,xmm13
+  paddw       xmm1,xmm5
+  pabsw       xmm0,xmm0
+  movdqa      xmm2,xmm14
+  paddw       xmm1,xmm7              ; 2*p1 + p0 + q1
+  pcmpgtw     xmm3,xmm0
+  paddw       xmm2,xmm14             ; 2 * p1 (second plane)
+  movd        xmm0,eax
+  pand        xmm11,xmm3             ; xmm11 = filter mask, second plane
+  paddw       xmm7,xmm7              ; 2 * q1 (first plane)
+  paddw       xmm2,xmm10
+  punpcklwd   xmm0,xmm0
+  paddw       xmm2,xmm12
+  paddw       xmm12,xmm12            ; 2 * q1 (second plane)
+  pshufd      xmm3,xmm0,0            ; xmm3 = rounding constant 2
+  paddw       xmm7,xmm9
+  paddw       xmm12,xmm13
+  movdqa      xmm0,xmm6
+  paddw       xmm1,xmm3
+  pandn       xmm0,xmm5              ; keep original p0 where the mask is clear
+  paddw       xmm7,xmm8
+  psraw       xmm1,2
+  paddw       xmm12,xmm14
+  paddw       xmm7,xmm3
+  movaps      xmm14,[rsp]            ; restore xmm14
+  pand        xmm4,xmm1
+  paddw       xmm12,xmm3
+  psraw       xmm7,2
+  movdqa      xmm1,xmm11
+  por         xmm4,xmm0
+  psraw       xmm12,2
+  paddw       xmm2,xmm3
+  movdqa      xmm0,xmm11
+  pandn       xmm0,xmm10
+  psraw       xmm2,2
+  pand        xmm1,xmm2
+  por         xmm1,xmm0
+  packuswb    xmm4,xmm1              ; p0 bytes: first plane low, second high
+  movdqa      xmm0,xmm11
+  movdqa      xmm1,xmm6
+  pand        xmm1,xmm7
+  movaps      xmm7,[rsp+70h]         ; restore xmm7
+  movq        [rcx],xmm4
+  pandn       xmm6,xmm9
+  pandn       xmm11,xmm13
+  pand        xmm0,xmm12
+  por         xmm1,xmm6
+  por         xmm0,xmm11
+  psrldq      xmm4,8
+  packuswb    xmm1,xmm0              ; q0 bytes: first plane low, second high
+  movq        [r11],xmm1
+  psrldq      xmm1,8
+  movq        [rdx],xmm4
+  lea         r11,[rsp+90h]          ; r11 = frame top, base for the restores
+  movaps      xmm6,[r11-10h]
+  movaps      xmm8,[r11-30h]
+  movaps      xmm9,[r11-40h]
+  movq        [rbx],xmm1
+  movaps      xmm10,[r11-50h]
+  movaps      xmm11,[r11-60h]
+  movaps      xmm12,[r11-70h]
+  movaps      xmm13,[r11-80h]
+  mov         rsp,r11
+  pop         rbx
+  ret
+
+
+
+
+
+WELS_EXTERN   DeblockChromaEq4H_sse2
+;-----------------------------------------------------------------------
+; DeblockChromaEq4H_sse2  (Microsoft x64 calling convention)
+;
+;   rcx              first pixel pointer  (presumably the Cb plane - confirm)
+;   rdx              second pixel pointer (presumably the Cr plane - confirm)
+;   r8d              row stride in bytes
+;   r9w              threshold compared against |p0 - q0| (presumably iAlpha)
+;   [entry rsp+28h]  iBeta, low 16 bits used
+;
+; Four bytes starting at offset -2 (p1 p0 q0 q1) are gathered from 8
+; consecutive rows of each pointer.  The 16 rows are transposed so that
+; each of p1/p0/q0/q1 occupies one register, then wherever
+; |p0 - q0| < alpha, |p1 - p0| < beta and |q1 - q0| < beta:
+;   p0' = (2*p1 + p0 + q1 + 2) >> 2
+;   q0' = (2*q1 + q0 + p1 + 2) >> 2
+; The result is transposed back and all four bytes of every row are
+; stored again (p1 and q1 unchanged).
+;
+; Stack frame (140h bytes):
+;   [rsp+00h]          copy of the p1 bytes
+;   [rsp+10h..8Fh]     scratch for the gather / scatter
+;   [rsp+0A0h..13Fh]   save area for xmm15..xmm6
+; rbx is kept in the caller-provided home slot at [entry rsp+20h].
+;
+; xmm6-xmm15 are callee-saved in this calling convention and all of them
+; are overwritten below, so they are stored on entry and reloaded before
+; returning.
+;
+; NOTE(review): pabsw is an SSSE3 instruction although the symbol carries
+; an _sse2 suffix - confirm the CPU-feature dispatch of the caller.
+;-----------------------------------------------------------------------
+ALIGN  16
+DeblockChromaEq4H_sse2:
+  mov         rax,rsp
+  mov         [rax+20h],rbx          ; save rbx in the home space
+  push        rdi
+  sub         rsp,140h
+  ; save the callee-saved xmm registers (part of the frame not used as scratch)
+  movaps      [rsp+130h],xmm6
+  movaps      [rsp+120h],xmm7
+  movaps      [rsp+110h],xmm8
+  movaps      [rsp+100h],xmm9
+  movaps      [rsp+0F0h],xmm10
+  movaps      [rsp+0E0h],xmm11
+  movaps      [rsp+0D0h],xmm12
+  movaps      [rsp+0C0h],xmm13
+  movaps      [rsp+0B0h],xmm14
+  movaps      [rsp+0A0h],xmm15
+  mov         rdi,rdx                ; rdi = second plane
+  lea         eax,[r8*4]
+  movsxd      r10,eax                ; r10 = 4 * stride (temporarily)
+  mov         eax,[rcx-2]
+  mov         [rsp+10h],eax
+  lea         rbx,[r10+rdx-2]        ; rbx = second plane, row 4, column -2
+  lea         r11,[r10+rcx-2]        ; r11 = first plane, row 4, column -2
+  movdqa      xmm5,[rsp+10h]
+  movsxd      r10,r8d                ; r10 = stride
+  mov         eax,[r10+rcx-2]
+  lea         rdx,[r10+r10*2]        ; rdx = 3 * stride
+  mov         [rsp+20h],eax
+  mov         eax,[rcx+r10*2-2]
+  mov         [rsp+30h],eax
+  mov         eax,[rdx+rcx-2]
+  movdqa      xmm2,[rsp+20h]
+  mov         [rsp+40h],eax
+  mov         eax, [rdi-2]
+  movdqa      xmm4,[rsp+30h]
+  mov         [rsp+50h],eax
+  mov         eax,[r10+rdi-2]
+  movdqa      xmm3,[rsp+40h]
+  mov         [rsp+60h],eax
+  mov         eax,[rdi+r10*2-2]
+  punpckldq   xmm5,[rsp+50h]
+  mov         [rsp+70h],eax
+  mov         eax, [rdx+rdi-2]
+  punpckldq   xmm2, [rsp+60h]
+  mov          [rsp+80h],eax
+  mov         eax,[r11]
+  punpckldq   xmm4, [rsp+70h]
+  mov         [rsp+50h],eax
+  mov         eax,[rbx]
+  punpckldq   xmm3,[rsp+80h]
+  mov         [rsp+60h],eax
+  mov         eax,[r10+r11]
+  movdqa      xmm0, [rsp+50h]
+  punpckldq   xmm0, [rsp+60h]
+  punpcklqdq  xmm5,xmm0              ; xmm5 = rows 0 and 4 of both planes
+  movdqa      [rsp+50h],xmm0
+  mov         [rsp+50h],eax
+  mov         eax,[r10+rbx]
+  movdqa      xmm0,[rsp+50h]
+  movdqa      xmm1,xmm5
+  mov         [rsp+60h],eax
+  mov         eax,[r11+r10*2]
+  punpckldq   xmm0, [rsp+60h]
+  punpcklqdq  xmm2,xmm0              ; xmm2 = rows 1 and 5 of both planes
+  punpcklbw   xmm1,xmm2
+  punpckhbw   xmm5,xmm2
+  movdqa      [rsp+50h],xmm0
+  mov         [rsp+50h],eax
+  mov         eax,[rbx+r10*2]
+  movdqa      xmm0,[rsp+50h]
+  mov         [rsp+60h],eax
+  mov         eax, [rdx+r11]
+  movdqa      xmm15,xmm1
+  punpckldq   xmm0,[rsp+60h]
+  punpcklqdq  xmm4,xmm0              ; xmm4 = rows 2 and 6 of both planes
+  movdqa      [rsp+50h],xmm0
+  mov         [rsp+50h],eax
+  mov         eax, [rdx+rbx]
+  movdqa      xmm0,[rsp+50h]
+  mov         [rsp+60h],eax
+  punpckldq   xmm0, [rsp+60h]
+  punpcklqdq  xmm3,xmm0              ; xmm3 = rows 3 and 7 of both planes
+  ; transpose: bytes -> words -> dwords -> qwords
+  movdqa      xmm0,xmm4
+  punpcklbw   xmm0,xmm3
+  punpckhbw   xmm4,xmm3
+  punpcklwd   xmm15,xmm0
+  punpckhwd   xmm1,xmm0
+  movdqa      xmm0,xmm5
+  movdqa      xmm12,xmm15
+  punpcklwd   xmm0,xmm4
+  punpckhwd   xmm5,xmm4
+  punpckldq   xmm12,xmm0
+  punpckhdq   xmm15,xmm0
+  movdqa      xmm0,xmm1
+  movdqa      xmm11,xmm12
+  punpckldq   xmm0,xmm5
+  punpckhdq   xmm1,xmm5
+  punpcklqdq  xmm11,xmm0             ; xmm11 = p1 bytes
+  punpckhqdq  xmm12,xmm0             ; xmm12 = p0 bytes
+  movsx       eax,r9w
+  movdqa      xmm14,xmm15
+  punpcklqdq  xmm14,xmm1             ; xmm14 = q0 bytes
+  punpckhqdq  xmm15,xmm1             ; xmm15 = q1 bytes
+  pxor        xmm1,xmm1
+  movd        xmm0,eax
+  movdqa      xmm4,xmm12
+  movdqa      xmm8,xmm11
+  movsx       eax,word [rsp+170h] ; iBeta
+  punpcklwd   xmm0,xmm0
+  punpcklbw   xmm4,xmm1              ; xmm4 = p0 words, low half
+  punpckhbw   xmm12,xmm1             ; xmm12 = p0 words, high half
+  movdqa      xmm9,xmm14
+  movdqa      xmm7,xmm15
+  movdqa      xmm10,xmm15
+  pshufd      xmm13,xmm0,0           ; xmm13 = alpha broadcast
+  punpcklbw   xmm9,xmm1              ; xmm9 = q0 words, low half
+  punpckhbw   xmm14,xmm1             ; xmm14 = q0 words, high half
+  movdqa      xmm6,xmm13
+  movd        xmm0,eax
+  movdqa      [rsp],xmm11            ; keep the p1 bytes for the transpose back
+  mov         eax,2
+  cwde
+  punpckhbw   xmm11,xmm1             ; xmm11 = p1 words, high half
+  punpckhbw   xmm10,xmm1             ; xmm10 = q1 words, high half
+  punpcklbw   xmm7,xmm1              ; xmm7 = q1 words, low half
+  punpcklwd   xmm0,xmm0
+  punpcklbw   xmm8,xmm1              ; xmm8 = p1 words, low half
+  pshufd      xmm3,xmm0,0            ; xmm3 = beta broadcast
+  movdqa      xmm1,xmm8
+  movdqa      xmm0,xmm4
+  psubw       xmm0,xmm9
+  psubw       xmm1,xmm4
+  movdqa      xmm2,xmm3
+  pabsw       xmm0,xmm0
+  pcmpgtw     xmm6,xmm0              ; alpha > |p0 - q0|
+  pabsw       xmm0,xmm1
+  movdqa      xmm1,xmm3
+  pcmpgtw     xmm2,xmm0              ; beta > |p1 - p0|
+  pand        xmm6,xmm2
+  movdqa      xmm0,xmm7
+  movdqa      xmm2,xmm3
+  psubw       xmm0,xmm9
+  pabsw       xmm0,xmm0
+  pcmpgtw     xmm1,xmm0              ; beta > |q1 - q0|
+  pand        xmm6,xmm1              ; xmm6 = filter mask, low half
+  movdqa      xmm0,xmm12
+  movdqa      xmm1,xmm11
+  psubw       xmm0,xmm14
+  psubw       xmm1,xmm12
+  movdqa      xmm5,xmm6
+  pabsw       xmm0,xmm0
+  pcmpgtw     xmm13,xmm0
+  pabsw       xmm0,xmm1
+  movdqa      xmm1,xmm8
+  pcmpgtw     xmm2,xmm0
+  paddw       xmm1,xmm8              ; 2 * p1 (low half)
+  movdqa      xmm0,xmm10
+  pand        xmm13,xmm2
+  psubw       xmm0,xmm14
+  paddw       xmm1,xmm4
+  movdqa      xmm2,xmm11
+  pabsw       xmm0,xmm0
+  paddw       xmm2,xmm11             ; 2 * p1 (high half)
+  paddw       xmm1,xmm7
+  pcmpgtw     xmm3,xmm0
+  paddw       xmm2,xmm12
+  movd        xmm0,eax
+  pand        xmm13,xmm3             ; xmm13 = filter mask, high half
+  paddw       xmm2,xmm10
+  punpcklwd   xmm0,xmm0
+  pshufd      xmm3,xmm0,0            ; xmm3 = rounding constant 2
+  movdqa      xmm0,xmm6
+  paddw       xmm1,xmm3
+  pandn       xmm0,xmm4
+  paddw       xmm2,xmm3
+  psraw       xmm1,2
+  pand        xmm5,xmm1
+  por         xmm5,xmm0
+  paddw       xmm7,xmm7              ; 2 * q1 (low half)
+  paddw       xmm10,xmm10            ; 2 * q1 (high half)
+  psraw       xmm2,2
+  movdqa      xmm1,xmm13
+  movdqa      xmm0,xmm13
+  pandn       xmm0,xmm12
+  pand        xmm1,xmm2
+  paddw       xmm7,xmm9
+  por         xmm1,xmm0
+  paddw       xmm10,xmm14
+  paddw       xmm7,xmm8
+  movdqa      xmm0,xmm13
+  packuswb    xmm5,xmm1              ; xmm5 = new p0 bytes
+  paddw       xmm7,xmm3
+  paddw       xmm10,xmm11
+  movdqa      xmm1,xmm6
+  paddw       xmm10,xmm3
+  pandn       xmm6,xmm9
+  psraw       xmm7,2
+  pand        xmm1,xmm7
+  psraw       xmm10,2
+  pandn       xmm13,xmm14
+  pand        xmm0,xmm10
+  por         xmm1,xmm6
+  ; transpose back
+  movdqa      xmm6,[rsp]             ; p1 bytes
+  movdqa      xmm4,xmm6
+  por         xmm0,xmm13
+  punpcklbw   xmm4,xmm5
+  punpckhbw   xmm6,xmm5
+  movdqa      xmm3,xmm4
+  packuswb    xmm1,xmm0              ; xmm1 = new q0 bytes
+  movdqa      xmm0,xmm1
+  punpckhbw   xmm1,xmm15
+  punpcklbw   xmm0,xmm15
+  punpcklwd   xmm3,xmm0
+  punpckhwd   xmm4,xmm0
+  movdqa      xmm0,xmm6
+  movdqa      xmm2,xmm3
+  punpcklwd   xmm0,xmm1
+  punpckhwd   xmm6,xmm1
+  movdqa      xmm1,xmm4
+  punpckldq   xmm2,xmm0
+  punpckhdq   xmm3,xmm0
+  punpckldq   xmm1,xmm6
+  movdqa      xmm0,xmm2
+  punpcklqdq  xmm0,xmm1
+  punpckhdq   xmm4,xmm6
+  punpckhqdq  xmm2,xmm1
+  movdqa      [rsp+10h],xmm0
+  movdqa      [rsp+60h],xmm2
+  movdqa      xmm0,xmm3
+  ; scatter 4 bytes back to every row
+  mov         eax,[rsp+10h]
+  mov         [rcx-2],eax
+  mov         eax,[rsp+60h]
+  punpcklqdq  xmm0,xmm4
+  punpckhqdq  xmm3,xmm4
+  mov         [r10+rcx-2],eax
+  movdqa      [rsp+20h],xmm0
+  mov         eax, [rsp+20h]
+  movdqa      [rsp+70h],xmm3
+  mov         [rcx+r10*2-2],eax
+  mov         eax,[rsp+70h]
+  mov         [rdx+rcx-2],eax
+  mov         eax,[rsp+18h]
+  mov         [r11],eax
+  mov         eax,[rsp+68h]
+  mov         [r10+r11],eax
+  mov         eax,[rsp+28h]
+  mov         [r11+r10*2],eax
+  mov         eax,[rsp+78h]
+  mov         [rdx+r11],eax
+  mov         eax,[rsp+14h]
+  mov         [rdi-2],eax
+  mov         eax,[rsp+64h]
+  mov         [r10+rdi-2],eax
+  mov         eax,[rsp+24h]
+  mov         [rdi+r10*2-2],eax
+  mov         eax, [rsp+74h]
+  mov         [rdx+rdi-2],eax
+  mov         eax, [rsp+1Ch]
+  mov         [rbx],eax
+  mov         eax, [rsp+6Ch]
+  mov         [r10+rbx],eax
+  mov         eax,[rsp+2Ch]
+  mov         [rbx+r10*2],eax
+  mov         eax,[rsp+7Ch]
+  mov         [rdx+rbx],eax
+  ; restore the callee-saved xmm registers
+  movaps      xmm6,[rsp+130h]
+  movaps      xmm7,[rsp+120h]
+  movaps      xmm8,[rsp+110h]
+  movaps      xmm9,[rsp+100h]
+  movaps      xmm10,[rsp+0F0h]
+  movaps      xmm11,[rsp+0E0h]
+  movaps      xmm12,[rsp+0D0h]
+  movaps      xmm13,[rsp+0C0h]
+  movaps      xmm14,[rsp+0B0h]
+  movaps      xmm15,[rsp+0A0h]
+  lea         r11,[rsp+140h]
+  mov         rbx, [r11+28h]         ; reload rbx from the home space
+  mov         rsp,r11
+  pop         rdi
+  ret
+
+
+
+WELS_EXTERN DeblockChromaLt4H_sse2
+;-----------------------------------------------------------------------
+; DeblockChromaLt4H_sse2  (Microsoft x64 calling convention)
+;
+;   rcx              first pixel pointer  (presumably the Cb plane - confirm)
+;   rdx              second pixel pointer (presumably the Cr plane - confirm)
+;   r8d              row stride in bytes
+;   r9w              threshold compared against |p0 - q0| (presumably iAlpha)
+;   [entry rsp+28h]  iBeta, low 16 bits used
+;   [entry rsp+30h]  pTC, pointer to four signed bytes tc[0..3]
+;
+; Four bytes starting at offset -2 (p1 p0 q0 q1) are gathered from 8
+; consecutive rows of each pointer and transposed so that each of
+; p1/p0/q0/q1 occupies one register.  Then
+;   delta = clip(-tc, tc, ((q0 - p0) * 4 + (p1 - q1) + 4) >> 3)
+; is applied as p0 += delta, q0 -= delta wherever
+;   |p0 - q0| < alpha, |p1 - p0| < beta, |q1 - q0| < beta and tc > 0.
+; Each tc[i] is duplicated into two adjacent 16-bit lanes and the same tc
+; vector is used for the low and the high half.  The result is transposed
+; back and all four bytes of every row are stored again.
+;
+; Stack frame (170h bytes):
+;   [rsp+00h]          tc vector, later the q0 words of the high half
+;   [rsp+10h]          p1 bytes
+;   [rsp+20h]          q1 bytes
+;   [rsp+30h]          p0 words of the high half
+;   [rsp+40h..0BFh]    scratch for the gather / scatter
+;   [rsp+0D0h..16Fh]   save area for xmm15..xmm6
+;
+; xmm6-xmm15 are callee-saved in this calling convention and all of them
+; are overwritten below, so they are stored on entry and reloaded before
+; returning.
+;
+; NOTE(review): pabsw is an SSSE3 instruction although the symbol carries
+; an _sse2 suffix - confirm the CPU-feature dispatch of the caller.
+;-----------------------------------------------------------------------
+ALIGN  16
+DeblockChromaLt4H_sse2:
+  mov         rax,rsp
+  push        rbx
+  push        rbp
+  push        rsi
+  push        rdi
+  push        r12
+  sub         rsp,170h
+  ; save the callee-saved xmm registers (part of the frame not used as scratch)
+  movaps      [rsp+160h],xmm6
+  movaps      [rsp+150h],xmm7
+  movaps      [rsp+140h],xmm8
+  movaps      [rsp+130h],xmm9
+  movaps      [rsp+120h],xmm10
+  movaps      [rsp+110h],xmm11
+  movaps      [rsp+100h],xmm12
+  movaps      [rsp+0F0h],xmm13
+  movaps      [rsp+0E0h],xmm14
+  movaps      [rsp+0D0h],xmm15
+
+  movsxd      rsi,r8d                ; rsi = stride
+  lea         eax,[r8*4]
+  mov         r11d,r9d               ; keep the alpha threshold
+  movsxd      r10,eax                ; r10 = 4 * stride (temporarily)
+  mov         eax, [rcx-2]
+  mov         r12,rdx                ; r12 = second plane
+  mov         [rsp+40h],eax
+  mov         eax, [rsi+rcx-2]
+  lea         rbx,[r10+rcx-2]        ; rbx = first plane, row 4, column -2
+  movdqa      xmm5,[rsp+40h]
+  mov         [rsp+50h],eax
+  mov         eax, [rcx+rsi*2-2]
+  lea         rbp,[r10+rdx-2]        ; rbp = second plane, row 4, column -2
+  movdqa      xmm2, [rsp+50h]
+  mov         [rsp+60h],eax
+  lea         r10,[rsi+rsi*2]        ; r10 = 3 * stride
+  mov         rdi,rcx                ; rdi = first plane
+  mov         eax,[r10+rcx-2]
+  movdqa      xmm4,[rsp+60h]
+  mov         [rsp+70h],eax
+  mov         eax,[rdx-2]
+  mov         [rsp+80h],eax
+  mov         eax, [rsi+rdx-2]
+  movdqa      xmm3,[rsp+70h]
+  mov         [rsp+90h],eax
+  mov         eax,[rdx+rsi*2-2]
+  punpckldq   xmm5,[rsp+80h]
+  mov         [rsp+0A0h],eax
+  mov         eax, [r10+rdx-2]
+  punpckldq   xmm2,[rsp+90h]
+  mov         [rsp+0B0h],eax
+  mov         eax, [rbx]
+  punpckldq   xmm4,[rsp+0A0h]
+  mov         [rsp+80h],eax
+  mov         eax,[rbp]
+  punpckldq   xmm3,[rsp+0B0h]
+  mov         [rsp+90h],eax
+  mov         eax,[rsi+rbx]
+  movdqa      xmm0,[rsp+80h]
+  punpckldq   xmm0,[rsp+90h]
+  punpcklqdq  xmm5,xmm0              ; xmm5 = rows 0 and 4 of both planes
+  movdqa      [rsp+80h],xmm0
+  mov         [rsp+80h],eax
+  mov         eax,[rsi+rbp]
+  movdqa      xmm0,[rsp+80h]
+  movdqa      xmm1,xmm5
+  mov         [rsp+90h],eax
+  mov         eax,[rbx+rsi*2]
+  punpckldq   xmm0,[rsp+90h]
+  punpcklqdq  xmm2,xmm0              ; xmm2 = rows 1 and 5 of both planes
+  punpcklbw   xmm1,xmm2
+  punpckhbw   xmm5,xmm2
+  movdqa      [rsp+80h],xmm0
+  mov         [rsp+80h],eax
+  mov         eax,[rbp+rsi*2]
+  movdqa      xmm0, [rsp+80h]
+  mov         [rsp+90h],eax
+  mov         eax,[r10+rbx]
+  movdqa      xmm7,xmm1
+  punpckldq   xmm0,[rsp+90h]
+  punpcklqdq  xmm4,xmm0              ; xmm4 = rows 2 and 6 of both planes
+  movdqa      [rsp+80h],xmm0
+  mov         [rsp+80h],eax
+  mov         eax, [r10+rbp]
+  movdqa      xmm0,[rsp+80h]
+  mov         [rsp+90h],eax
+  punpckldq   xmm0,[rsp+90h]
+  punpcklqdq  xmm3,xmm0              ; xmm3 = rows 3 and 7 of both planes
+  ; transpose: bytes -> words -> dwords -> qwords
+  movdqa      xmm0,xmm4
+  punpcklbw   xmm0,xmm3
+  punpckhbw   xmm4,xmm3
+  punpcklwd   xmm7,xmm0
+  punpckhwd   xmm1,xmm0
+  movdqa      xmm0,xmm5
+  movdqa      xmm6,xmm7
+  punpcklwd   xmm0,xmm4
+  punpckhwd   xmm5,xmm4
+  punpckldq   xmm6,xmm0
+  punpckhdq   xmm7,xmm0
+  movdqa      xmm0,xmm1
+  punpckldq   xmm0,xmm5
+  mov         rax, [rsp+1C8h]    ; pTC
+  punpckhdq   xmm1,xmm5
+  movdqa      xmm9,xmm6
+  punpckhqdq  xmm6,xmm0              ; xmm6 = p0 bytes
+  punpcklqdq  xmm9,xmm0              ; xmm9 = p1 bytes
+  movdqa      xmm2,xmm7
+  movdqa      xmm13,xmm6
+  movdqa      xmm4,xmm9
+  movdqa      [rsp+10h],xmm9         ; keep the p1 bytes for the transpose back
+  punpcklqdq  xmm2,xmm1              ; xmm2 = q0 bytes
+  punpckhqdq  xmm7,xmm1              ; xmm7 = q1 bytes
+  pxor        xmm1,xmm1
+  movsx       ecx,byte [rax+3]       ; tc[3]
+  movsx       edx,byte [rax+2]       ; tc[2]
+  movsx       r8d,byte [rax+1]       ; tc[1]
+  movsx       r9d,byte [rax]         ; tc[0]
+  movdqa      xmm10,xmm1
+  movdqa      xmm15,xmm2
+  punpckhbw   xmm2,xmm1              ; xmm2 = q0 words, high half
+  punpckhbw   xmm6,xmm1              ; xmm6 = p0 words, high half
+  punpcklbw   xmm4,xmm1              ; xmm4 = p1 words, low half
+  movsx       eax,r11w
+  mov         word [rsp+0Eh],cx
+  mov         word [rsp+0Ch],cx
+  movdqa      xmm3,xmm7
+  movdqa      xmm8,xmm7
+  movdqa      [rsp+20h],xmm7         ; keep the q1 bytes for the transpose back
+  punpcklbw   xmm15,xmm1             ; xmm15 = q0 words, low half
+  punpcklbw   xmm13,xmm1             ; xmm13 = p0 words, low half
+  punpcklbw   xmm3,xmm1              ; xmm3 = q1 words, low half
+  mov         word [rsp+0Ah],dx
+  mov         word [rsp+8],dx
+  mov         word [rsp+6],r8w
+  movd        xmm0,eax
+  movdqa      [rsp+30h],xmm6         ; spill p0 words of the high half
+  punpckhbw   xmm9,xmm1              ; xmm9 = p1 words, high half
+  punpckhbw   xmm8,xmm1              ; xmm8 = q1 words, high half
+  punpcklwd   xmm0,xmm0
+  movsx       eax,word [rsp+1C0h]   ; iBeta
+  mov         word [rsp+4],r8w
+  mov         word [rsp+2],r9w
+  pshufd      xmm12,xmm0,0           ; xmm12 = alpha broadcast
+  mov         word [rsp],r9w
+  movd        xmm0,eax
+  mov         eax,4
+  cwde
+  movdqa      xmm14, [rsp]           ; xmm14 = tc vector (read before the slot is reused)
+  movdqa      [rsp],xmm2             ; spill q0 words of the high half
+  movdqa      xmm2,xmm12
+  punpcklwd   xmm0,xmm0
+  pshufd      xmm11,xmm0,0           ; xmm11 = beta broadcast
+  psubw       xmm10,xmm14            ; xmm10 = -tc
+  movd        xmm0,eax
+  movdqa      xmm7,xmm14
+  movdqa      xmm6,xmm14
+  pcmpgtw     xmm7,xmm1              ; xmm7 = (tc > 0) mask
+  punpcklwd   xmm0,xmm0
+  pshufd      xmm5,xmm0,0            ; xmm5 = rounding constant 4
+  ; ---- low half ----
+  movdqa      xmm0,xmm4
+  movdqa      xmm1,xmm15
+  psubw       xmm4,xmm13             ; p1 - p0
+  psubw       xmm0,xmm3              ; p1 - q1
+  psubw       xmm1,xmm13             ; q0 - p0
+  psubw       xmm3,xmm15             ; q1 - q0
+  psllw       xmm1,2
+  paddw       xmm1,xmm0
+  paddw       xmm1,xmm5
+  movdqa      xmm0,xmm10
+  psraw       xmm1,3
+  pmaxsw      xmm0,xmm1              ; max(-tc, delta)
+  pminsw      xmm6,xmm0              ; xmm6 = clipped delta
+  movdqa      xmm1,xmm11
+  movdqa      xmm0,xmm13
+  psubw       xmm0,xmm15
+  pabsw       xmm0,xmm0
+  pcmpgtw     xmm2,xmm0              ; alpha > |p0 - q0|
+  pabsw       xmm0,xmm4
+  pcmpgtw     xmm1,xmm0              ; beta > |p1 - p0|
+  pabsw       xmm0,xmm3
+  pand        xmm2,xmm1
+  movdqa      xmm1,xmm11
+  movdqa      xmm3,[rsp+30h]         ; p0 words, high half
+  pcmpgtw     xmm1,xmm0              ; beta > |q1 - q0|
+  movdqa      xmm0,xmm9
+  pand        xmm2,xmm1
+  psubw       xmm0,xmm8              ; high half: p1 - q1
+  psubw       xmm9,xmm3              ; high half: p1 - p0
+  pand        xmm2,xmm7
+  pand        xmm6,xmm2              ; zero delta where the filter is off
+  psubw       xmm15,xmm6             ; q0 -= delta
+  paddw       xmm13,xmm6             ; p0 += delta
+  ; ---- high half ----
+  movdqa      xmm2,[rsp]             ; q0 words, high half
+  movdqa      xmm1,xmm2
+  psubw       xmm1,xmm3              ; q0 - p0
+  psubw       xmm8,xmm2              ; q1 - q0
+  psllw       xmm1,2
+  paddw       xmm1,xmm0
+  paddw       xmm1,xmm5
+  movdqa      xmm0,xmm3
+  movdqa      xmm5,[rsp+10h]         ; p1 bytes
+  psubw       xmm0,xmm2
+  psraw       xmm1,3
+  movdqa      xmm4,xmm5
+  pabsw       xmm0,xmm0
+  pmaxsw      xmm10,xmm1
+  movdqa      xmm1,xmm11
+  pcmpgtw     xmm12,xmm0             ; alpha > |p0 - q0|
+  pabsw       xmm0,xmm9
+  pminsw      xmm14,xmm10            ; xmm14 = clipped delta
+  pcmpgtw     xmm1,xmm0              ; beta > |p1 - p0|
+  pabsw       xmm0,xmm8
+  pcmpgtw     xmm11,xmm0             ; beta > |q1 - q0|
+  pand        xmm12,xmm1
+  movdqa      xmm1,[rsp+20h]         ; q1 bytes
+  pand        xmm12,xmm11
+  pand        xmm12,xmm7
+  pand        xmm14,xmm12
+  paddw       xmm3,xmm14             ; p0 += delta
+  psubw       xmm2,xmm14             ; q0 -= delta
+  packuswb    xmm13,xmm3             ; xmm13 = new p0 bytes
+  packuswb    xmm15,xmm2             ; xmm15 = new q0 bytes
+  ; transpose back
+  punpcklbw   xmm4,xmm13
+  punpckhbw   xmm5,xmm13
+  movdqa      xmm0,xmm15
+  punpcklbw   xmm0,xmm1
+  punpckhbw   xmm15,xmm1
+  movdqa      xmm3,xmm4
+  punpcklwd   xmm3,xmm0
+  punpckhwd   xmm4,xmm0
+  movdqa      xmm0,xmm5
+  movdqa      xmm2,xmm3
+  movdqa      xmm1,xmm4
+  punpcklwd   xmm0,xmm15
+  punpckhwd   xmm5,xmm15
+  punpckldq   xmm2,xmm0
+  punpckhdq   xmm3,xmm0
+  punpckldq   xmm1,xmm5
+  movdqa      xmm0,xmm2
+  punpcklqdq  xmm0,xmm1
+  punpckhdq   xmm4,xmm5
+  punpckhqdq  xmm2,xmm1
+  movdqa      [rsp+40h],xmm0
+  movdqa      xmm0,xmm3
+  movdqa      [rsp+90h],xmm2
+  ; scatter 4 bytes back to every row
+  mov         eax,[rsp+40h]
+  mov         [rdi-2],eax
+  mov         eax, [rsp+90h]
+  punpcklqdq  xmm0,xmm4
+  punpckhqdq  xmm3,xmm4
+  mov         [rsi+rdi-2],eax
+  movdqa      [rsp+50h],xmm0
+  mov         eax,[rsp+50h]
+  movdqa      [rsp+0A0h],xmm3
+  mov         [rdi+rsi*2-2],eax
+  mov         eax,[rsp+0A0h]
+  mov         [r10+rdi-2],eax
+  mov         eax,[rsp+48h]
+  mov         [rbx],eax
+  mov         eax,[rsp+98h]
+  mov         [rsi+rbx],eax
+  mov         eax,[rsp+58h]
+  mov         [rbx+rsi*2],eax
+  mov         eax, [rsp+0A8h]
+  mov         [r10+rbx],eax
+  mov         eax, [rsp+44h]
+  mov         [r12-2],eax
+  mov         eax,[rsp+94h]
+  mov         [rsi+r12-2],eax
+  mov         eax,[rsp+54h]
+  mov         [r12+rsi*2-2],eax
+  mov         eax, [rsp+0A4h]
+  mov         [r10+r12-2],eax
+  mov         eax,[rsp+4Ch]
+  mov         [rbp],eax
+  mov         eax,[rsp+9Ch]
+  mov         [rsi+rbp],eax
+  mov         eax, [rsp+5Ch]
+  mov         [rbp+rsi*2],eax
+  mov         eax,[rsp+0ACh]
+  mov         [r10+rbp],eax
+  ; restore the callee-saved xmm registers
+  movaps      xmm6,[rsp+160h]
+  movaps      xmm7,[rsp+150h]
+  movaps      xmm8,[rsp+140h]
+  movaps      xmm9,[rsp+130h]
+  movaps      xmm10,[rsp+120h]
+  movaps      xmm11,[rsp+110h]
+  movaps      xmm12,[rsp+100h]
+  movaps      xmm13,[rsp+0F0h]
+  movaps      xmm14,[rsp+0E0h]
+  movaps      xmm15,[rsp+0D0h]
+  lea         r11,[rsp+170h]
+  mov         rsp,r11
+  pop         r12
+  pop         rdi
+  pop         rsi
+  pop         rbp
+  pop         rbx
+  ret
+
+
+
+%elifdef  UNIX64
+
+
+WELS_EXTERN   DeblockLumaLt4V_sse2
+
+DeblockLumaLt4V_sse2:
+  push        rbp      
+  mov         r11,r8  ; pTC                                                    
+  sub         rsp,1B0h                                                       
+  lea         rbp,[rsp+20h]                                                  
+  movd        xmm4,edx                                                                                                  
+  movd        xmm2,ecx                                                       
+  mov         qword [rbp+180h],r12                                       
+  mov         r10,rdi                                                        
+  movsxd      r12,esi                                                        
+  add         rsi,rsi
+  movsxd      rdx,esi 
+  sub         r10,r12                                                        
+  movsx       r8d,byte [r11]                                             
+  pxor        xmm3,xmm3                                                      
+  punpcklwd   xmm2,xmm2                                                      
+  movaps      [rbp+50h],xmm14                                    
+  lea         rax,[r12+r12*2]                                                
+  movdqa      xmm14,[rdx+rdi]                                    
+  neg         rax                                                            
+  pshufd      xmm0,xmm2,0                                                    
+  movd        xmm2,r8d                                                       
+  movsx       rsi,byte [r11+1]                                           
+  movsx       r8d,byte [r11+2]                                           
+  movsx       r11d,byte [r11+3]                                          
+  movaps      [rbp+70h],xmm12                                    
+  movd        xmm1,esi                                                      
+  movaps      [rbp+80h],xmm11                                    
+  movd        xmm12,r8d                                                      
+  movd        xmm11,r11d                                                     
+  movdqa      xmm5, [rax+rdi]                                     
+  lea         rax,[r12+r12]                                                  
+  punpcklwd   xmm12,xmm12                                                    
+  neg         rax                                                            
+  punpcklwd   xmm11,xmm11                                                    
+  movaps      [rbp],xmm8                                         
+  movdqa      xmm8, [r10]                                         
+  punpcklwd   xmm2,xmm2                                                      
+  punpcklwd   xmm1,xmm1                                                      
+  punpcklqdq  xmm12,xmm12                                                    
+  punpcklqdq  xmm11,xmm11                                                    
+  punpcklqdq  xmm2,xmm2                                                      
+  punpcklqdq  xmm1,xmm1                                                      
+  shufps      xmm12,xmm11,88h                                                
+  movdqa      xmm11,xmm8                                                     
+  movaps      [rbp+30h],xmm9                                     
+  movdqa      xmm9,[rdi]                                         
+  shufps      xmm2,xmm1,88h                                                  
+  movdqa      xmm1,xmm5                                                      
+  punpcklbw   xmm11,xmm3                                                     
+  movaps      [rbp+20h],xmm6                                     
+  movaps      [rbp+60h],xmm13                                    
+  movdqa      xmm13,xmm11                                                    
+  movaps      [rbp+90h],xmm10                                    
+  movdqa      xmm10,xmm9                                                     
+  movdqa      xmm6,[rax+rdi]                                     
+  punpcklbw   xmm1,xmm3                                                      
+  movaps      [rbp+0A0h],xmm12                                   
+  psubw       xmm13,xmm1                                                     
+  movaps      [rbp+40h],xmm15                                    
+  movdqa      xmm15,xmm14                                                    
+  movaps      [rbp+10h],xmm7                                     
+  movdqa      xmm7,xmm6                                                      
+  punpcklbw   xmm10,xmm3                                                     
+  movdqa      xmm12,[r12+rdi]                                    
+  punpcklbw   xmm7,xmm3                                                      
+  punpcklbw   xmm12,xmm3                                                     
+  punpcklbw   xmm15,xmm3                                                     
+  pabsw       xmm3,xmm13                                                     
+  movdqa      xmm13,xmm10                                                    
+  psubw       xmm13,xmm15                                                    
+  movdqa      [rbp+0F0h],xmm15                                   
+  pabsw       xmm15,xmm13                                                    
+  movdqa      xmm13,xmm11                                                    
+  movdqa      [rbp+0B0h],xmm1                                    
+  movdqa      xmm1,xmm0                                                      
+  pavgw       xmm13,xmm10                                                    
+  pcmpgtw     xmm1,xmm3                                                      
+  movdqa      [rbp+120h],xmm13                                   
+  movaps      xmm13,xmm2                                                     
+  punpcklwd   xmm4,xmm4                                                      
+  movdqa      xmm3,xmm0                                                      
+  movdqa      [rbp+100h],xmm1                                    
+  psubw       xmm13,xmm1                                                     
+  movdqa      xmm1,xmm10                                                     
+  pcmpgtw     xmm3,xmm15                                                     
+  pshufd      xmm4,xmm4,0                                                    
+  psubw       xmm1,xmm11                                                     
+  movdqa      [rbp+0D0h],xmm10                                   
+  psubw       xmm13,xmm3                                                     
+  movdqa      [rbp+110h],xmm3                                    
+  pabsw       xmm15,xmm1                                                     
+  movdqa      xmm3,xmm4                                                      
+  psubw       xmm10,xmm12                                                    
+  pcmpgtw     xmm3,xmm15                                                     
+  pabsw       xmm15,xmm10                                                    
+  movdqa      xmm10,xmm0                                                     
+  psllw       xmm1,2                                                         
+  movdqa      [rbp+0C0h],xmm11                                   
+  psubw       xmm11,xmm7                                                     
+  pcmpgtw     xmm10,xmm15                                                    
+  pabsw       xmm11,xmm11                                                    
+  movdqa      xmm15,xmm0                                                     
+  pand        xmm3,xmm10                                                     
+  pcmpgtw     xmm15,xmm11                                                    
+  movaps      xmm11,xmm2                                                     
+  pxor        xmm10,xmm10                                                    
+  pand        xmm3,xmm15                                                     
+  pcmpgtw     xmm11,xmm10                                                    
+  pcmpeqw     xmm10,xmm2                                                     
+  por         xmm11,xmm10                                                    
+  pand        xmm3,xmm11                                                     
+  movdqa      xmm11,xmm7                                                     
+  psubw       xmm11,xmm12                                                    
+  pxor        xmm15,xmm15                                                    
+  paddw       xmm11,xmm1                                                     
+  psubw       xmm15,xmm13                                                    
+  movdqa      [rbp+0E0h],xmm12                                   
+  paddw       xmm11,[FOUR_16B_SSE2] 
+  pxor        xmm12,xmm12                                                    
+  psraw       xmm11,3                                                        
+  punpckhbw   xmm8,xmm12                                                     
+  pmaxsw      xmm15,xmm11                                                    
+  punpckhbw   xmm5,xmm12                                                     
+  movdqa      xmm11,xmm8                                                     
+  pminsw      xmm13,xmm15                                                    
+  psubw       xmm11,xmm5                                                     
+  punpckhbw   xmm9,xmm12                                                     
+  pand        xmm13,xmm3                                                     
+  movdqa      [rbp+130h],xmm13                                   
+  pabsw       xmm13,xmm11                                                    
+  punpckhbw   xmm14,xmm12                                                    
+  movdqa      xmm11,xmm9                                                     
+  psubw       xmm11,xmm14                                                    
+  movdqa      xmm15,xmm0                                                     
+  movdqa      [rbp+140h],xmm14                                   
+  pabsw       xmm14,xmm11                                                    
+  movdqa      xmm11,xmm8                                                     
+  pcmpgtw     xmm15,xmm14                                                    
+  movdqa      xmm1,[r12+rdi]                                     
+  pavgw       xmm11,xmm9                                                     
+  movdqa      [rbp+170h],xmm11                                   
+  movdqa      xmm10,xmm9                                                     
+  punpckhbw   xmm6,xmm12                                                     
+  psubw       xmm10,xmm8                                                     
+  punpckhbw   xmm1,xmm12                                                     
+  movdqa      xmm12,xmm0                                                     
+  movaps      xmm11,[rbp+0A0h]                                   
+  pcmpgtw     xmm12,xmm13                                                    
+  movaps      xmm13,xmm11                                                    
+  psubw       xmm13,xmm12                                                    
+  movdqa      [rbp+160h],xmm15                                   
+  psubw       xmm13,xmm15                                                    
+  movdqa      xmm15,xmm9                                                     
+  psubw       xmm15,xmm1                                                     
+  movdqa      [rbp+150h],xmm12                                   
+  pabsw       xmm12,xmm10                                                    
+  pabsw       xmm14,xmm15                                                    
+  movdqa      xmm15,xmm8                                                     
+  pcmpgtw     xmm4,xmm12                                                     
+  movdqa      xmm12,xmm0                                                     
+  psubw       xmm15,xmm6                                                     
+  pcmpgtw     xmm12,xmm14                                                    
+  pabsw       xmm14,xmm15                                                    
+  psllw       xmm10,2                                                        
+  pcmpgtw     xmm0,xmm14                                                     
+  movdqa      xmm14,xmm6                                                     
+  psubw       xmm14,xmm1                                                     
+  pand        xmm4,xmm12                                                     
+  paddw       xmm14,xmm10                                                    
+  pand        xmm4,xmm0                                                      
+  paddw       xmm14,[FOUR_16B_SSE2] 
+  pxor        xmm15,xmm15                                                    
+  movaps      xmm12,xmm11                                                    
+  psubw       xmm15,xmm13                                                    
+  pxor        xmm0,xmm0                                                      
+  psraw       xmm14,3                                                        
+  pcmpgtw     xmm12,xmm0                                                     
+  pcmpeqw     xmm0,xmm11                                                     
+  pmaxsw      xmm15,xmm14                                                    
+  por         xmm12,xmm0                                                     
+  movdqa      xmm0,[rbp+120h]                                    
+  pminsw      xmm13,xmm15                                                    
+  movdqa      xmm15,[rbp+0B0h]                                   
+  movdqa      xmm10,xmm7                                                     
+  pand        xmm4,xmm12                                                     
+  paddw       xmm15,xmm0                                                     
+  pxor        xmm12,xmm12                                                    
+  paddw       xmm10,xmm7                                                     
+  movdqa      xmm14,xmm12                                                    
+  psubw       xmm15,xmm10                                                    
+  psubw       xmm14,xmm2                                                     
+  psraw       xmm15,1                                                        
+  pmaxsw      xmm15,xmm14                                                    
+  movdqa      xmm10,xmm6                                                     
+  pminsw      xmm15,xmm2                                                     
+  paddw       xmm10,xmm6                                                     
+  pand        xmm15,xmm3                                                     
+  psubw       xmm12,xmm11                                                    
+  pand        xmm15,[rbp+100h]                                   
+  pand        xmm13,xmm4                                                     
+  paddw       xmm7,xmm15                                                     
+  paddw       xmm8,xmm13                                                     
+  movdqa      xmm15,[rbp+170h]                                   
+  psubw       xmm9,xmm13                                                     
+  paddw       xmm5,xmm15                                                     
+  psubw       xmm5,xmm10                                                     
+  psraw       xmm5,1                                                         
+  pmaxsw      xmm5,xmm12                                                     
+  pminsw      xmm5,xmm11                                                     
+  pand        xmm5,xmm4                                                      
+  pand        xmm5,[rbp+150h]                                    
+  paddw       xmm6,xmm5                                                      
+  movdqa      xmm5,[rbp+0C0h]                                    
+  packuswb    xmm7,xmm6                                                      
+  movdqa      xmm6,[rbp+130h]                                    
+  paddw       xmm5,xmm6                                                      
+  packuswb    xmm5,xmm8                                                      
+  movdqa      xmm8,[rbp+0D0h]                                    
+  psubw       xmm8,xmm6                                                      
+  movdqa      xmm6,[rbp+0F0h]                                    
+  paddw       xmm6,xmm0                                                      
+  movdqa      xmm0,[rbp+0E0h]                                    
+  packuswb    xmm8,xmm9                                                      
+  movdqa      xmm9,xmm0                                                      
+  paddw       xmm9,xmm0                                                      
+  psubw       xmm6,xmm9                                                      
+  psraw       xmm6,1                                                         
+  pmaxsw      xmm14,xmm6                                                     
+  pminsw      xmm2,xmm14                                                     
+  pand        xmm2,xmm3                                                      
+  pand        xmm2,[rbp+110h]                                    
+  paddw       xmm0,xmm2                                                      
+  movdqa      xmm2,[rbp+140h]                                    
+  paddw       xmm2,xmm15                                                     
+  movdqa      xmm15,xmm1                                                     
+  paddw       xmm15,xmm1                                                     
+  psubw       xmm2,xmm15                                                     
+  psraw       xmm2,1                                                         
+  pmaxsw      xmm12,xmm2                                                     
+  pminsw      xmm11,xmm12                                                    
+  pand        xmm11,xmm4                                                     
+  pand        xmm11,[rbp+160h]                                   
+  paddw       xmm1,xmm11                                                     
+  movdqa      [rax+rdi],xmm7                                     
+  movdqa      [r10],xmm5                                         
+  packuswb    xmm0,xmm1                                                      
+  movdqa      [rdi],xmm8                                         
+  movdqa      [r12+rdi],xmm0                                                                        
+  mov         r12,qword [rbp+180h]                                       
+  lea         rsp,[rbp+190h]                                                 
+  pop         rbp                                                            
+  ret 
+
+
+WELS_EXTERN DeblockLumaEq4V_sse2
+; Rewrites 16 pixels in each of rows r-3..r+2 (rN = row at base + N*stride) using rows r-4..r+3; the low and high 8 pixels are processed as two batches of 16-bit words.
+ALIGN  16
+DeblockLumaEq4V_sse2: ; SysV in: rdi = base (rows are accessed with aligned movdqa), esi = stride, dx = thrA, cx = thrB. NOTE(review): pabsw is an SSSE3 instruction despite the _sse2 suffix
+  mov         rax,rsp ; rax = rsp at entry; the xmm spill slots below are addressed from it
+  push        rbx  
+  push        rbp   
+  mov         r8,   rdx ; move the SysV arguments (rdi,rsi,rdx,rcx) into rcx,rdx,r8,r9, which the body below reads
+  mov         r9,   rcx
+  mov         rcx,  rdi
+  mov         rdx,  rsi
+  sub         rsp,1D8h ; rsp = rax-1E8h; locals occupy [rsp]..[rsp+12Fh]
+  movaps      [rax-38h],xmm6 ; spill xmm6..xmm15 to [rax-38h]..[rax-0C8h] (interleaved with the loads below)
+  movaps      [rax-48h],xmm7 
+  movaps      [rax-58h],xmm8 
+  pxor        xmm1,xmm1 ; xmm1 = 0, used to widen bytes to words
+  movsxd      r10,edx ; r10 = stride, sign-extended from 32 bits
+  mov         rbp,rcx ; rbp = base (address of r0)
+  mov         r11d,r8d ; r11d = thrA argument
+  mov         rdx,rcx 
+  mov         rdi,rbp 
+  mov         rbx,rbp 
+  movdqa      xmm5,[rbp] ; xmm5 = r0 (16 bytes)
+  movaps      [rax-68h],xmm9 
+  movaps      [rax-78h],xmm10 
+  punpcklbw   xmm5,xmm1 ; keep the low 8 pixels of r0 as words
+  movaps      [rax-88h],xmm11 
+  movaps      [rax-98h],xmm12 
+  movaps      [rax-0A8h],xmm13 
+  movaps      [rax-0B8h],xmm14 
+  movdqa      xmm14,[r10+rbp] ; xmm14 = r+1
+  movaps      [rax-0C8h],xmm15 ; NOTE(review): [rax-0C8h] is the same memory as local [rsp+120h] (rsp = rax-1E8h), which is overwritten further down
+  lea         eax,[r10*4] 
+  movsxd      r8,eax ; r8 = 4*stride
+  lea         eax,[r10+r10*2] 
+  movsxd      rcx,eax ; rcx = 3*stride
+  lea         eax,[r10+r10] 
+  sub         rdx,r8 ; rdx = address of r-4
+  punpcklbw   xmm14,xmm1 
+  movdqa      [rsp+90h],xmm5 ; [rsp+90h] = r0, low half as words
+  movdqa      [rsp+30h],xmm14 ; [rsp+30h] = r+1, low half as words
+  movsxd      rsi,eax ; rsi = 2*stride
+  movsx       eax,r11w ; eax = thrA as a signed 16-bit value
+  sub         rdi,rcx ; rdi = address of r-3
+  sub         rbx,rsi ; rbx = address of r-2
+  mov         r8,rbp 
+  sub         r8,r10 ; r8 = address of r-1
+  movd        xmm0,eax 
+  movsx       eax,r9w ; eax = thrB as a signed 16-bit value
+  movdqa      xmm12,[rdi] ; xmm12 = r-3
+  movdqa      xmm6, [rsi+rbp] ; xmm6 = r+2
+  movdqa      xmm13,[rbx] ; xmm13 = r-2
+  punpcklwd   xmm0,xmm0 
+  pshufd      xmm11,xmm0,0 ; xmm11 = thrA in all 8 words
+  punpcklbw   xmm13,xmm1 
+  punpcklbw   xmm6,xmm1 
+  movdqa      xmm8,[r8] ; xmm8 = r-1
+  movd        xmm0,eax 
+  movdqa      xmm10,xmm11 
+  mov         eax,2 
+  punpcklbw   xmm8,xmm1 
+  punpcklbw   xmm12,xmm1 
+  cwde             
+  punpcklwd   xmm0,xmm0 
+  psraw       xmm10,2 ; xmm10 = thrA >> 2
+  movdqa      xmm1,xmm8 
+  movdqa      [rsp+0F0h],xmm13 ; [rsp+0F0h] = r-2, low half
+  movdqa      [rsp+0B0h],xmm8 ; [rsp+0B0h] = r-1, low half
+  pshufd      xmm7,xmm0,0 ; xmm7 = thrB in all 8 words
+  psubw       xmm1,xmm13 
+  movdqa      xmm0,xmm5 
+  movdqa      xmm4,xmm7 
+  movdqa      xmm2,xmm7 
+  psubw       xmm0,xmm8 
+  pabsw       xmm3,xmm0 ; xmm3 = |r0 - r-1|
+  pabsw       xmm0,xmm1 ; xmm0 = |r-1 - r-2|
+  movdqa      xmm1,xmm5 
+  movdqa      [rsp+40h],xmm7 ; [rsp+40h] = broadcast thrB (reused for the high half)
+  movdqa      [rsp+60h],xmm6 ; [rsp+60h] = r+2, low half
+  pcmpgtw     xmm4,xmm0 
+  psubw       xmm1,xmm14 
+  pabsw       xmm0,xmm1 ; xmm0 = |r0 - r+1|
+  pcmpgtw     xmm2,xmm0 
+  pand        xmm4,xmm2 
+  movdqa      xmm0,xmm11 
+  pcmpgtw     xmm0,xmm3 
+  pand        xmm4,xmm0 
+  movd        xmm0,eax 
+  movdqa      [rsp+20h],xmm4 ; [rsp+20h] = low-half filter mask: (thrB > |r-1 - r-2|) & (thrB > |r0 - r+1|) & (thrA > |r0 - r-1|)
+  punpcklwd   xmm0,xmm0 
+  pshufd      xmm2,xmm0,0 ; xmm2 = 2 in all 8 words
+  paddw       xmm10,xmm2 ; xmm10 = (thrA >> 2) + 2
+  movdqa      [rsp+0A0h],xmm2 ; [rsp+0A0h] = broadcast 2 (rounding term for the >>2 filters)
+  movdqa      xmm15,xmm7 
+  pxor        xmm4,xmm4 
+  movdqa      xmm0,xmm8 
+  psubw       xmm0,xmm12 
+  mov         eax,4 
+  pabsw       xmm0,xmm0 ; xmm0 = |r-1 - r-3|
+  movdqa      xmm1,xmm10 
+  cwde             
+  pcmpgtw     xmm15,xmm0 
+  pcmpgtw     xmm1,xmm3 ; xmm1 = ((thrA >> 2) + 2 > |r0 - r-1|)
+  movdqa      xmm3,xmm7 
+  movdqa      xmm7,[rdx] ; xmm7 = r-4
+  movdqa      xmm0,xmm5 
+  psubw       xmm0,xmm6 
+  pand        xmm15,xmm1 ; xmm15 = mask A (negative side): (thrB > |r-1 - r-3|) & xmm1
+  punpcklbw   xmm7,xmm4 
+  movdqa      xmm9,xmm15 
+  pabsw       xmm0,xmm0 ; xmm0 = |r0 - r+2|
+  psllw       xmm7,1 
+  pandn       xmm9,xmm12 ; xmm9 = r-3 where mask A is clear
+  pcmpgtw     xmm3,xmm0 
+  paddw       xmm7,xmm12 
+  movd        xmm0,eax 
+  pand        xmm3,xmm1 ; xmm3 = mask B (positive side): (thrB > |r0 - r+2|) & xmm1
+  paddw       xmm7,xmm12 
+  punpcklwd   xmm0,xmm0 
+  paddw       xmm7,xmm12 
+  pshufd      xmm1,xmm0,0 ; xmm1 = 4 in all 8 words
+  paddw       xmm7,xmm13 
+  movdqa      xmm0,xmm3 
+  pandn       xmm0,xmm6 
+  paddw       xmm7,xmm8 
+  movdqa      [rsp+70h],xmm1 ; [rsp+70h] = broadcast 4 (rounding term for the >>3 filters)
+  paddw       xmm7,xmm5 
+  movdqa      [rsp+120h],xmm0 ; [rsp+120h] = r+2 where mask B is clear
+  movdqa      xmm0,[rcx+rbp] ; xmm0 = r+3
+  punpcklbw   xmm0,xmm4 
+  paddw       xmm7,xmm1 
+  movdqa      xmm4,xmm15 
+  psllw       xmm0,1 
+  psraw       xmm7,3 ; xmm7 = (2*r-4 + 3*r-3 + r-2 + r-1 + r0 + 4) >> 3
+  paddw       xmm0,xmm6 
+  pand        xmm7,xmm15 ; keep it only where mask A is set (candidate r-3)
+  paddw       xmm0,xmm6 
+  paddw       xmm0,xmm6 
+  paddw       xmm0,xmm14 
+  movdqa      xmm6,xmm15 
+  paddw       xmm0,xmm5 
+  pandn       xmm6,xmm13 ; xmm6 = r-2 where mask A is clear
+  paddw       xmm0,xmm8 
+  paddw       xmm0,xmm1 
+  psraw       xmm0,3 ; xmm0 = (2*r+3 + 3*r+2 + r+1 + r0 + r-1 + 4) >> 3
+  movdqa      xmm1,xmm12 
+  paddw       xmm1,xmm13 
+  pand        xmm0,xmm3 
+  movdqa      [rsp+100h],xmm0 ; [rsp+100h] = candidate r+2 where mask B is set
+  movdqa      xmm0,xmm8 
+  paddw       xmm0,xmm5 
+  paddw       xmm1,xmm0 
+  movdqa      xmm0,xmm3 
+  paddw       xmm1,xmm2 
+  psraw       xmm1,2 ; xmm1 = (r-3 + r-2 + r-1 + r0 + 2) >> 2
+  pandn       xmm0,xmm14 
+  pand        xmm4,xmm1 ; xmm4 = candidate r-2 where mask A is set
+  movdqa      [rsp+0E0h],xmm0 ; [rsp+0E0h] = r+1 where mask B is clear
+  movdqa      xmm0,xmm5 
+  paddw       xmm0,xmm8 
+  movdqa      xmm1,[rsp+60h] 
+  paddw       xmm1,xmm14 
+  movdqa      xmm14,xmm3 
+  paddw       xmm1,xmm0 
+  movdqa      xmm0,xmm8 
+  paddw       xmm0,[rsp+30h] 
+  paddw       xmm1,xmm2 
+  psraw       xmm1,2 ; xmm1 = (r+2 + r+1 + r0 + r-1 + 2) >> 2
+  pand        xmm14,xmm1 ; xmm14 = candidate r+1 where mask B is set
+  movdqa      xmm1,xmm13 
+  paddw       xmm1,xmm13 
+  paddw       xmm1,xmm0 
+  paddw       xmm1,xmm2 
+  psraw       xmm1,2 ; xmm1 = (2*r-2 + r-1 + r+1 + 2) >> 2
+  movdqa      xmm0,[rsp+30h] 
+  movdqa      xmm2,xmm13 
+  movdqa      xmm5,xmm15 
+  paddw       xmm0,[rsp+70h] 
+  pandn       xmm5,xmm1 ; xmm5 = candidate r-1 where mask A is clear
+  paddw       xmm2,xmm8 
+  movdqa      xmm8,[rsp+90h] 
+  movdqa      xmm1,xmm12 
+  paddw       xmm2,xmm8 
+  psllw       xmm2,1 
+  paddw       xmm2,xmm0 
+  paddw       xmm1,xmm2 
+  movdqa      xmm0,xmm8 
+  movdqa      xmm8,xmm3 
+  movdqa      xmm2,[rsp+30h] 
+  paddw       xmm0,xmm13 
+  psraw       xmm1,3 ; xmm1 = (r-3 + 2*(r-2 + r-1 + r0) + r+1 + 4) >> 3
+  pand        xmm15,xmm1 ; xmm15 = candidate r-1 where mask A is set
+  movdqa      xmm1,xmm2 
+  paddw       xmm1,xmm2 
+  paddw       xmm2,[rsp+90h] 
+  paddw       xmm2,[rsp+0B0h] 
+  paddw       xmm1,xmm0 
+  movdqa      xmm0,xmm13 
+  movdqa      xmm13,[r8] ; reload r-1 for the high-half pass
+  paddw       xmm0, [rsp+70h] 
+  paddw       xmm1, [rsp+0A0h] 
+  psllw       xmm2,1 
+  paddw       xmm2,xmm0 
+  psraw       xmm1,2 ; xmm1 = (2*r+1 + r0 + r-2 + 2) >> 2
+  movdqa      xmm0, [rdi] ; reload r-3
+  pandn       xmm8,xmm1 ; xmm8 = candidate r0 where mask B is clear
+  movdqa      xmm1, [rsp+60h] 
+  paddw       xmm1,xmm2 
+  movdqa      xmm2, [rbx] ; reload r-2
+  psraw       xmm1,3 ; xmm1 = (r+2 + 2*(r+1 + r0 + r-1) + r-2 + 4) >> 3
+  pand        xmm3,xmm1 
+  movdqa      xmm1, [rbp] ; reload r0
+  movdqa      [rsp+0D0h],xmm3 ; [rsp+0D0h] = candidate r0 where mask B is set
+  pxor        xmm3,xmm3 
+  punpckhbw   xmm0,xmm3 ; from here on the high 8 pixels of each row are widened to words
+  punpckhbw   xmm1,xmm3 
+  punpckhbw   xmm13,xmm3 
+  movdqa      [rsp+0C0h],xmm0 ; [rsp+0C0h] = r-3, high half
+  movdqa      xmm0,[r10+rbp] 
+  movdqa      [rsp],xmm1 ; [rsp] = r0, high half
+  punpckhbw   xmm0,xmm3 
+  punpckhbw   xmm2,xmm3 
+  movdqa      [rsp+80h],xmm0 ; [rsp+80h] = r+1, high half
+  movdqa      xmm0,[rsi+rbp] 
+  movdqa      [rsp+10h],xmm13 ; [rsp+10h] = r-1, high half
+  punpckhbw   xmm0,xmm3 
+  movdqa      [rsp+50h],xmm0 ; [rsp+50h] = r+2, high half
+  movdqa      xmm0,xmm1 
+  movdqa      xmm1,xmm13 
+  psubw       xmm0,xmm13 
+  psubw       xmm1,xmm2 
+  pabsw       xmm3,xmm0 ; xmm3 = |r0 - r-1| (high half)
+  pabsw       xmm0,xmm1 ; xmm0 = |r-1 - r-2| (high half)
+  movdqa      xmm1,[rsp] 
+  movdqa      xmm13,[rsp+40h] 
+  movdqa      [rsp+110h],xmm2 ; [rsp+110h] = r-2, high half
+  psubw       xmm1, [rsp+80h] 
+  pcmpgtw     xmm13,xmm0 
+  pcmpgtw     xmm11,xmm3 
+  pabsw       xmm0,xmm1 
+  pcmpgtw     xmm10,xmm3 ; xmm10 = ((thrA >> 2) + 2 > |r0 - r-1|) (high half)
+  movdqa      xmm1, [rsp+40h] 
+  movdqa      xmm2,xmm1 
+  movdqa      xmm3,xmm1 
+  pcmpgtw     xmm2,xmm0 
+  movdqa      xmm0, [rsp+10h] 
+  pand        xmm13,xmm2 
+  pand        xmm13,xmm11 ; xmm13 = high-half filter mask (same three tests as [rsp+20h])
+  movdqa      xmm11,[rsp+0C0h] 
+  psubw       xmm0,xmm11 
+  pabsw       xmm0,xmm0 
+  pcmpgtw     xmm3,xmm0 
+  pand        xmm3,xmm10 ; xmm3 = mask A for the high half
+  movdqa      xmm0,[rsp] 
+  psubw       xmm0,[rsp+50h] 
+  movdqa      xmm2,[rdx] ; reload r-4
+  pabsw       xmm0,xmm0 
+  por         xmm7,xmm9 ; low half r-3: filtered value where mask A is set, original otherwise
+  movdqa      xmm9,[rsp+20h] 
+  pcmpgtw     xmm1,xmm0 
+  pand        xmm9,xmm7 
+  movdqa      xmm7,[rsp+20h] 
+  movdqa      xmm0,xmm7 
+  pandn       xmm0,xmm12 
+  movdqa      xmm12,[rsp+110h] 
+  pand        xmm1,xmm10 
+  movdqa      xmm10,[rsp+70h] 
+  movdqa      [rsp+40h],xmm1 ; [rsp+40h] now holds mask B for the high half (replaces broadcast thrB)
+  movdqa      xmm1,xmm13 
+  por         xmm9,xmm0 ; xmm9 = final r-3, low half (original where the filter mask is clear)
+  pxor        xmm0,xmm0 
+  por         xmm4,xmm6 
+  movdqa      xmm6,xmm7 
+  punpckhbw   xmm2,xmm0 
+  por         xmm15,xmm5 
+  movdqa      xmm5,[rsp+20h] 
+  movdqa      xmm0,xmm3 
+  psllw       xmm2,1 
+  pandn       xmm0,xmm11 
+  pand        xmm6,xmm4 
+  movdqa      xmm4,[rsp] 
+  paddw       xmm2,xmm11 
+  pand        xmm5,xmm15 
+  movdqa      xmm15,[rsp+20h] 
+  paddw       xmm2,xmm11 
+  paddw       xmm2,xmm11 
+  paddw       xmm2,xmm12 
+  paddw       xmm2,[rsp+10h] 
+  paddw       xmm2,[rsp] 
+  paddw       xmm2,xmm10 
+  psraw       xmm2,3 ; high half: (2*r-4 + 3*r-3 + r-2 + r-1 + r0 + 4) >> 3
+  pand        xmm2,xmm3 
+  por         xmm2,xmm0 
+  pand        xmm1,xmm2 
+  movdqa      xmm0,xmm13 
+  movdqa      xmm2,xmm11 
+  pandn       xmm0,xmm11 
+  paddw       xmm2,xmm12 
+  por         xmm1,xmm0 
+  packuswb    xmm9,xmm1 ; xmm9 = final r-3 (16 bytes, unsigned saturation)
+  movdqa      xmm0,xmm7 
+  movdqa      xmm7,[rsp+0A0h] 
+  pandn       xmm0,[rsp+0F0h] 
+  movdqa      xmm1,xmm3 
+  por         xmm6,xmm0 
+  movdqa      xmm0,[rsp+10h] 
+  paddw       xmm0,xmm4 
+  paddw       xmm2,xmm0 
+  paddw       xmm2,xmm7 
+  movdqa      xmm0,xmm3 
+  pandn       xmm0,xmm12 
+  psraw       xmm2,2 
+  pand        xmm1,xmm2 
+  por         xmm1,xmm0 
+  movdqa      xmm2,xmm13 
+  movdqa      xmm0,xmm13 
+  pand        xmm2,xmm1 
+  pandn       xmm0,xmm12 
+  movdqa      xmm1,xmm12 
+  paddw       xmm1,[rsp+10h] 
+  por         xmm2,xmm0 
+  movdqa      xmm0,xmm15 
+  pandn       xmm0,[rsp+0B0h] 
+  paddw       xmm1,xmm4 
+  packuswb    xmm6,xmm2 ; xmm6 = final r-2
+  movdqa      xmm2,xmm3 
+  psllw       xmm1,1 
+  por         xmm5,xmm0 
+  movdqa      xmm0,[rsp+80h] 
+  paddw       xmm0,xmm10 
+  paddw       xmm1,xmm0 
+  paddw       xmm11,xmm1 
+  psraw       xmm11,3 
+  movdqa      xmm1,xmm12 
+  pand        xmm2,xmm11 
+  paddw       xmm1,xmm12 
+  movdqa      xmm11,[rsp+80h] 
+  movdqa      xmm0, [rsp+10h] 
+  por         xmm14,[rsp+0E0h] 
+  paddw       xmm0,xmm11 
+  movdqa      xmm4,xmm15 
+  paddw       xmm1,xmm0 
+  movdqa      xmm0,xmm13 
+  paddw       xmm1,xmm7 
+  psraw       xmm1,2 
+  pandn       xmm3,xmm1 
+  por         xmm2,xmm3 
+  movdqa      xmm1,xmm13 
+  movdqa      xmm3,[rsp+10h] 
+  pandn       xmm0,xmm3 
+  pand        xmm1,xmm2 
+  movdqa      xmm2,xmm11 
+  paddw       xmm2,[rsp] 
+  por         xmm1,xmm0 
+  movdqa      xmm0,[rsp+0D0h] 
+  por         xmm0,xmm8 
+  paddw       xmm2,xmm3 
+  packuswb    xmm5,xmm1 ; xmm5 = final r-1
+  movdqa      xmm8,[rsp+40h] 
+  movdqa      xmm1,[rsp+50h] 
+  movdqa      xmm3,xmm8 
+  pand        xmm4,xmm0 
+  psllw       xmm2,1 
+  movdqa      xmm0,xmm15 
+  pandn       xmm0,[rsp+90h] 
+  por         xmm4,xmm0 
+  movdqa      xmm0,xmm12 
+  paddw       xmm0,xmm10 
+  paddw       xmm2,xmm0 
+  paddw       xmm1,xmm2 
+  movdqa      xmm0,[rsp] 
+  movdqa      xmm2,xmm11 
+  paddw       xmm0,xmm12 
+  movdqa      xmm12,[rsp] 
+  paddw       xmm2,xmm11 
+  paddw       xmm2,xmm0 
+  psraw       xmm1,3 
+  movdqa      xmm0,xmm8 
+  pand        xmm3,xmm1 
+  paddw       xmm2,xmm7 
+  movdqa      xmm1,xmm13 
+  psraw       xmm2,2 
+  pandn       xmm0,xmm2 
+  por         xmm3,xmm0 
+  movdqa      xmm2,[rsp+50h] 
+  movdqa      xmm0,xmm13 
+  pandn       xmm0,xmm12 
+  pand        xmm1,xmm3 
+  paddw       xmm2,xmm11 
+  movdqa      xmm3,xmm15 
+  por         xmm1,xmm0 
+  pand        xmm3,xmm14 
+  movdqa      xmm14,[rsp+10h] 
+  movdqa      xmm0,xmm15 
+  pandn       xmm0,[rsp+30h] 
+  packuswb    xmm4,xmm1 ; xmm4 = final r0
+  movdqa      xmm1,xmm8 
+  por         xmm3,xmm0 
+  movdqa      xmm0,xmm12 
+  paddw       xmm0,xmm14 
+  paddw       xmm2,xmm0 
+  paddw       xmm2,xmm7 
+  movdqa      xmm0,xmm8 
+  pandn       xmm0,xmm11 
+  psraw       xmm2,2 
+  pand        xmm1,xmm2 
+  por         xmm1,xmm0 
+  movdqa      xmm2,xmm13 
+  movdqa      xmm0,xmm13 
+  pandn       xmm0,xmm11 
+  pand        xmm2,xmm1 
+  movdqa      xmm1,xmm15 
+  por         xmm2,xmm0 
+  packuswb    xmm3,xmm2 ; xmm3 = final r+1
+  movdqa      xmm0,[rsp+100h] 
+  por         xmm0,[rsp+120h] 
+  pand        xmm1,xmm0 
+  movdqa      xmm2,[rcx+rbp] ; reload r+3
+  movdqa      xmm7,[rsp+50h] 
+  pandn       xmm15,[rsp+60h] 
+  lea         r11,[rsp+1D8h] ; r11 = rsp as it was right after the two pushes (entry rsp - 10h)
+  pxor        xmm0,xmm0 
+  por         xmm1,xmm15 
+  movaps      xmm15,[r11-0A8h] ; NOTE(review): the reloads of xmm6..xmm15 use [r11-18h]..[r11-0A8h] = entry rsp-28h..-0B8h, i.e. 10h above the prologue's spill slots; xmm6-xmm15 are not callee-saved in the System V AMD64 ABI, so this looks harmless there - confirm
+  movdqa      [rdi],xmm9 ; store r-3
+  movaps      xmm9,[r11-48h] 
+  punpckhbw   xmm2,xmm0 
+  psllw       xmm2,1 
+  paddw       xmm2,xmm7 
+  paddw       xmm2,xmm7 
+  movdqa      [rbx],xmm6 ; store r-2
+  movaps      xmm6,[r11-18h] 
+  paddw       xmm2,xmm7 
+  paddw       xmm2,xmm11 
+  movaps      xmm11,[r11-68h] 
+  paddw       xmm2,xmm12 
+  movaps      xmm12,[r11-78h] 
+  paddw       xmm2,xmm14 
+  paddw       xmm2,xmm10 
+  psraw       xmm2,3 ; high half: (2*r+3 + 3*r+2 + r+1 + r0 + r-1 + 4) >> 3
+  movaps      xmm10,[r11-58h] 
+  movaps      xmm14,[r11-98h] 
+  movdqa      xmm0,xmm13 
+  pand        xmm2,xmm8 
+  pandn       xmm8,xmm7 
+  pandn       xmm13,xmm7 
+  por         xmm2,xmm8 
+  movaps      xmm7,[r11-28h] 
+  movaps      xmm8,[r11-38h] 
+  movdqa      [r8],xmm5 ; store r-1
+  pand        xmm0,xmm2 
+  por         xmm0,xmm13 
+  packuswb    xmm1,xmm0 ; xmm1 = final r+2
+  movaps      xmm13,[r11-88h] 
+  movdqa      [rbp],xmm4 ; store r0
+  movdqa      [r10+rbp],xmm3 ; store r+1
+  movdqa      [rsi+rbp],xmm1 ; store r+2
+  mov         rsp,r11   ; drop the local frame
+  pop         rbp  
+  pop         rbx  
+  ret
+
+WELS_EXTERN  DeblockChromaLt4V_sse2
+ALIGN  16 
+DeblockChromaLt4V_sse2: ; SysV in: rdi, rsi = two plane pointers, edx = stride, cx = 16-bit threshold (thrA), r8w = iBeta, r9 = pointer to 4 signed limit bytes; adjusts 8 pixels of rows -1 and 0 in each plane. NOTE(review): pabsw is an SSSE3 instruction despite the _sse2 suffix
+  mov         rax,rsp ; NOTE(review): rax is overwritten below before this value is ever read
+  push        rbx  
+  push        rbp    
+  mov         r10,  rdx ; the moves below leave: rcx = plane 1, rdx = plane 2, r8 = stride, r9 = thrA, rbp = iBeta, r10 = limit pointer
+  mov         r11,  rcx
+  mov         rcx,  rdi
+  mov         rdx,  rsi  
+  mov         rsi,  r10
+  mov         r10,  r9
+  mov         rbp,  r8
+  mov         r8,   rsi
+  mov         r9,   r11
+  sub         rsp,0C8h   ; local frame; only [rsp]..[rsp+1Fh] is written by this function
+  pxor        xmm1,xmm1 ; xmm1 = 0, used to widen bytes to words
+  mov         rbx,rcx ; rbx = plane 1, row 0
+  movsxd      r11,r8d ; r11 = stride, sign-extended from 32 bits
+  movsx       ecx,byte [r10] ; limit byte 0
+  movsx       r8d,byte [r10+2] ; limit byte 2
+  mov         rdi,rdx ; rdi = plane 2, row 0
+  movq        xmm2,[rbx] ; plane 1, row 0 (8 bytes)
+  movq        xmm9,[r11+rbx] ; plane 1, row +1
+  movsx       edx,byte [r10+1] ; limit byte 1
+  mov         word [rsp+2],cx ; each limit byte is written to two adjacent words of [rsp]
+  mov         word [rsp],cx 
+  movsx       eax,byte [r10+3] ; limit byte 3
+  mov         word [rsp+6],dx 
+  mov         word [rsp+4],dx 
+  movdqa      xmm11,xmm1 
+  mov         word [rsp+0Eh],ax 
+  mov         word [rsp+0Ch],ax 
+  lea         eax,[r11+r11] 
+  movsxd      rcx,eax ; rcx = 2*stride
+  mov         rax,rbx 
+  mov         rdx,rdi 
+  sub         rax,rcx 
+  mov         word [rsp+0Ah],r8w 
+  mov         word [rsp+8],r8w 
+  movdqa      xmm6,[rsp] ; xmm6 = limits as words: c0,c0,c1,c1,c2,c2,c3,c3
+  movdqa      xmm7,xmm6 
+  movq        xmm13, [rax] ; plane 1, row -2
+  mov         rax,rdi 
+  sub         rax,rcx 
+  mov         rcx,rbx 
+  pcmpgtw     xmm7,xmm1 ; xmm7 = mask of lanes whose limit is > 0
+  psubw       xmm11,xmm6 ; xmm11 = -limit
+  sub         rcx,r11 ; rcx = plane 1, row -1
+  sub         rdx,r11 ; rdx = plane 2, row -1
+  movq        xmm0,[rax] ; plane 2, row -2
+  movsx       eax,r9w ; eax = thrA as a signed 16-bit value
+  movq        xmm15,[rcx] 
+  punpcklqdq  xmm13,xmm0 ; xmm13 = row -2: plane 1 in the low qword, plane 2 in the high qword
+  movq        xmm0, [rdx] 
+  movdqa      xmm4,xmm13 
+  punpcklqdq  xmm15,xmm0 ; xmm15 = row -1, both planes
+  movq        xmm0, [rdi] 
+  punpcklbw   xmm4,xmm1 ; xmm4 = plane 1 row -2 as words
+  movdqa      xmm12,xmm15 
+  punpcklqdq  xmm2,xmm0 ; xmm2 = row 0, both planes
+  movq        xmm0, [r11+rdi] 
+  punpcklbw   xmm12,xmm1 ; xmm12 = plane 1 row -1 as words
+  movdqa      xmm14,xmm2 
+  punpcklqdq  xmm9,xmm0 ; xmm9 = row +1, both planes
+  punpckhbw   xmm2,xmm1 ; xmm2 = plane 2 row 0 as words
+  punpcklbw   xmm14,xmm1 ; xmm14 = plane 1 row 0 as words
+  movd        xmm0,eax 
+  mov         eax, ebp ; iBeta
+  punpckhbw   xmm13,xmm1 ; xmm13 = plane 2 row -2 as words
+  punpckhbw   xmm15,xmm1 ; xmm15 = plane 2 row -1 as words
+  movdqa      xmm3,xmm9 
+  movdqa      [rsp+10h],xmm2 ; [rsp+10h] = plane 2 row 0 as words
+  punpcklwd   xmm0,xmm0 
+  punpckhbw   xmm9,xmm1 ; xmm9 = plane 2 row +1 as words
+  punpcklbw   xmm3,xmm1 ; xmm3 = plane 1 row +1 as words
+  movdqa      xmm1,xmm14 
+  pshufd      xmm10,xmm0,0 ; xmm10 = thrA in all 8 words
+  movd        xmm0,eax 
+  mov         eax,4 
+  cwde             
+  punpcklwd   xmm0,xmm0 
+  pshufd      xmm8,xmm0,0 ; xmm8 = iBeta in all 8 words
+  movd        xmm0,eax 
+  punpcklwd   xmm0,xmm0 
+  pshufd      xmm5,xmm0,0 ; xmm5 = 4 in all 8 words
+  psubw       xmm1,xmm12 
+  movdqa      xmm2,xmm10 
+  lea         r11,[rsp+0C8h] ; r11 = rsp as it was right after the two pushes (stride is not used past this point)
+  psllw       xmm1,2 
+  movdqa      xmm0,xmm4 
+  psubw       xmm4,xmm12 
+  psubw       xmm0,xmm3 
+  psubw       xmm3,xmm14 
+  paddw       xmm1,xmm0 
+  paddw       xmm1,xmm5 
+  movdqa      xmm0,xmm11 
+  psraw       xmm1,3 ; plane 1 delta = (((row0 - row-1) << 2) + (row-2 - row+1) + 4) >> 3
+  pmaxsw      xmm0,xmm1 
+  pminsw      xmm6,xmm0 ; clamp the delta to [-limit, limit]
+  movdqa      xmm1,xmm8 
+  movdqa      xmm0,xmm12 
+  psubw       xmm0,xmm14 
+  pabsw       xmm0,xmm0 
+  pcmpgtw     xmm2,xmm0 ; thrA > |row-1 - row0|
+  pabsw       xmm0,xmm4 
+  pcmpgtw     xmm1,xmm0 ; iBeta > |row-2 - row-1|
+  pabsw       xmm0,xmm3 
+  movdqa      xmm3,[rsp] ; reload the limits for plane 2
+  pand        xmm2,xmm1 
+  movdqa      xmm1,xmm8 
+  pcmpgtw     xmm1,xmm0 ; iBeta > |row+1 - row0|
+  movdqa      xmm0,xmm13 
+  pand        xmm2,xmm1 
+  psubw       xmm0,xmm9 
+  psubw       xmm13,xmm15 
+  pand        xmm2,xmm7 ; also require limit > 0
+  pand        xmm6,xmm2 ; zero the delta where the mask is clear
+  paddw       xmm12,xmm6 ; plane 1: row -1 += delta
+  psubw       xmm14,xmm6 ; plane 1: row 0 -= delta
+  movdqa      xmm2,[rsp+10h] 
+  movaps      xmm6,[r11-18h] ; NOTE(review): this and the later movaps loads into xmm7..xmm15 read stack slots this function never writes (no matching spill in the prologue); xmm6-xmm15 are not callee-saved in the System V AMD64 ABI, so they look like harmless leftovers - confirm
+  movdqa      xmm1,xmm2 
+  psubw       xmm1,xmm15 
+  psubw       xmm9,xmm2 
+  psllw       xmm1,2 
+  paddw       xmm1,xmm0 
+  paddw       xmm1,xmm5 
+  movdqa      xmm0,xmm15 
+  psubw       xmm0,xmm2 
+  psraw       xmm1,3 ; plane 2 delta, same formula as plane 1
+  pmaxsw      xmm11,xmm1 
+  pabsw       xmm0,xmm0 
+  movdqa      xmm1,xmm8 
+  pcmpgtw     xmm10,xmm0 
+  pabsw       xmm0,xmm13 
+  pminsw      xmm3,xmm11 ; clamp the plane 2 delta to [-limit, limit]
+  movaps      xmm11,[r11-68h] 
+  movaps      xmm13,[rsp+40h] 
+  pcmpgtw     xmm1,xmm0 
+  pabsw       xmm0,xmm9 
+  movaps      xmm9, [r11-48h] 
+  pand        xmm10,xmm1 
+  pcmpgtw     xmm8,xmm0 
+  pand        xmm10,xmm8 
+  pand        xmm10,xmm7 ; plane 2 mask: the same four tests as plane 1
+  movaps      xmm8,[r11-38h] 
+  movaps      xmm7,[r11-28h] 
+  pand        xmm3,xmm10 
+  paddw       xmm15,xmm3 ; plane 2: row -1 += delta
+  psubw       xmm2,xmm3 ; plane 2: row 0 -= delta
+  movaps      xmm10,[r11-58h] 
+  packuswb    xmm12,xmm15 ; xmm12 = new row -1: plane 1 in the low qword, plane 2 in the high qword
+  movaps      xmm15,[rsp+20h] 
+  packuswb    xmm14,xmm2 ; xmm14 = new row 0, same layout
+  movq        [rcx],xmm12 ; store plane 1 row -1
+  movq        [rbx],xmm14 ; store plane 1 row 0
+  psrldq      xmm12,8 
+  psrldq      xmm14,8 
+  movq        [rdx],xmm12 ; store plane 2 row -1
+  movaps      xmm12,[r11-78h] 
+  movq        [rdi],xmm14 ; store plane 2 row 0
+  movaps      xmm14,[rsp+30h] 
+  mov         rsp,r11 ; drop the local frame
+  pop         rbp  
+  pop         rbx  
+  ret
+
+WELS_EXTERN   DeblockChromaEq4V_sse2
+ALIGN 16
+DeblockChromaEq4V_sse2: ; SysV in: rdi, rsi = two plane pointers, edx = stride, cx = 16-bit threshold (thrA), r8w = iBeta; replaces 8 pixels of rows -1 and 0 in each plane where the threshold tests pass. NOTE(review): pabsw is an SSSE3 instruction despite the _sse2 suffix
+  mov         rax,rsp ; NOTE(review): rax is overwritten below before this value is ever read
+  push        rbx  
+  push        rbp
+
+  mov         rbp, r8 ; the moves below leave: rcx = plane 1, rdx = plane 2, r8 = stride, r9 = thrA, rbp = iBeta
+  mov         r8, rdx
+  mov         r9, rcx
+  mov         rcx, rdi
+  mov         rdx, rsi
+  
+  sub         rsp,90h ; local frame; no live instruction in this function accesses it
+  pxor        xmm1,xmm1 ; xmm1 = 0, used to widen bytes to words
+  mov         r11,rcx ; r11 = plane 1, row 0
+  mov         rbx,rdx ; rbx = plane 2, row 0
+  mov         r10d,r9d   ; keep thrA in r10d; r9 is reused for 2*stride
+  movq        xmm13,[r11] ; plane 1, row 0 (8 bytes)
+  lea         eax,[r8+r8] 
+  movsxd      r9,eax ; r9 = 2*stride
+  mov         rax,rcx 
+  sub         rax,r9 
+  movq        xmm14,[rax] ; plane 1, row -2
+  mov         rax,rdx 
+  sub         rax,r9 
+  movq        xmm0,[rax] ; plane 2, row -2
+  movsxd      rax,r8d ; rax = stride, sign-extended from 32 bits
+  sub         rcx,rax ; rcx = plane 1, row -1
+  sub         rdx,rax ; rdx = plane 2, row -1
+  movq        xmm12,[rax+r11] ; plane 1, row +1
+  movq        xmm10,[rcx] ; plane 1, row -1
+  punpcklqdq  xmm14,xmm0 ; xmm14 = row -2: plane 1 in the low qword, plane 2 in the high qword
+  movdqa      xmm8,xmm14 
+  movq        xmm0,[rdx] 
+  punpcklbw   xmm8,xmm1 ; xmm8 = plane 1 row -2 as words
+  punpckhbw   xmm14,xmm1 ; xmm14 = plane 2 row -2 as words
+  punpcklqdq  xmm10,xmm0 ; xmm10 = row -1, both planes
+  movq        xmm0,[rbx] 
+  movdqa      xmm5,xmm10 
+  punpcklqdq  xmm13,xmm0 ; xmm13 = row 0, both planes
+  movq        xmm0, [rax+rbx] 
+  punpcklbw   xmm5,xmm1 ; xmm5 = plane 1 row -1 as words
+  movsx       eax,r10w ; eax = thrA as a signed 16-bit value
+  movdqa      xmm9,xmm13 
+  punpcklqdq  xmm12,xmm0 ; xmm12 = row +1, both planes
+  punpcklbw   xmm9,xmm1 ; xmm9 = plane 1 row 0 as words
+  punpckhbw   xmm10,xmm1 ; xmm10 = plane 2 row -1 as words
+  movd        xmm0,eax 
+  mov         eax, ebp   ; iBeta
+  punpckhbw   xmm13,xmm1 ; xmm13 = plane 2 row 0 as words
+  movdqa      xmm7,xmm12 
+  punpcklwd   xmm0,xmm0 
+  punpckhbw   xmm12,xmm1 ; xmm12 = plane 2 row +1 as words
+  pshufd      xmm11,xmm0,0 ; xmm11 = thrA in all 8 words
+  punpcklbw   xmm7,xmm1 ; xmm7 = plane 1 row +1 as words
+  movd        xmm0,eax 
+  movdqa      xmm1,xmm8 
+  psubw       xmm1,xmm5 
+  punpcklwd   xmm0,xmm0 
+  movdqa      xmm6,xmm11 
+  pshufd      xmm3,xmm0,0 ; xmm3 = iBeta in all 8 words
+  movdqa      xmm0,xmm5 
+  psubw       xmm0,xmm9 
+  movdqa      xmm2,xmm3 
+  pabsw       xmm0,xmm0 
+  pcmpgtw     xmm6,xmm0 ; thrA > |row-1 - row0| (plane 1)
+  pabsw       xmm0,xmm1 
+  movdqa      xmm1,xmm3 
+  pcmpgtw     xmm2,xmm0 ; iBeta > |row-2 - row-1|
+  pand        xmm6,xmm2 
+  movdqa      xmm0,xmm7 
+  movdqa      xmm2,xmm3 
+  psubw       xmm0,xmm9 
+  pabsw       xmm0,xmm0 
+  pcmpgtw     xmm1,xmm0 ; iBeta > |row+1 - row0|
+  pand        xmm6,xmm1 ; xmm6 = plane 1 filter mask
+  movdqa      xmm0,xmm10 
+  movdqa      xmm1,xmm14 
+  psubw       xmm0,xmm13 
+  psubw       xmm1,xmm10 
+  pabsw       xmm0,xmm0 
+  pcmpgtw     xmm11,xmm0 
+  pabsw       xmm0,xmm1 
+  pcmpgtw     xmm2,xmm0 
+  pand        xmm11,xmm2 
+  movdqa      xmm0,xmm12 
+  movdqa      xmm4,xmm6 
+  movdqa      xmm1,xmm8 
+  mov         eax,2 
+  cwde             
+  paddw       xmm1,xmm8 
+  psubw       xmm0,xmm13 
+  paddw       xmm1,xmm5 
+  pabsw       xmm0,xmm0 
+  movdqa      xmm2,xmm14 
+  paddw       xmm1,xmm7 
+  pcmpgtw     xmm3,xmm0 
+  paddw       xmm2,xmm14 
+  movd        xmm0,eax 
+  pand        xmm11,xmm3 ; xmm11 = plane 2 filter mask (same three tests)
+  paddw       xmm7,xmm7 
+  paddw       xmm2,xmm10 
+  punpcklwd   xmm0,xmm0 
+  paddw       xmm2,xmm12 
+  paddw       xmm12,xmm12 
+  pshufd      xmm3,xmm0,0 ; xmm3 = 2 in all 8 words (iBeta is not needed past this point)
+  paddw       xmm7,xmm9 
+  paddw       xmm12,xmm13 
+  movdqa      xmm0,xmm6 
+  paddw       xmm1,xmm3 
+  pandn       xmm0,xmm5 
+  paddw       xmm7,xmm8 
+  psraw       xmm1,2 ; plane 1 new row -1 = (2*row-2 + row-1 + row+1 + 2) >> 2
+  paddw       xmm12,xmm14 
+  paddw       xmm7,xmm3 
+  ;movaps      xmm14,[rsp] 
+  pand        xmm4,xmm1 
+  paddw       xmm12,xmm3 
+  psraw       xmm7,2 ; plane 1 new row 0 = (2*row+1 + row0 + row-2 + 2) >> 2
+  movdqa      xmm1,xmm11 
+  por         xmm4,xmm0 ; plane 1 row -1: filtered where the mask is set, original otherwise
+  psraw       xmm12,2 ; plane 2 new row 0, same formula
+  paddw       xmm2,xmm3 
+  movdqa      xmm0,xmm11 
+  pandn       xmm0,xmm10 
+  psraw       xmm2,2 ; plane 2 new row -1, same formula
+  pand        xmm1,xmm2 
+  por         xmm1,xmm0 
+  packuswb    xmm4,xmm1 ; xmm4 = row -1: plane 1 in the low qword, plane 2 in the high qword
+  movdqa      xmm0,xmm11 
+  movdqa      xmm1,xmm6 
+  pand        xmm1,xmm7 
+  movq        [rcx],xmm4 ; store plane 1 row -1
+  pandn       xmm6,xmm9 
+  pandn       xmm11,xmm13 
+  pand        xmm0,xmm12 
+  por         xmm1,xmm6 
+  por         xmm0,xmm11 
+  psrldq      xmm4,8 
+  packuswb    xmm1,xmm0 ; xmm1 = row 0, same layout
+  movq        [r11],xmm1 ; store plane 1 row 0
+  psrldq      xmm1,8 
+  movq        [rdx],xmm4 ; store plane 2 row -1
+  lea         r11,[rsp+90h] ; r11 = rsp as it was right after the two pushes
+  movq        [rbx],xmm1 ; store plane 2 row 0
+  mov         rsp,r11 ; drop the local frame
+  pop         rbp
+  pop         rbx  
+  ret
+
+
+;***************************************************************************
+;  void DeblockChromaEq4H_sse2(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride,
+;          int32_t iAlpha, int32_t iBeta)
+;
+;  UNIX64 (System V AMD64) variant: rdi = pPixCb, rsi = pPixCr, edx = iStride,
+;  ecx = iAlpha, r8d = iBeta.
+;
+;  For 8 rows of each plane, reads the 4 bytes p1 p0 q0 q1 at [pPix - 2 + row*iStride],
+;  transposes them into per-column vectors (low 8 lanes = Cb, high 8 lanes = Cr) and,
+;  in every lane where
+;      iAlpha > |p0 - q0|  and  iBeta > |p1 - p0|  and  iBeta > |q1 - q0|
+;  replaces
+;      p0 <- (2*p1 + p0 + q1 + 2) >> 2
+;      q0 <- (2*q1 + q0 + p1 + 2) >> 2
+;  then transposes back and rewrites all 4 bytes of each row (p1 and q1 unchanged).
+;
+;  NOTE: pabsw is an SSSE3 instruction, so the routine needs SSSE3 despite its name.
+;***************************************************************************
+WELS_EXTERN   DeblockChromaEq4H_sse2
+ALIGN  16
+DeblockChromaEq4H_sse2:
+  push        rbx
+  push        rbp
+  push        r12                      ; r12 is not used below; this third push leaves rsp 16-byte aligned for the movdqa stack accesses
+
+  ; Move the System V arguments into the registers the body addresses them by.
+  mov         rbp,   r8                ; rbp = iBeta
+  mov         r8,    rdx               ; r8  = iStride
+  mov         r9,    rcx               ; r9  = iAlpha
+  mov         rcx,   rdi               ; rcx = pPixCb
+  mov         rdx,   rsi               ; rdx = pPixCr (rdx is reused as 3*iStride further down)
+  mov         rdi,   rdx               ; rdi = pPixCr, kept for the whole routine
+
+  sub         rsp,140h                 ; scratch area; stays 16-byte aligned
+  lea         eax,[r8*4]
+  movsxd      r10,eax                  ; r10 = 4*iStride
+  mov         eax,[rcx-2]              ; Cb row 0: p1 p0 q0 q1
+  mov         [rsp+10h],eax
+  lea         rbx,[r10+rdx-2]          ; rbx = pPixCr + 4*iStride - 2 (Cr row 4)
+  lea         r11,[r10+rcx-2]          ; r11 = pPixCb + 4*iStride - 2 (Cb row 4)
+
+  movdqa      xmm5,[rsp+10h]           ; only the low dword of each such 16-byte load is consumed
+  movsxd      r10,r8d                  ; r10 = iStride
+  mov         eax,[r10+rcx-2]          ; Cb row 1
+  lea         rdx,[r10+r10*2]          ; rdx = 3*iStride
+  mov         [rsp+20h],eax
+  mov         eax,[rcx+r10*2-2]        ; Cb row 2
+  mov         [rsp+30h],eax
+  mov         eax,[rdx+rcx-2]          ; Cb row 3
+  movdqa      xmm2,[rsp+20h]
+  mov         [rsp+40h],eax
+  mov         eax, [rdi-2]             ; Cr row 0
+  movdqa      xmm4,[rsp+30h]
+  mov         [rsp+50h],eax
+  mov         eax,[r10+rdi-2]          ; Cr row 1
+  movdqa      xmm3,[rsp+40h]
+  mov         [rsp+60h],eax
+  mov         eax,[rdi+r10*2-2]        ; Cr row 2
+  punpckldq   xmm5,[rsp+50h]           ; xmm5 = Cb row 0 | Cr row 0
+  mov         [rsp+70h],eax
+  mov         eax, [rdx+rdi-2]         ; Cr row 3
+  punpckldq   xmm2, [rsp+60h]          ; xmm2 = Cb row 1 | Cr row 1
+  mov          [rsp+80h],eax
+  mov         eax,[r11]                ; Cb row 4
+  punpckldq   xmm4, [rsp+70h]          ; xmm4 = Cb row 2 | Cr row 2
+  mov         [rsp+50h],eax
+  mov         eax,[rbx]                ; Cr row 4
+  punpckldq   xmm3,[rsp+80h]           ; xmm3 = Cb row 3 | Cr row 3
+  mov         [rsp+60h],eax
+  mov         eax,[r10+r11]            ; Cb row 5
+  movdqa      xmm0, [rsp+50h]
+  punpckldq   xmm0, [rsp+60h]
+  punpcklqdq  xmm5,xmm0                ; xmm5 = Cb r0 | Cr r0 | Cb r4 | Cr r4
+  movdqa      [rsp+50h],xmm0
+  mov         [rsp+50h],eax
+  mov         eax,[r10+rbx]            ; Cr row 5
+  movdqa      xmm0,[rsp+50h]
+  movdqa      xmm1,xmm5
+  mov         [rsp+60h],eax
+  mov         eax,[r11+r10*2]          ; Cb row 6
+  punpckldq   xmm0, [rsp+60h]
+  punpcklqdq  xmm2,xmm0                ; xmm2 = Cb r1 | Cr r1 | Cb r5 | Cr r5
+  punpcklbw   xmm1,xmm2                ; start of the byte transpose
+  punpckhbw   xmm5,xmm2
+  movdqa      [rsp+50h],xmm0
+  mov         [rsp+50h],eax
+  mov         eax,[rbx+r10*2]          ; Cr row 6
+  movdqa      xmm0,[rsp+50h]
+  mov         [rsp+60h],eax
+  mov         eax, [rdx+r11]           ; Cb row 7
+  movdqa      xmm15,xmm1
+  punpckldq   xmm0,[rsp+60h]
+  punpcklqdq  xmm4,xmm0                ; xmm4 = Cb r2 | Cr r2 | Cb r6 | Cr r6
+  movdqa      [rsp+50h],xmm0
+  mov         [rsp+50h],eax
+  mov         eax, [rdx+rbx]           ; Cr row 7
+  movdqa      xmm0,[rsp+50h]
+  mov         [rsp+60h],eax
+  punpckldq   xmm0, [rsp+60h]
+  punpcklqdq  xmm3,xmm0                ; xmm3 = Cb r3 | Cr r3 | Cb r7 | Cr r7
+  movdqa      xmm0,xmm4
+  punpcklbw   xmm0,xmm3
+  punpckhbw   xmm4,xmm3
+  punpcklwd   xmm15,xmm0
+  punpckhwd   xmm1,xmm0
+  movdqa      xmm0,xmm5
+  movdqa      xmm12,xmm15
+  punpcklwd   xmm0,xmm4
+  punpckhwd   xmm5,xmm4
+  punpckldq   xmm12,xmm0
+  punpckhdq   xmm15,xmm0
+  movdqa      xmm0,xmm1
+  movdqa      xmm11,xmm12
+  punpckldq   xmm0,xmm5
+  punpckhdq   xmm1,xmm5
+  punpcklqdq  xmm11,xmm0               ; xmm11 = p1 column (8 Cb bytes, then 8 Cr bytes)
+  punpckhqdq  xmm12,xmm0               ; xmm12 = p0 column
+  movsx       eax,r9w                  ; iAlpha (low 16 bits)
+  movdqa      xmm14,xmm15
+  punpcklqdq  xmm14,xmm1               ; xmm14 = q0 column
+  punpckhqdq  xmm15,xmm1               ; xmm15 = q1 column
+  pxor        xmm1,xmm1                ; zero, used to widen bytes to words
+  movd        xmm0,eax
+  movdqa      xmm4,xmm12
+  movdqa      xmm8,xmm11
+  mov         eax, ebp                 ; iBeta
+  punpcklwd   xmm0,xmm0
+  punpcklbw   xmm4,xmm1                ; xmm4  = p0, Cb lanes as words
+  punpckhbw   xmm12,xmm1               ; xmm12 = p0, Cr lanes as words
+  movdqa      xmm9,xmm14
+  movdqa      xmm7,xmm15
+  movdqa      xmm10,xmm15
+  pshufd      xmm13,xmm0,0             ; xmm13 = iAlpha in all 8 words
+  punpcklbw   xmm9,xmm1                ; xmm9  = q0, Cb lanes
+  punpckhbw   xmm14,xmm1               ; xmm14 = q0, Cr lanes
+  movdqa      xmm6,xmm13
+  movd        xmm0,eax
+  movdqa      [rsp],xmm11              ; keep the packed p1 column for the transpose back
+  mov         eax,2                    ; rounding constant
+  cwde
+  punpckhbw   xmm11,xmm1               ; xmm11 = p1, Cr lanes
+  punpckhbw   xmm10,xmm1               ; xmm10 = q1, Cr lanes
+  punpcklbw   xmm7,xmm1                ; xmm7  = q1, Cb lanes
+  punpcklwd   xmm0,xmm0
+  punpcklbw   xmm8,xmm1                ; xmm8  = p1, Cb lanes
+  pshufd      xmm3,xmm0,0              ; xmm3 = iBeta in all 8 words
+  movdqa      xmm1,xmm8
+  movdqa      xmm0,xmm4
+  psubw       xmm0,xmm9
+  psubw       xmm1,xmm4
+  movdqa      xmm2,xmm3
+  pabsw       xmm0,xmm0
+  pcmpgtw     xmm6,xmm0                ; iAlpha > |p0 - q0|   (Cb lanes)
+  pabsw       xmm0,xmm1
+  movdqa      xmm1,xmm3
+  pcmpgtw     xmm2,xmm0                ; iBeta > |p1 - p0|
+  pand        xmm6,xmm2
+  movdqa      xmm0,xmm7
+  movdqa      xmm2,xmm3
+  psubw       xmm0,xmm9
+  pabsw       xmm0,xmm0
+  pcmpgtw     xmm1,xmm0                ; iBeta > |q1 - q0|
+  pand        xmm6,xmm1                ; xmm6 = filter mask for the Cb lanes
+  movdqa      xmm0,xmm12
+  movdqa      xmm1,xmm11
+  psubw       xmm0,xmm14
+  psubw       xmm1,xmm12
+  movdqa      xmm5,xmm6
+  pabsw       xmm0,xmm0
+  pcmpgtw     xmm13,xmm0               ; iAlpha > |p0 - q0|   (Cr lanes)
+  pabsw       xmm0,xmm1
+  movdqa      xmm1,xmm8
+  pcmpgtw     xmm2,xmm0                ; iBeta > |p1 - p0|
+  paddw       xmm1,xmm8
+  movdqa      xmm0,xmm10
+  pand        xmm13,xmm2
+  psubw       xmm0,xmm14
+  paddw       xmm1,xmm4
+  movdqa      xmm2,xmm11
+  pabsw       xmm0,xmm0
+  paddw       xmm2,xmm11
+  paddw       xmm1,xmm7                ; xmm1 = 2*p1 + p0 + q1   (Cb lanes)
+  pcmpgtw     xmm3,xmm0                ; iBeta > |q1 - q0|
+  paddw       xmm2,xmm12
+  movd        xmm0,eax
+  pand        xmm13,xmm3               ; xmm13 = filter mask for the Cr lanes
+  paddw       xmm2,xmm10               ; xmm2 = 2*p1 + p0 + q1   (Cr lanes)
+  punpcklwd   xmm0,xmm0
+  pshufd      xmm3,xmm0,0              ; xmm3 = 2 in all 8 words
+  movdqa      xmm0,xmm6
+  paddw       xmm1,xmm3
+  pandn       xmm0,xmm4
+  paddw       xmm2,xmm3
+  psraw       xmm1,2
+  pand        xmm5,xmm1
+  por         xmm5,xmm0                ; Cb lanes: mask ? new p0 : old p0
+  paddw       xmm7,xmm7
+  paddw       xmm10,xmm10
+  psraw       xmm2,2
+  movdqa      xmm1,xmm13
+  movdqa      xmm0,xmm13
+  pandn       xmm0,xmm12
+  pand        xmm1,xmm2
+  paddw       xmm7,xmm9
+  por         xmm1,xmm0                ; Cr lanes: mask ? new p0 : old p0
+  paddw       xmm10,xmm14
+  paddw       xmm7,xmm8                ; xmm7 = 2*q1 + q0 + p1   (Cb lanes)
+  movdqa      xmm0,xmm13
+  packuswb    xmm5,xmm1                ; xmm5 = filtered p0 column, packed to bytes
+  paddw       xmm7,xmm3
+  paddw       xmm10,xmm11              ; xmm10 = 2*q1 + q0 + p1  (Cr lanes)
+  movdqa      xmm1,xmm6
+  paddw       xmm10,xmm3
+  pandn       xmm6,xmm9
+  psraw       xmm7,2
+  pand        xmm1,xmm7
+  psraw       xmm10,2
+  pandn       xmm13,xmm14
+  pand        xmm0,xmm10
+  por         xmm1,xmm6
+  movdqa      xmm6,[rsp]               ; packed p1 column saved earlier
+  movdqa      xmm4,xmm6
+  por         xmm0,xmm13
+  punpcklbw   xmm4,xmm5                ; transpose back: interleave p1 with new p0
+  punpckhbw   xmm6,xmm5
+  movdqa      xmm3,xmm4
+  packuswb    xmm1,xmm0                ; xmm1 = filtered q0 column, packed to bytes
+  movdqa      xmm0,xmm1
+  punpckhbw   xmm1,xmm15               ; interleave new q0 with q1
+  punpcklbw   xmm0,xmm15
+  punpcklwd   xmm3,xmm0
+  punpckhwd   xmm4,xmm0
+  movdqa      xmm0,xmm6
+  movdqa      xmm2,xmm3
+  punpcklwd   xmm0,xmm1
+  punpckhwd   xmm6,xmm1
+  movdqa      xmm1,xmm4
+  punpckldq   xmm2,xmm0
+  punpckhdq   xmm3,xmm0
+  punpckldq   xmm1,xmm6
+  movdqa      xmm0,xmm2
+  punpcklqdq  xmm0,xmm1
+  punpckhdq   xmm4,xmm6
+  punpckhqdq  xmm2,xmm1
+  movdqa      [rsp+10h],xmm0           ; Cb r0 | Cr r0 | Cb r4 | Cr r4
+  movdqa      [rsp+60h],xmm2           ; Cb r1 | Cr r1 | Cb r5 | Cr r5
+  movdqa      xmm0,xmm3
+  mov         eax,[rsp+10h]
+  mov         [rcx-2],eax              ; Cb row 0
+  mov         eax,[rsp+60h]
+  punpcklqdq  xmm0,xmm4
+  punpckhqdq  xmm3,xmm4
+  mov         [r10+rcx-2],eax          ; Cb row 1
+  movdqa      [rsp+20h],xmm0           ; Cb r2 | Cr r2 | Cb r6 | Cr r6
+  mov         eax, [rsp+20h]
+  movdqa      [rsp+70h],xmm3           ; Cb r3 | Cr r3 | Cb r7 | Cr r7
+  mov         [rcx+r10*2-2],eax        ; Cb row 2
+  mov         eax,[rsp+70h]
+  mov         [rdx+rcx-2],eax          ; Cb row 3
+  mov         eax,[rsp+18h]
+  mov         [r11],eax                ; Cb row 4
+  mov         eax,[rsp+68h]
+  mov         [r10+r11],eax            ; Cb row 5
+  mov         eax,[rsp+28h]
+  mov         [r11+r10*2],eax          ; Cb row 6
+  mov         eax,[rsp+78h]
+  mov         [rdx+r11],eax            ; Cb row 7
+  mov         eax,[rsp+14h]
+  mov         [rdi-2],eax              ; Cr row 0
+  mov         eax,[rsp+64h]
+  mov         [r10+rdi-2],eax          ; Cr row 1
+  mov         eax,[rsp+24h]
+  mov         [rdi+r10*2-2],eax        ; Cr row 2
+  mov         eax, [rsp+74h]
+  mov         [rdx+rdi-2],eax          ; Cr row 3
+  mov         eax, [rsp+1Ch]
+  mov         [rbx],eax                ; Cr row 4
+  mov         eax, [rsp+6Ch]
+  mov         [r10+rbx],eax            ; Cr row 5
+  mov         eax,[rsp+2Ch]
+  mov         [rbx+r10*2],eax          ; Cr row 6
+  mov         eax,[rsp+7Ch]
+  mov         [rdx+rbx],eax            ; Cr row 7
+  lea         r11,[rsp+140h]           ; release the scratch area
+  mov         rsp,r11
+  pop         r12                      ; callee-saved registers come back from the pushes only
+  pop         rbp
+  pop         rbx
+  ret
+
+
+WELS_EXTERN DeblockChromaLt4H_sse2
+ALIGN  16
+DeblockChromaLt4H_sse2:               ; UNIX64 variant: tc-clipped filter of p0/q0 over 8 rows of two planes, 4 bytes (p1 p0 q0 q1) per row
+  mov         rax,rsp                 ; value never read: rax is overwritten by the lea eax below
+  push        rbx  
+  push        rbp  
+  push        r12  
+  push        r13
+  push        r14                     ; five pushes plus the 170h below keep rsp 16-byte aligned for the movdqa stack accesses
+  sub         rsp,170h  
+  
+  mov         r13,   r8               ; r13 = 5th argument (iBeta, see use below)
+  mov         r14,   r9               ; r14 = 6th argument (pTC, see use below)
+  mov         r8,    rdx              ; r8  = 3rd argument, used as the row stride
+  mov         r9,    rcx              ; r9  = 4th argument, used as the alpha threshold
+  mov         rdx,   rdi              ; rdx = 1st pointer argument (presumably pPixCb - confirm against the prototype)
+  mov         rcx,   rsi              ; rcx = 2nd pointer argument (presumably pPixCr); both planes get identical treatment
+
+  movsxd      rsi,r8d                 ; rsi = stride
+  lea         eax,[r8*4] 
+  mov         r11d,r9d                ; r11d = alpha
+  movsxd      r10,eax                 ; r10 = 4*stride
+  mov         eax, [rcx-2]            ; rcx plane, row 0
+  mov         r12,rdx                 ; r12 = rdx plane base, kept for the write-back
+  mov         [rsp+40h],eax 
+  mov         eax, [rsi+rcx-2]        ; rcx plane, row 1
+  lea         rbx,[r10+rcx-2]         ; rbx = rcx plane, row 4, column -2
+  movdqa      xmm5,[rsp+40h]          ; only the low dword of each such 16-byte load is consumed
+  mov         [rsp+50h],eax 
+  mov         eax, [rcx+rsi*2-2]      ; rcx plane, row 2
+  lea         rbp,[r10+rdx-2]         ; rbp = rdx plane, row 4, column -2
+  movdqa      xmm2, [rsp+50h] 
+  mov         [rsp+60h],eax 
+  lea         r10,[rsi+rsi*2]         ; r10 = 3*stride
+  mov         rdi,rcx                 ; rdi = rcx plane base, kept for the write-back
+  mov         eax,[r10+rcx-2]         ; rcx plane, row 3
+  movdqa      xmm4,[rsp+60h] 
+  mov         [rsp+70h],eax 
+  mov         eax,[rdx-2]             ; rdx plane, row 0
+  mov         [rsp+80h],eax 
+  mov         eax, [rsi+rdx-2]        ; rdx plane, row 1
+  movdqa      xmm3,[rsp+70h] 
+  mov         [rsp+90h],eax 
+  mov         eax,[rdx+rsi*2-2]       ; rdx plane, row 2
+  punpckldq   xmm5,[rsp+80h]          ; xmm5 = row 0 of both planes
+  mov         [rsp+0A0h],eax 
+  mov         eax, [r10+rdx-2]        ; rdx plane, row 3
+  punpckldq   xmm2,[rsp+90h]          ; xmm2 = row 1 of both planes
+  mov         [rsp+0B0h],eax 
+  mov         eax, [rbx]              ; rcx plane, row 4
+  punpckldq   xmm4,[rsp+0A0h]         ; xmm4 = row 2 of both planes
+  mov         [rsp+80h],eax 
+  mov         eax,[rbp]               ; rdx plane, row 4
+  punpckldq   xmm3,[rsp+0B0h]         ; xmm3 = row 3 of both planes
+  mov         [rsp+90h],eax 
+  mov         eax,[rsi+rbx]           ; rcx plane, row 5
+  movdqa      xmm0,[rsp+80h] 
+  punpckldq   xmm0,[rsp+90h] 
+  punpcklqdq  xmm5,xmm0               ; xmm5 = rows 0 and 4 of both planes
+  movdqa      [rsp+80h],xmm0 
+  mov         [rsp+80h],eax 
+  mov         eax,[rsi+rbp]           ; rdx plane, row 5
+  movdqa      xmm0,[rsp+80h] 
+  movdqa      xmm1,xmm5 
+  mov         [rsp+90h],eax 
+  mov         eax,[rbx+rsi*2]         ; rcx plane, row 6
+  punpckldq   xmm0,[rsp+90h] 
+  punpcklqdq  xmm2,xmm0               ; xmm2 = rows 1 and 5 of both planes
+  punpcklbw   xmm1,xmm2               ; start of the byte transpose
+  punpckhbw   xmm5,xmm2 
+  movdqa      [rsp+80h],xmm0 
+  mov         [rsp+80h],eax 
+  mov         eax,[rbp+rsi*2]         ; rdx plane, row 6
+  movdqa      xmm0, [rsp+80h] 
+  mov         [rsp+90h],eax 
+  mov         eax,[r10+rbx]           ; rcx plane, row 7
+  movdqa      xmm7,xmm1 
+  punpckldq   xmm0,[rsp+90h] 
+  punpcklqdq  xmm4,xmm0               ; xmm4 = rows 2 and 6 of both planes
+  movdqa      [rsp+80h],xmm0 
+  mov         [rsp+80h],eax 
+  mov         eax, [r10+rbp]          ; rdx plane, row 7
+  movdqa      xmm0,[rsp+80h] 
+  mov         [rsp+90h],eax 
+  punpckldq   xmm0,[rsp+90h] 
+  punpcklqdq  xmm3,xmm0               ; xmm3 = rows 3 and 7 of both planes
+  movdqa      xmm0,xmm4 
+  punpcklbw   xmm0,xmm3 
+  punpckhbw   xmm4,xmm3 
+  punpcklwd   xmm7,xmm0 
+  punpckhwd   xmm1,xmm0 
+  movdqa      xmm0,xmm5 
+  movdqa      xmm6,xmm7 
+  punpcklwd   xmm0,xmm4 
+  punpckhwd   xmm5,xmm4 
+  punpckldq   xmm6,xmm0 
+  punpckhdq   xmm7,xmm0 
+  movdqa      xmm0,xmm1 
+  punpckldq   xmm0,xmm5 
+  mov         rax, r14    ; pTC
+  punpckhdq   xmm1,xmm5 
+  movdqa      xmm9,xmm6 
+  punpckhqdq  xmm6,xmm0               ; xmm6 = p0 column (8 bytes per plane)
+  punpcklqdq  xmm9,xmm0               ; xmm9 = p1 column
+  movdqa      xmm2,xmm7 
+  movdqa      xmm13,xmm6 
+  movdqa      xmm4,xmm9 
+  movdqa      [rsp+10h],xmm9          ; keep the packed p1 column for the transpose back
+  punpcklqdq  xmm2,xmm1               ; xmm2 = q0 column
+  punpckhqdq  xmm7,xmm1               ; xmm7 = q1 column
+  pxor        xmm1,xmm1               ; zero, used to widen bytes to words
+  movsx       ecx,byte [rax+3]        ; tc[3]
+  movsx       edx,byte [rax+2]        ; tc[2]
+  movsx       r8d,byte [rax+1]        ; tc[1]
+  movsx       r9d,byte [rax]          ; tc[0]
+  movdqa      xmm10,xmm1 
+  movdqa      xmm15,xmm2 
+  punpckhbw   xmm2,xmm1               ; xmm2 = q0, high 8 lanes as words
+  punpckhbw   xmm6,xmm1               ; xmm6 = p0, high 8 lanes
+  punpcklbw   xmm4,xmm1               ; xmm4 = p1, low 8 lanes
+  movsx       eax,r11w                ; alpha (low 16 bits)
+  mov         word [rsp+0Eh],cx       ; build the word vector tc0,tc0,tc1,tc1,tc2,tc2,tc3,tc3 at [rsp]
+  mov         word [rsp+0Ch],cx 
+  movdqa      xmm3,xmm7 
+  movdqa      xmm8,xmm7 
+  movdqa      [rsp+20h],xmm7          ; keep the packed q1 column for the transpose back
+  punpcklbw   xmm15,xmm1              ; xmm15 = q0, low 8 lanes
+  punpcklbw   xmm13,xmm1              ; xmm13 = p0, low 8 lanes
+  punpcklbw   xmm3,xmm1               ; xmm3 = q1, low 8 lanes
+  mov         word [rsp+0Ah],dx 
+  mov         word [rsp+8],dx 
+  mov         word [rsp+6],r8w 
+  movd        xmm0,eax 
+  movdqa      [rsp+30h],xmm6          ; spill p0 high lanes
+  punpckhbw   xmm9,xmm1               ; xmm9 = p1, high 8 lanes
+  punpckhbw   xmm8,xmm1               ; xmm8 = q1, high 8 lanes
+  punpcklwd   xmm0,xmm0 
+  mov         eax, r13d   ; iBeta
+  mov         word [rsp+4],r8w 
+  mov         word [rsp+2],r9w 
+  pshufd      xmm12,xmm0,0            ; xmm12 = alpha in all 8 words
+  mov         word [rsp],r9w 
+  movd        xmm0,eax 
+  mov         eax,4                   ; rounding constant
+  cwde             
+  movdqa      xmm14, [rsp]            ; xmm14 = tc words; the same vector serves both halves
+  movdqa      [rsp],xmm2              ; slot reused: spill q0 high lanes
+  movdqa      xmm2,xmm12 
+  punpcklwd   xmm0,xmm0 
+  pshufd      xmm11,xmm0,0            ; xmm11 = iBeta in all 8 words
+  psubw       xmm10,xmm14             ; xmm10 = -tc
+  movd        xmm0,eax 
+  movdqa      xmm7,xmm14 
+  movdqa      xmm6,xmm14 
+  pcmpgtw     xmm7,xmm1               ; xmm7 = (tc > 0) mask
+  punpcklwd   xmm0,xmm0 
+  pshufd      xmm5,xmm0,0             ; xmm5 = 4 in all 8 words
+  movdqa      xmm0,xmm4 
+  movdqa      xmm1,xmm15 
+  psubw       xmm4,xmm13              ; p1 - p0
+  psubw       xmm0,xmm3               ; p1 - q1
+  psubw       xmm1,xmm13              ; q0 - p0
+  psubw       xmm3,xmm15              ; q1 - q0
+  psllw       xmm1,2 
+  paddw       xmm1,xmm0 
+  paddw       xmm1,xmm5 
+  movdqa      xmm0,xmm10 
+  psraw       xmm1,3                  ; ((q0 - p0)*4 + (p1 - q1) + 4) >> 3
+  pmaxsw      xmm0,xmm1 
+  pminsw      xmm6,xmm0               ; xmm6 = delta clipped to [-tc, tc]  (low lanes)
+  movdqa      xmm1,xmm11 
+  movdqa      xmm0,xmm13 
+  psubw       xmm0,xmm15 
+  pabsw       xmm0,xmm0               ; NOTE: pabsw is SSSE3, so the routine needs SSSE3 despite its name
+  pcmpgtw     xmm2,xmm0               ; alpha > |p0 - q0|
+  pabsw       xmm0,xmm4 
+  pcmpgtw     xmm1,xmm0               ; iBeta > |p1 - p0|
+  pabsw       xmm0,xmm3 
+  pand        xmm2,xmm1 
+  movdqa      xmm1,xmm11 
+  movdqa      xmm3,[rsp+30h]          ; xmm3 = p0, high lanes
+  pcmpgtw     xmm1,xmm0               ; iBeta > |q1 - q0|
+  movdqa      xmm0,xmm9 
+  pand        xmm2,xmm1 
+  psubw       xmm0,xmm8               ; p1 - q1 (high lanes)
+  psubw       xmm9,xmm3               ; p1 - p0 (high lanes)
+  pand        xmm2,xmm7 
+  pand        xmm6,xmm2               ; delta zeroed where any condition fails
+  psubw       xmm15,xmm6              ; q0 -= delta (low lanes)
+  paddw       xmm13,xmm6              ; p0 += delta (low lanes)
+  movdqa      xmm2,[rsp]              ; xmm2 = q0, high lanes
+  movdqa      xmm1,xmm2 
+  psubw       xmm1,xmm3               ; q0 - p0 (high lanes)
+  psubw       xmm8,xmm2               ; q1 - q0 (high lanes)
+  psllw       xmm1,2 
+  paddw       xmm1,xmm0 
+  paddw       xmm1,xmm5 
+  movdqa      xmm0,xmm3 
+  movdqa      xmm5,[rsp+10h]          ; packed p1 column
+  psubw       xmm0,xmm2 
+  psraw       xmm1,3 
+  movdqa      xmm4,xmm5 
+  pabsw       xmm0,xmm0 
+  pmaxsw      xmm10,xmm1 
+  movdqa      xmm1,xmm11 
+  pcmpgtw     xmm12,xmm0              ; alpha > |p0 - q0| (high lanes)
+  pabsw       xmm0,xmm9 
+  pminsw      xmm14,xmm10             ; xmm14 = clipped delta (high lanes)
+  pcmpgtw     xmm1,xmm0 
+  pabsw       xmm0,xmm8 
+  pcmpgtw     xmm11,xmm0 
+  pand        xmm12,xmm1 
+  movdqa      xmm1,[rsp+20h]          ; packed q1 column
+  pand        xmm12,xmm11 
+  pand        xmm12,xmm7 
+  pand        xmm14,xmm12 
+  paddw       xmm3,xmm14              ; p0 += delta (high lanes)
+  psubw       xmm2,xmm14              ; q0 -= delta (high lanes)
+  packuswb    xmm13,xmm3              ; xmm13 = new p0 column, bytes
+  packuswb    xmm15,xmm2              ; xmm15 = new q0 column, bytes
+  punpcklbw   xmm4,xmm13              ; transpose back: interleave p1 with new p0
+  punpckhbw   xmm5,xmm13 
+  movdqa      xmm0,xmm15 
+  punpcklbw   xmm0,xmm1               ; interleave new q0 with q1
+  punpckhbw   xmm15,xmm1 
+  movdqa      xmm3,xmm4 
+  punpcklwd   xmm3,xmm0 
+  punpckhwd   xmm4,xmm0 
+  movdqa      xmm0,xmm5 
+  movdqa      xmm2,xmm3 
+  movdqa      xmm1,xmm4 
+  punpcklwd   xmm0,xmm15 
+  punpckhwd   xmm5,xmm15 
+  punpckldq   xmm2,xmm0 
+  punpckhdq   xmm3,xmm0 
+  punpckldq   xmm1,xmm5 
+  movdqa      xmm0,xmm2 
+  punpcklqdq  xmm0,xmm1 
+  punpckhdq   xmm4,xmm5 
+  punpckhqdq  xmm2,xmm1 
+  movdqa      [rsp+40h],xmm0          ; rows 0 and 4 of both planes
+  movdqa      xmm0,xmm3 
+  movdqa      [rsp+90h],xmm2          ; rows 1 and 5 of both planes
+  mov         eax,[rsp+40h] 
+  mov         [rdi-2],eax             ; rcx plane, row 0
+  mov         eax, [rsp+90h] 
+  punpcklqdq  xmm0,xmm4 
+  punpckhqdq  xmm3,xmm4 
+  mov         [rsi+rdi-2],eax         ; rcx plane, row 1
+  movdqa      [rsp+50h],xmm0          ; rows 2 and 6 of both planes
+  mov         eax,[rsp+50h] 
+  movdqa      [rsp+0A0h],xmm3         ; rows 3 and 7 of both planes
+  mov         [rdi+rsi*2-2],eax       ; rcx plane, row 2
+  mov         eax,[rsp+0A0h] 
+  mov         [r10+rdi-2],eax         ; rcx plane, row 3
+  mov         eax,[rsp+48h] 
+  mov         [rbx],eax               ; rcx plane, row 4
+  mov         eax,[rsp+98h] 
+  mov         [rsi+rbx],eax           ; rcx plane, row 5
+  mov         eax,[rsp+58h] 
+  mov         [rbx+rsi*2],eax         ; rcx plane, row 6
+  mov         eax, [rsp+0A8h] 
+  mov         [r10+rbx],eax           ; rcx plane, row 7
+  mov         eax, [rsp+44h] 
+  mov         [r12-2],eax             ; rdx plane, row 0
+  mov         eax,[rsp+94h] 
+  mov         [rsi+r12-2],eax         ; rdx plane, row 1
+  mov         eax,[rsp+54h] 
+  mov         [r12+rsi*2-2],eax       ; rdx plane, row 2
+  mov         eax, [rsp+0A4h] 
+  mov         [r10+r12-2],eax         ; rdx plane, row 3
+  mov         eax,[rsp+4Ch] 
+  mov         [rbp],eax               ; rdx plane, row 4
+  mov         eax,[rsp+9Ch] 
+  mov         [rsi+rbp],eax           ; rdx plane, row 5
+  mov         eax, [rsp+5Ch] 
+  mov         [rbp+rsi*2],eax         ; rdx plane, row 6
+  mov         eax,[rsp+0ACh] 
+  mov         [r10+rbp],eax           ; rdx plane, row 7
+  lea         r11,[rsp+170h]          ; release the scratch area
+  mov         rsp,r11 
+  pop         r14
+  pop         r13
+  pop         r12  
+  pop         rbp  
+  pop         rbx  
+  ret 
+
+
+
+%elifdef  X86_32
+
+; Filters 8 pixels of Cb and 8 of Cr across the boundary between rows pPix - iStride (p0) and pPix (q0).
+;  void DeblockChromaEq4V_sse2(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride,
+;                             int32_t iAlpha, int32_t iBeta)
+; X86_32 variant, arguments read from [ebp+8]..[ebp+18h]; uses pabsw (SSSE3) despite the _sse2 suffix.
+WELS_EXTERN   DeblockChromaEq4V_sse2
+
+ALIGN  16
+DeblockChromaEq4V_sse2:
+  push        ebp
+  mov         ebp,esp
+  and         esp,0FFFFFFF0h           ; align the frame so movdqa can be used on stack slots
+  sub         esp,68h                  ; 68h plus the two pushes below = 70h, so esp stays 16-byte aligned
+  mov         edx,[ebp+10h]      ;  iStride
+  mov         eax,[ebp+8]        ;  pPixCb
+  mov         ecx,[ebp+0Ch]      ;  pPixCr
+  movq        xmm4,[ecx]               ; Cr q0 row
+  movq        xmm5,[edx+ecx]           ; Cr q1 row
+  push        esi
+  push        edi
+  lea         esi,[edx+edx]            ; esi = 2*iStride
+  mov         edi,eax
+  sub         edi,esi
+  movq        xmm1,[edi]               ; Cb p1 row
+  mov         edi,ecx
+  sub         edi,esi
+  movq        xmm2,[edi]               ; Cr p1 row
+  punpcklqdq  xmm1,xmm2                ; xmm1 = p1: Cb low 8 bytes, Cr high 8 bytes
+  mov         esi,eax
+  sub         esi,edx                  ; esi = Cb p0 row address, kept for the store
+  movq        xmm2,[esi]
+  mov         edi,ecx
+  sub         edi,edx                  ; edi = Cr p0 row address, kept for the store
+  movq        xmm3,[edi]
+  punpcklqdq  xmm2,xmm3                ; xmm2 = p0
+  movq        xmm3,[eax]
+  punpcklqdq  xmm3,xmm4                ; xmm3 = q0
+  movq        xmm4,[edx+eax]
+  mov       edx, [ebp + 14h]           ; iAlpha
+  punpcklqdq  xmm4,xmm5                ; xmm4 = q1
+  movd        xmm5,edx
+  mov       edx, [ebp + 18h]           ; iBeta
+  pxor        xmm0,xmm0                ; zero, used to widen bytes to words
+  movdqa      xmm6,xmm5
+  punpcklwd   xmm6,xmm5
+  pshufd      xmm5,xmm6,0              ; xmm5 = low 16 bits of iAlpha in all 8 words
+  movd        xmm6,edx
+  movdqa      xmm7,xmm6
+  punpcklwd   xmm7,xmm6
+  pshufd      xmm6,xmm7,0              ; xmm6 = low 16 bits of iBeta in all 8 words
+  movdqa      xmm7,xmm1
+  punpckhbw   xmm1,xmm0
+  punpcklbw   xmm7,xmm0
+  movdqa      [esp+40h],xmm1           ; p1, Cr lanes
+  movdqa      [esp+60h],xmm7           ; p1, Cb lanes
+  movdqa      xmm7,xmm2
+  punpcklbw   xmm7,xmm0
+  movdqa      [esp+10h],xmm7           ; p0, Cb lanes
+  movdqa      xmm7,xmm3
+  punpcklbw   xmm7,xmm0
+  punpckhbw   xmm3,xmm0
+  movdqa      [esp+50h],xmm7           ; q0, Cb lanes
+  movdqa      xmm7,xmm4
+  punpckhbw   xmm4,xmm0
+  punpckhbw   xmm2,xmm0                ; xmm2 = p0, Cr lanes
+  punpcklbw   xmm7,xmm0                ; xmm7 = q1, Cb lanes
+  movdqa      [esp+30h],xmm3           ; q0, Cr lanes
+  movdqa      xmm3,[esp+10h]           ; xmm3 = p0, Cb lanes
+  movdqa      xmm1,xmm3
+  psubw       xmm1,[esp+50h]
+  pabsw       xmm1,xmm1
+  movdqa      [esp+20h],xmm4           ; q1, Cr lanes
+  movdqa      xmm0,xmm5
+  pcmpgtw     xmm0,xmm1                ; iAlpha > |p0 - q0|   (Cb lanes)
+  movdqa      xmm1,[esp+60h]
+  psubw       xmm1,xmm3
+  pabsw       xmm1,xmm1
+  movdqa      xmm4,xmm6
+  pcmpgtw     xmm4,xmm1                ; iBeta > |p1 - p0|
+  pand        xmm0,xmm4
+  movdqa      xmm1,xmm7
+  psubw       xmm1,[esp+50h]
+  pabsw       xmm1,xmm1
+  movdqa      xmm4,xmm6
+  pcmpgtw     xmm4,xmm1                ; iBeta > |q1 - q0|
+  movdqa      xmm1,xmm2
+  psubw       xmm1,[esp+30h]
+  pabsw       xmm1,xmm1
+  pcmpgtw     xmm5,xmm1                ; iAlpha > |p0 - q0|   (Cr lanes)
+  movdqa      xmm1,[esp+40h]
+  pand        xmm0,xmm4                ; xmm0 = filter mask for the Cb lanes
+  psubw       xmm1,xmm2
+  pabsw       xmm1,xmm1
+  movdqa      xmm4,xmm6
+  pcmpgtw     xmm4,xmm1                ; iBeta > |p1 - p0|
+  movdqa      xmm1,[esp+20h]
+  psubw       xmm1,[esp+30h]
+  pand        xmm5,xmm4
+  pabsw       xmm1,xmm1
+  pcmpgtw     xmm6,xmm1                ; iBeta > |q1 - q0|
+  pand        xmm5,xmm6                ; xmm5 = filter mask for the Cr lanes
+  mov         edx,2                    ; rounding constant
+  movsx       edx,dx
+  movd        xmm1,edx
+  movdqa      xmm4,xmm1
+  punpcklwd   xmm4,xmm1
+  pshufd      xmm1,xmm4,0              ; xmm1 = 2 in all 8 words
+  movdqa      xmm4,[esp+60h]
+  movdqa      xmm6,xmm4
+  paddw       xmm6,xmm4
+  paddw       xmm6,xmm3
+  paddw       xmm6,xmm7
+  movdqa      [esp+10h],xmm1           ; slot reused for the rounding vector (p0 Cb lives in xmm3)
+  paddw       xmm6,[esp+10h]
+  psraw       xmm6,2                   ; (2*p1 + p0 + q1 + 2) >> 2   (Cb lanes)
+  movdqa      xmm4,xmm0
+  pandn       xmm4,xmm3
+  movdqa      xmm3,[esp+40h]
+  movdqa      xmm1,xmm0
+  pand        xmm1,xmm6
+  por         xmm1,xmm4                ; Cb lanes: mask ? new p0 : old p0
+  movdqa      xmm6,xmm3
+  paddw       xmm6,xmm3
+  movdqa      xmm3,[esp+10h]           ; xmm3 = rounding vector from here on
+  paddw       xmm6,xmm2
+  paddw       xmm6,[esp+20h]
+  paddw       xmm6,xmm3
+  psraw       xmm6,2                   ; (2*p1 + p0 + q1 + 2) >> 2   (Cr lanes)
+  movdqa      xmm4,xmm5
+  pand        xmm4,xmm6
+  movdqa      xmm6,xmm5
+  pandn       xmm6,xmm2
+  por         xmm4,xmm6
+  packuswb    xmm1,xmm4                ; xmm1 = output p0 row: Cb low 8 bytes, Cr high 8 bytes
+  movdqa      xmm4,[esp+50h]
+  movdqa      xmm6,xmm7
+  paddw       xmm6,xmm7
+  paddw       xmm6,xmm4
+  paddw       xmm6,[esp+60h]
+  paddw       xmm6,xmm3
+  psraw       xmm6,2                   ; (2*q1 + q0 + p1 + 2) >> 2   (Cb lanes)
+  movdqa      xmm2,xmm0
+  pand        xmm2,xmm6
+  pandn       xmm0,xmm4
+  por         xmm2,xmm0                ; Cb lanes: mask ? new q0 : old q0
+  movdqa      xmm0,[esp+20h]
+  movdqa      xmm6,xmm0
+  paddw       xmm6,xmm0
+  movdqa      xmm0,[esp+30h]
+  paddw       xmm6,xmm0
+  paddw       xmm6,[esp+40h]
+  movdqa      xmm4,xmm5
+  paddw       xmm6,xmm3
+  movq        [esi],xmm1               ; store Cb p0 row
+  psraw       xmm6,2                   ; (2*q1 + q0 + p1 + 2) >> 2   (Cr lanes)
+  pand        xmm4,xmm6
+  pandn       xmm5,xmm0
+  por         xmm4,xmm5
+  packuswb    xmm2,xmm4                ; xmm2 = output q0 row: Cb low, Cr high
+  movq        [eax],xmm2               ; store Cb q0 row
+  psrldq      xmm1,8
+  movq        [edi],xmm1               ; store Cr p0 row
+  pop         edi
+  psrldq      xmm2,8
+  movq        [ecx],xmm2               ; store Cr q0 row
+  pop         esi
+  mov         esp,ebp
+  pop         ebp
+  ret
+
+;******************************************************************************
+; void DeblockChromaLt4V_sse2(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride,
+;                           int32_t iAlpha, int32_t iBeta, int8_t * pTC);
+;
+; X86_32 variant, arguments read from [ebp+8]..[ebp+1Ch].
+; Works on 8 pixels of Cb and 8 of Cr across the boundary between rows
+; pPix - iStride (p0) and pPix (q0). Per lane:
+;     delta = clip(-tc, tc, ((q0 - p0)*4 + (p1 - q1) + 4) >> 3)
+;     p0 += delta,  q0 -= delta
+; applied only where tc > 0, iAlpha > |p0 - q0|, iBeta > |p1 - p0| and
+; iBeta > |q1 - q0|. tc per lane is pTC[0],pTC[0],pTC[1],pTC[1],pTC[2],pTC[2],
+; pTC[3],pTC[3], the same vector for both planes.
+; NOTE: pabsw is an SSSE3 instruction, so the routine needs SSSE3 despite its name.
+;*******************************************************************************
+
+WELS_EXTERN  DeblockChromaLt4V_sse2
+
+ALIGN  16
+DeblockChromaLt4V_sse2:
+  push        ebp
+  mov         ebp,esp
+  and         esp,0FFFFFFF0h           ; align the frame so movdqa can be used on stack slots
+  sub         esp,0E4h                 ; 0E4h plus the three pushes = 0F0h, so esp stays 16-byte aligned
+  push        ebx
+  push        esi
+  mov         esi, [ebp+1Ch]      ;  pTC
+  movsx       ebx, byte [esi+2]        ; tc[2]
+  push        edi
+  movsx       di,byte [esi+3]          ; tc[3]
+  mov         word [esp+0Ch],bx        ; park tc[2]
+  movsx       bx,byte  [esi+1]         ; tc[1]
+  movsx       esi,byte  [esi]          ; tc[0]
+  mov         word  [esp+0Eh],si       ; park tc[0]
+  movzx       esi,di
+  movd        xmm1,esi                 ; xmm1 = tc[3]
+  movzx       esi,di
+  movd        xmm2,esi                 ; xmm2 = tc[3]
+  mov         si,word  [esp+0Ch]
+  mov         edx, [ebp + 10h]         ; iStride
+  mov         eax, [ebp + 08h]         ; pPixCb
+  movzx       edi,si
+  movzx       esi,si
+  mov         ecx, [ebp + 0Ch]         ; pPixCr
+  movd        xmm4,esi                 ; xmm4 = tc[2]
+  movzx       esi,bx
+  movd        xmm5,esi                 ; xmm5 = tc[1]
+  movd        xmm3,edi                 ; xmm3 = tc[2]
+  movzx       esi,bx
+  movd        xmm6,esi                 ; xmm6 = tc[1]
+  mov         si,word [esp+0Eh]
+  movzx       edi,si
+  movzx       esi,si
+  punpcklwd   xmm6,xmm2
+  pxor        xmm0,xmm0
+  movdqa      [esp+40h],xmm0           ; zero vector
+  movd        xmm7,edi                 ; xmm7 = tc[0]
+  movd        xmm0,esi                 ; xmm0 = tc[0]
+  lea         esi,[edx+edx]            ; esi = 2*iStride
+  mov         edi,eax
+  sub         edi,esi
+  punpcklwd   xmm5,xmm1
+  movdqa      xmm1,[esp+40h]           ; xmm1 = zero, used to widen bytes to words
+  punpcklwd   xmm0,xmm4
+  movq        xmm4,[edx+ecx]           ; Cr q1 row
+  punpcklwd   xmm7,xmm3
+  movq        xmm3,[eax]               ; Cb q0 row
+  punpcklwd   xmm0,xmm6
+  movq        xmm6,[edi]               ; Cb p1 row
+  punpcklwd   xmm7,xmm5
+  punpcklwd   xmm0,xmm7                ; xmm0 = tc0,tc0,tc1,tc1,tc2,tc2,tc3,tc3 (words)
+  mov         edi,ecx
+  sub         edi,esi
+  movdqa      xmm2,xmm1
+  psubw       xmm2,xmm0
+  movdqa      [esp+60h],xmm2           ; -tc
+  movq        xmm2, [edi]              ; Cr p1 row
+  punpcklqdq  xmm6,xmm2                ; xmm6 = p1: Cb low 8 bytes, Cr high 8 bytes
+  mov         esi,eax
+  sub         esi,edx                  ; esi = Cb p0 row address, kept for the store
+  movq        xmm7,[esi]
+  mov         edi,ecx
+  sub         edi,edx                  ; edi = Cr p0 row address, kept for the store
+  movq        xmm2,[edi]
+  punpcklqdq  xmm7,xmm2                ; xmm7 = p0
+  movq        xmm2,[ecx]
+  punpcklqdq  xmm3,xmm2                ; xmm3 = q0
+  movq        xmm2,[edx+eax]
+  movsx       edx,word [ebp + 14h]     ; iAlpha (low 16 bits)
+  punpcklqdq  xmm2,xmm4
+  movdqa      [esp+0E0h],xmm2          ; q1, packed
+  movd        xmm2,edx
+  movsx       edx,word [ebp + 18h]     ; iBeta (low 16 bits)
+  movdqa      xmm4,xmm2
+  punpcklwd   xmm4,xmm2
+  movd        xmm2,edx
+  movdqa      xmm5,xmm2
+  punpcklwd   xmm5,xmm2
+  pshufd      xmm2,xmm5,0
+  movdqa      [esp+50h],xmm2           ; iBeta in all 8 words
+  movdqa      xmm2,xmm6
+  punpcklbw   xmm2,xmm1
+  movdqa      [esp+0D0h],xmm3          ; q0, packed
+  pshufd      xmm4,xmm4,0              ; xmm4 = iAlpha in all 8 words
+  movdqa      [esp+30h],xmm2           ; p1, Cb lanes
+  punpckhbw   xmm6,xmm1
+  movdqa      [esp+80h],xmm6           ; p1, Cr lanes
+  movdqa      xmm6,[esp+0D0h]
+  punpckhbw   xmm6,xmm1
+  movdqa      [esp+70h],xmm6           ; q0, Cr lanes
+  movdqa      xmm6, [esp+0E0h]
+  punpckhbw   xmm6,xmm1
+  movdqa     [esp+90h],xmm6            ; q1, Cr lanes
+  movdqa      xmm5, [esp+0E0h]
+  movdqa      xmm2,xmm7
+  punpckhbw   xmm7,xmm1
+  punpcklbw   xmm5,xmm1                ; xmm5 = q1, Cb lanes
+  movdqa       [esp+0A0h],xmm7         ; p0, Cr lanes
+  punpcklbw   xmm3,xmm1                ; xmm3 = q0, Cb lanes
+  mov         edx,4                    ; rounding constant
+  punpcklbw   xmm2,xmm1                ; xmm2 = p0, Cb lanes
+  movsx       edx,dx
+  movd        xmm6,edx
+  movdqa      xmm7,xmm6
+  punpcklwd   xmm7,xmm6
+  pshufd      xmm6,xmm7,0
+  movdqa      xmm7,[esp+30h]
+  movdqa      [esp+20h],xmm6           ; 4 in all 8 words
+  psubw       xmm7,xmm5                ; p1 - q1
+  movdqa      xmm6,xmm0
+  pcmpgtw     xmm6,xmm1
+  movdqa      xmm1,[esp+60h]
+  movdqa      [esp+40h],xmm6           ; slot reused: (tc > 0) mask
+  movdqa      xmm6,xmm3
+  psubw       xmm6,xmm2                ; q0 - p0
+  psllw       xmm6,2
+  paddw       xmm6,xmm7
+  paddw       xmm6, [esp+20h]
+  movdqa      xmm7, [esp+50h]
+  psraw       xmm6,3                   ; ((q0 - p0)*4 + (p1 - q1) + 4) >> 3
+  pmaxsw      xmm1,xmm6
+  movdqa      [esp+10h],xmm0
+  movdqa      xmm6, [esp+10h]
+  pminsw      xmm6,xmm1
+  movdqa      [esp+10h],xmm6           ; delta clipped to [-tc, tc]  (Cb lanes)
+  movdqa      xmm1,xmm2
+  psubw       xmm1,xmm3
+  pabsw       xmm1,xmm1
+  movdqa      xmm6,xmm4
+  pcmpgtw     xmm6,xmm1                ; iAlpha > |p0 - q0|
+  movdqa      xmm1, [esp+30h]
+  psubw       xmm1,xmm2
+  pabsw       xmm1,xmm1
+  pcmpgtw     xmm7,xmm1                ; iBeta > |p1 - p0|
+  movdqa      xmm1,[esp+50h]
+  pand        xmm6,xmm7
+  movdqa      xmm7,[esp+50h]
+  psubw       xmm5,xmm3
+  pabsw       xmm5,xmm5
+  pcmpgtw     xmm1,xmm5                ; iBeta > |q1 - q0|
+  movdqa      xmm5,[esp+80h]
+  psubw       xmm5,[esp+90h]           ; p1 - q1 (Cr lanes)
+  pand        xmm6,xmm1
+  pand        xmm6,[esp+40h]
+  movdqa      xmm1,[esp+10h]
+  pand        xmm1,xmm6
+  movdqa      xmm6,[esp+70h]
+  movdqa      [esp+30h],xmm1           ; slot reused: masked delta for the Cb lanes
+  movdqa      xmm1,[esp+0A0h]          ; xmm1 = p0, Cr lanes
+  psubw       xmm6,xmm1                ; q0 - p0 (Cr lanes)
+  psllw       xmm6,2
+  paddw       xmm6,xmm5
+  paddw       xmm6,[esp+20h]
+  movdqa      xmm5,[esp+60h]
+  psraw       xmm6,3
+  pmaxsw      xmm5,xmm6
+  pminsw      xmm0,xmm5                ; xmm0 = clipped delta (Cr lanes)
+  movdqa      xmm5,[esp+70h]           ; xmm5 = q0, Cr lanes
+  movdqa      xmm6,xmm1
+  psubw       xmm6,xmm5
+  pabsw       xmm6,xmm6
+  pcmpgtw     xmm4,xmm6                ; iAlpha > |p0 - q0|
+  movdqa      xmm6,[esp+80h]
+  psubw       xmm6,xmm1
+  pabsw       xmm6,xmm6
+  pcmpgtw     xmm7,xmm6                ; iBeta > |p1 - p0|
+  movdqa      xmm6,[esp+90h]
+  pand        xmm4,xmm7
+  movdqa      xmm7,[esp+50h]
+  psubw       xmm6,xmm5
+  pabsw       xmm6,xmm6
+  pcmpgtw     xmm7,xmm6                ; iBeta > |q1 - q0|
+  pand        xmm4,xmm7
+  pand        xmm4,[esp+40h]
+  pand        xmm0,xmm4                ; masked delta for the Cr lanes
+  movdqa      xmm4,[esp+30h]
+  paddw       xmm2,xmm4                ; p0 += delta (Cb)
+  paddw       xmm1,xmm0                ; p0 += delta (Cr)
+  packuswb    xmm2,xmm1
+  movq        [esi],xmm2               ; store Cb p0 row
+  psubw       xmm3,xmm4                ; q0 -= delta (Cb)
+  psubw       xmm5,xmm0                ; q0 -= delta (Cr)
+  packuswb    xmm3,xmm5
+  movq        [eax],xmm3               ; store Cb q0 row
+  psrldq      xmm2,8
+  movq        [edi],xmm2               ; store Cr p0 row
+  pop         edi
+  pop         esi
+  psrldq      xmm3,8
+  movq        [ecx],xmm3               ; store Cr q0 row
+  pop         ebx
+  mov         esp,ebp
+  pop         ebp
+  ret
+
+;***************************************************************************
+;  void DeblockChromaEq4H_sse2(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride,
+;          int32_t iAlpha, int32_t iBeta)
+;***************************************************************************
+
+WELS_EXTERN     DeblockChromaEq4H_sse2
+
+ALIGN  16
+
+DeblockChromaEq4H_sse2:
+  push        ebp
+  mov         ebp,esp
+  and         esp,0FFFFFFF0h
+  sub         esp,0C8h
+  mov         ecx,dword [ebp+8]
+  mov         edx,dword [ebp+0Ch]
+  mov         eax,dword [ebp+10h]
+  sub         ecx,2
+  sub         edx,2
+  push        esi
+  lea         esi,[eax+eax*2]
+  mov         dword [esp+18h],ecx
+  mov         dword [esp+4],edx
+  lea         ecx,[ecx+eax*4]
+  lea         edx,[edx+eax*4]
+  lea         eax,[esp+7Ch]
+  push        edi
+  mov         dword [esp+14h],esi
+  mov         dword [esp+18h],ecx
+  mov         dword [esp+0Ch],edx
+  mov         dword [esp+10h],eax
+  mov         esi,dword [esp+1Ch]
+  mov         ecx,dword [ebp+10h]
+  mov         edx,dword [esp+14h]
+  movd        xmm0,dword [esi]
+  movd        xmm1,dword [esi+ecx]
+  movd        xmm2,dword [esi+ecx*2]
+  movd        xmm3,dword [esi+edx]
+  mov         esi,dword  [esp+8]
+  movd        xmm4,dword [esi]
+  movd        xmm5,dword [esi+ecx]
+  movd        xmm6,dword [esi+ecx*2]
+  movd        xmm7,dword [esi+edx]
+  punpckldq   xmm0,xmm4
+  punpckldq   xmm1,xmm5
+  punpckldq   xmm2,xmm6
+  punpckldq   xmm3,xmm7
+  mov         esi,dword [esp+18h]
+  mov         edi,dword [esp+0Ch]
+  movd        xmm4,dword [esi]
+  movd        xmm5,dword [edi]
+  punpckldq   xmm4,xmm5
+  punpcklqdq  xmm0,xmm4
+  movd        xmm4,dword [esi+ecx]
+  movd        xmm5,dword [edi+ecx]
+  punpckldq   xmm4,xmm5
+  punpcklqdq  xmm1,xmm4
+  movd        xmm4,dword [esi+ecx*2]
+  movd        xmm5,dword [edi+ecx*2]
+  punpckldq   xmm4,xmm5
+  punpcklqdq  xmm2,xmm4
+  movd        xmm4,dword [esi+edx]
+  movd        xmm5,dword [edi+edx]
+  punpckldq   xmm4,xmm5
+  punpcklqdq  xmm3,xmm4
+  movdqa      xmm6,xmm0
+  punpcklbw   xmm0,xmm1
+  punpckhbw   xmm6,xmm1
+  movdqa      xmm7,xmm2
+  punpcklbw   xmm2,xmm3
+  punpckhbw   xmm7,xmm3
+  movdqa      xmm4,xmm0
+  movdqa      xmm5,xmm6
+  punpcklwd   xmm0,xmm2
+  punpckhwd   xmm4,xmm2
+  punpcklwd   xmm6,xmm7
+  punpckhwd   xmm5,xmm7
+  movdqa      xmm1,xmm0
+  movdqa      xmm2,xmm4
+  punpckldq   xmm0,xmm6
+  punpckhdq   xmm1,xmm6
+  punpckldq   xmm4,xmm5
+  punpckhdq   xmm2,xmm5
+  movdqa      xmm5,xmm0
+  movdqa      xmm6,xmm1
+  punpcklqdq  xmm0,xmm4
+  punpckhqdq  xmm5,xmm4
+  punpcklqdq  xmm1,xmm2
+  punpckhqdq  xmm6,xmm2
+  mov         edi,dword [esp+10h]
+  movdqa      [edi],xmm0
+  movdqa      [edi+10h],xmm5
+  movdqa      [edi+20h],xmm1
+  movdqa      [edi+30h],xmm6
+  movsx       ecx,word [ebp+14h]
+  movsx       edx,word [ebp+18h]
+  movdqa      xmm6,[esp+80h]
+  movdqa      xmm4,[esp+90h]
+  movdqa      xmm5,[esp+0A0h]
+  movdqa      xmm7,[esp+0B0h]
+  pxor        xmm0,xmm0
+  movd        xmm1,ecx
+  movdqa      xmm2,xmm1
+  punpcklwd   xmm2,xmm1
+  pshufd      xmm1,xmm2,0
+  movd        xmm2,edx
+  movdqa      xmm3,xmm2
+  punpcklwd   xmm3,xmm2
+  pshufd      xmm2,xmm3,0
+  movdqa      xmm3,xmm6
+  punpckhbw   xmm6,xmm0
+  movdqa      [esp+60h],xmm6
+  movdqa      xmm6,[esp+90h]
+  punpckhbw   xmm6,xmm0
+  movdqa      [esp+30h],xmm6
+  movdqa      xmm6,[esp+0A0h]
+  punpckhbw   xmm6,xmm0
+  movdqa      [esp+40h],xmm6
+  movdqa      xmm6,[esp+0B0h]
+  punpckhbw   xmm6,xmm0
+  movdqa      [esp+70h],xmm6
+  punpcklbw   xmm7,xmm0
+  punpcklbw   xmm4,xmm0
+  punpcklbw   xmm5,xmm0
+  punpcklbw   xmm3,xmm0
+  movdqa      [esp+50h],xmm7
+  movdqa      xmm6,xmm4
+  psubw       xmm6,xmm5
+  pabsw       xmm6,xmm6
+  movdqa      xmm0,xmm1
+  pcmpgtw     xmm0,xmm6
+  movdqa      xmm6,xmm3
+  psubw       xmm6,xmm4
+  pabsw       xmm6,xmm6
+  movdqa      xmm7,xmm2
+  pcmpgtw     xmm7,xmm6
+  movdqa      xmm6,[esp+50h]
+  psubw       xmm6,xmm5
+  pabsw       xmm6,xmm6
+  pand        xmm0,xmm7
+  movdqa      xmm7,xmm2
+  pcmpgtw     xmm7,xmm6
+  movdqa      xmm6,[esp+30h]
+  psubw       xmm6,[esp+40h]
+  pabsw       xmm6,xmm6
+  pcmpgtw     xmm1,xmm6
+  movdqa      xmm6,[esp+60h]
+  psubw       xmm6,[esp+30h]
+  pabsw       xmm6,xmm6
+  pand        xmm0,xmm7
+  movdqa      xmm7,xmm2
+  pcmpgtw     xmm7,xmm6
+  movdqa      xmm6,[esp+70h]
+  psubw       xmm6,[esp+40h]
+  pabsw       xmm6,xmm6
+  pand        xmm1,xmm7
+  pcmpgtw     xmm2,xmm6
+  pand        xmm1,xmm2
+  mov         eax,2
+  movsx       ecx,ax
+  movd        xmm2,ecx
+  movdqa      xmm6,xmm2
+  punpcklwd   xmm6,xmm2
+  pshufd      xmm2,xmm6,0
+  movdqa      [esp+20h],xmm2
+  movdqa      xmm2,xmm3
+  paddw       xmm2,xmm3
+  paddw       xmm2,xmm4
+  paddw       xmm2,[esp+50h]
+  paddw       xmm2,[esp+20h]
+  psraw       xmm2,2
+  movdqa      xmm6,xmm0
+  pand        xmm6,xmm2
+  movdqa      xmm2,xmm0
+  pandn       xmm2,xmm4
+  por         xmm6,xmm2
+  movdqa      xmm2,[esp+60h]
+  movdqa      xmm7,xmm2
+  paddw       xmm7,xmm2
+  paddw       xmm7,[esp+30h]
+  paddw       xmm7,[esp+70h]
+  paddw       xmm7,[esp+20h]
+  movdqa      xmm4,xmm1
+  movdqa      xmm2,xmm1
+  pandn       xmm2,[esp+30h]
+  psraw       xmm7,2
+  pand        xmm4,xmm7
+  por         xmm4,xmm2
+  movdqa      xmm2,[esp+50h]
+  packuswb    xmm6,xmm4
+  movdqa      [esp+90h],xmm6
+  movdqa      xmm6,xmm2
+  paddw       xmm6,xmm2
+  movdqa      xmm2,[esp+20h]
+  paddw       xmm6,xmm5
+  paddw       xmm6,xmm3
+  movdqa      xmm4,xmm0
+  pandn       xmm0,xmm5
+  paddw       xmm6,xmm2
+  psraw       xmm6,2
+  pand        xmm4,xmm6
+  por         xmm4,xmm0
+  movdqa      xmm0,[esp+70h]
+  movdqa      xmm5,xmm0
+  paddw       xmm5,xmm0
+  movdqa      xmm0,[esp+40h]
+  paddw       xmm5,xmm0
+  paddw       xmm5,[esp+60h]
+  movdqa      xmm3,xmm1
+  paddw       xmm5,xmm2
+  psraw       xmm5,2
+  pand        xmm3,xmm5
+  pandn       xmm1,xmm0
+  por         xmm3,xmm1
+  packuswb    xmm4,xmm3
+  movdqa      [esp+0A0h],xmm4
+  mov         esi,dword [esp+10h]
+  movdqa      xmm0,[esi]
+  movdqa      xmm1,[esi+10h]
+  movdqa      xmm2,[esi+20h]
+  movdqa      xmm3,[esi+30h]
+  movdqa      xmm6,xmm0
+  punpcklbw   xmm0,xmm1
+  punpckhbw   xmm6,xmm1
+  movdqa      xmm7,xmm2
+  punpcklbw   xmm2,xmm3
+  punpckhbw   xmm7,xmm3
+  movdqa      xmm4,xmm0
+  movdqa      xmm5,xmm6
+  punpcklwd   xmm0,xmm2
+  punpckhwd   xmm4,xmm2
+  punpcklwd   xmm6,xmm7
+  punpckhwd   xmm5,xmm7
+  movdqa      xmm1,xmm0
+  movdqa      xmm2,xmm4
+  punpckldq   xmm0,xmm6
+  punpckhdq   xmm1,xmm6
+  punpckldq   xmm4,xmm5
+  punpckhdq   xmm2,xmm5
+  movdqa      xmm5,xmm0
+  movdqa      xmm6,xmm1
+  punpcklqdq  xmm0,xmm4
+  punpckhqdq  xmm5,xmm4
+  punpcklqdq  xmm1,xmm2
+  punpckhqdq  xmm6,xmm2
+  mov         esi,dword [esp+1Ch]
+  mov         ecx,dword [ebp+10h]
+  mov         edx,dword [esp+14h]
+  mov         edi,dword [esp+8]
+  movd        dword [esi],xmm0
+  movd        dword [esi+ecx],xmm5
+  movd        dword [esi+ecx*2],xmm1
+  movd        dword [esi+edx],xmm6
+  psrldq      xmm0,4
+  psrldq      xmm5,4
+  psrldq      xmm1,4
+  psrldq      xmm6,4
+  mov         esi,dword [esp+18h]
+  movd        dword [edi],xmm0
+  movd        dword [edi+ecx],xmm5
+  movd        dword [edi+ecx*2],xmm1
+  movd        dword [edi+edx],xmm6
+  psrldq      xmm0,4
+  psrldq      xmm5,4
+  psrldq      xmm1,4
+  psrldq      xmm6,4
+  movd        dword [esi],xmm0
+  movd        dword [esi+ecx],xmm5
+  movd        dword [esi+ecx*2],xmm1
+  movd        dword [esi+edx],xmm6
+  psrldq      xmm0,4
+  psrldq      xmm5,4
+  psrldq      xmm1,4
+  psrldq      xmm6,4
+  mov         edi,dword [esp+0Ch]
+  movd        dword [edi],xmm0
+  movd        dword [edi+ecx],xmm5
+  movd        dword [edi+ecx*2],xmm1
+  movd        dword [edi+edx],xmm6
+  pop         edi
+  pop         esi
+  mov         esp,ebp
+  pop         ebp
+  ret
+
+;*******************************************************************************
+;    void DeblockChromaLt4H_sse2(uint8_t * pPixCb, uint8_t * pPixCr, int32_t iStride,
+;                                int32_t iAlpha, int32_t iBeta, int8_t * pTC);
+;*******************************************************************************
+; Filters 8 rows of each plane. Per row the 4 bytes at pPix-2..pPix+1 are read as col0..col3; all 4 are stored back but only col1/col2 can change.
+WELS_EXTERN  DeblockChromaLt4H_sse2
+; d = clip(((col2-col1)*4 + (col0-col3) + 4) >> 3, -tc, tc); col1 += d, col2 -= d (saturated to 0..255) where alpha > |col1-col2|, beta > |col0-col1|, beta > |col3-col2| and tc > 0.
+ALIGN  16
+; pTC supplies 4 signed bytes; pTC[i] is applied to rows 2i and 2i+1 of both planes. Only the low 16 bits of iAlpha and iBeta are read.
+DeblockChromaLt4H_sse2:
+  push        ebp
+  mov         ebp,esp                 ; ebp frame: arguments are read from [ebp+8]..[ebp+1Ch]
+  and         esp,0FFFFFFF0h          ; round esp down to a 16-byte boundary
+  sub         esp,108h                ; reserve 108h bytes of locals
+  mov         ecx,dword [ebp+8]       ; ecx = pPixCb
+  mov         edx,dword [ebp+0Ch]     ; edx = pPixCr
+  mov         eax,dword [ebp+10h]     ; eax = iStride
+  sub         ecx,2                   ; ecx = pPixCb-2 (first of the 4 bytes handled per row)
+  sub         edx,2                   ; edx = pPixCr-2
+  push        esi                     ; save esi
+  lea         esi,[eax+eax*2]         ; esi = 3*iStride
+  mov         dword [esp+10h],ecx     ; spill pPixCb-2 (addressed as [esp+14h] after edi is pushed)
+  mov         dword [esp+4],edx       ; spill pPixCr-2 (addressed as [esp+8] after edi is pushed)
+  lea         ecx,[ecx+eax*4]         ; ecx = pPixCb-2 + 4*iStride (row 4)
+  lea         edx,[edx+eax*4]         ; edx = pPixCr-2 + 4*iStride (row 4)
+  lea         eax,[esp+6Ch]           ; eax = 64-byte scratch area, 16-byte aligned; it is esp+70h after the next push
+  push        edi                     ; save edi
+  mov         dword [esp+0Ch],esi     ; [esp+0Ch] = 3*iStride
+  mov         dword [esp+18h],ecx     ; [esp+18h] = pointer to Cb row 4
+  mov         dword [esp+10h],edx     ; [esp+10h] = pointer to Cr row 4
+  mov         dword [esp+1Ch],eax     ; [esp+1Ch] = scratch pointer
+  mov         esi,dword [esp+14h]     ; esi = pPixCb-2
+  mov         ecx,dword [ebp+10h]     ; ecx = iStride
+  mov         edx,dword [esp+0Ch]     ; edx = 3*iStride
+  movd        xmm0,dword [esi]        ; Cb rows 0..3, 4 bytes each
+  movd        xmm1,dword [esi+ecx]
+  movd        xmm2,dword [esi+ecx*2]
+  movd        xmm3,dword [esi+edx]
+  mov         esi,dword [esp+8]       ; esi = pPixCr-2
+  movd        xmm4,dword [esi]        ; Cr rows 0..3, 4 bytes each
+  movd        xmm5,dword [esi+ecx]
+  movd        xmm6,dword [esi+ecx*2]
+  movd        xmm7,dword [esi+edx]
+  punpckldq   xmm0,xmm4               ; xmm0 = {Cb row0, Cr row0}
+  punpckldq   xmm1,xmm5               ; xmm1 = {Cb row1, Cr row1}
+  punpckldq   xmm2,xmm6               ; xmm2 = {Cb row2, Cr row2}
+  punpckldq   xmm3,xmm7               ; xmm3 = {Cb row3, Cr row3}
+  mov         esi,dword [esp+18h]     ; esi = Cb row 4
+  mov         edi,dword [esp+10h]     ; edi = Cr row 4
+  movd        xmm4,dword [esi]
+  movd        xmm5,dword [edi]
+  punpckldq   xmm4,xmm5
+  punpcklqdq  xmm0,xmm4               ; xmm0 = {Cb r0, Cr r0, Cb r4, Cr r4}
+  movd        xmm4,dword [esi+ecx]
+  movd        xmm5,dword [edi+ecx]
+  punpckldq   xmm4,xmm5
+  punpcklqdq  xmm1,xmm4               ; xmm1 = {Cb r1, Cr r1, Cb r5, Cr r5}
+  movd        xmm4,dword [esi+ecx*2]
+  movd        xmm5,dword [edi+ecx*2]
+  punpckldq   xmm4,xmm5
+  punpcklqdq  xmm2,xmm4               ; xmm2 = {Cb r2, Cr r2, Cb r6, Cr r6}
+  movd        xmm4,dword [esi+edx]
+  movd        xmm5,dword [edi+edx]
+  punpckldq   xmm4,xmm5
+  punpcklqdq  xmm3,xmm4               ; xmm3 = {Cb r3, Cr r3, Cb r7, Cr r7}
+  movdqa      xmm6,xmm0               ; --- byte transpose: rows -> columns ---
+  punpcklbw   xmm0,xmm1
+  punpckhbw   xmm6,xmm1
+  movdqa      xmm7,xmm2
+  punpcklbw   xmm2,xmm3
+  punpckhbw   xmm7,xmm3
+  movdqa      xmm4,xmm0
+  movdqa      xmm5,xmm6
+  punpcklwd   xmm0,xmm2
+  punpckhwd   xmm4,xmm2
+  punpcklwd   xmm6,xmm7
+  punpckhwd   xmm5,xmm7
+  movdqa      xmm1,xmm0
+  movdqa      xmm2,xmm4
+  punpckldq   xmm0,xmm6
+  punpckhdq   xmm1,xmm6
+  punpckldq   xmm4,xmm5
+  punpckhdq   xmm2,xmm5
+  movdqa      xmm5,xmm0
+  movdqa      xmm6,xmm1
+  punpcklqdq  xmm0,xmm4               ; xmm0 = col0: 8 Cb rows in the low half, 8 Cr rows in the high half
+  punpckhqdq  xmm5,xmm4               ; xmm5 = col1 (same layout)
+  punpcklqdq  xmm1,xmm2               ; xmm1 = col2
+  punpckhqdq  xmm6,xmm2               ; xmm6 = col3
+  mov         edi,dword [esp+1Ch]     ; edi = scratch area (esp+70h)
+  movdqa      [edi],xmm0              ; [esp+70h]  = col0
+  movdqa      [edi+10h],xmm5          ; [esp+80h]  = col1
+  movdqa      [edi+20h],xmm1          ; [esp+90h]  = col2
+  movdqa      [edi+30h],xmm6          ; [esp+0A0h] = col3
+  mov         eax,dword [ebp+1Ch]     ; eax = pTC
+  movsx       cx,byte [eax+3]         ; cx = pTC[3], sign-extended to 16 bits
+  movsx       dx,byte [eax+2]         ; dx = pTC[2]
+  movsx       si,byte [eax+1]         ; si = pTC[1]
+  movsx       ax,byte [eax]           ; ax = pTC[0] (eax no longer holds the pointer)
+  movzx       edi,cx                  ; scalar copies of tc3/tc2/tc1/tc0 are moved into xmm registers below
+  movzx       ecx,cx
+  movd        xmm2,ecx                ; xmm2 = tc3
+  movzx       ecx,dx
+  movzx       edx,dx
+  movd        xmm3,ecx                ; xmm3 = tc2
+  movd        xmm4,edx                ; xmm4 = tc2
+  movzx       ecx,si
+  movzx       edx,si
+  movd        xmm5,ecx                ; xmm5 = tc1
+  pxor        xmm0,xmm0
+  movd        xmm6,edx                ; xmm6 = tc1
+  movzx       ecx,ax
+  movdqa      [esp+60h],xmm0          ; [esp+60h] = zero vector
+  movzx       edx,ax
+  movsx       eax,word [ebp+14h]      ; eax = low 16 bits of iAlpha, sign-extended
+  punpcklwd   xmm6,xmm2               ; xmm6 = {tc1,tc3}
+  movd        xmm1,edi                ; xmm1 = tc3
+  movd        xmm7,ecx                ; xmm7 = tc0
+  movsx       ecx,word [ebp+18h]      ; ecx = low 16 bits of iBeta, sign-extended
+  movd        xmm0,edx                ; xmm0 = tc0
+  punpcklwd   xmm7,xmm3               ; xmm7 = {tc0,tc2}
+  punpcklwd   xmm5,xmm1               ; xmm5 = {tc1,tc3}
+  movdqa      xmm1,[esp+60h]          ; xmm1 = zero vector
+  punpcklwd   xmm7,xmm5               ; xmm7 = {tc0,tc1,tc2,tc3}
+  movdqa      xmm5,[esp+0A0h]         ; xmm5 = col3 bytes
+  punpcklwd   xmm0,xmm4               ; xmm0 = {tc0,tc2}
+  punpcklwd   xmm0,xmm6               ; xmm0 = {tc0,tc1,tc2,tc3}
+  movdqa      xmm6, [esp+70h]         ; xmm6 = col0 bytes
+  punpcklwd   xmm0,xmm7               ; xmm0 = tc words {tc0,tc0,tc1,tc1,tc2,tc2,tc3,tc3}, one per row
+  movdqa      xmm7,[esp+80h]          ; xmm7 = col1 bytes
+  movdqa      xmm2,xmm1
+  psubw       xmm2,xmm0
+  movdqa      [esp+0D0h],xmm2         ; [esp+0D0h] = -tc
+  movd        xmm2,eax
+  movdqa      xmm3,xmm2
+  punpcklwd   xmm3,xmm2
+  pshufd      xmm4,xmm3,0             ; xmm4 = alpha in all 8 words
+  movd        xmm2,ecx
+  movdqa      xmm3,xmm2
+  punpcklwd   xmm3,xmm2
+  pshufd      xmm2,xmm3,0             ; xmm2 = beta in all 8 words
+  movdqa      xmm3, [esp+90h]         ; xmm3 = col2 bytes
+  movdqa      [esp+50h],xmm2          ; [esp+50h] = beta
+  movdqa      xmm2,xmm6
+  punpcklbw   xmm2,xmm1
+  punpckhbw   xmm6,xmm1
+  movdqa      [esp+40h],xmm2          ; [esp+40h]  = col0 Cb as words
+  movdqa      [esp+0B0h],xmm6         ; [esp+0B0h] = col0 Cr as words
+  movdqa      xmm6,[esp+90h]
+  movdqa      xmm2,xmm7
+  punpckhbw   xmm7,xmm1               ; xmm7 = col1 Cr words
+  punpckhbw   xmm6,xmm1               ; xmm6 = col2 Cr words
+  punpcklbw   xmm2,xmm1               ; xmm2 = col1 Cb words
+  punpcklbw   xmm3,xmm1               ; xmm3 = col2 Cb words
+  punpcklbw   xmm5,xmm1               ; xmm5 = col3 Cb words
+  movdqa      [esp+0F0h],xmm7         ; [esp+0F0h] = col1 Cr
+  movdqa      [esp+0C0h],xmm6         ; [esp+0C0h] = col2 Cr
+  movdqa      xmm6, [esp+0A0h]
+  punpckhbw   xmm6,xmm1
+  movdqa      [esp+0E0h],xmm6         ; [esp+0E0h] = col3 Cr words
+  mov         edx,4
+  movsx       eax,dx
+  movd        xmm6,eax
+  movdqa      xmm7,xmm6
+  punpcklwd   xmm7,xmm6
+  pshufd      xmm6,xmm7,0
+  movdqa      [esp+30h],xmm6          ; [esp+30h] = rounding constant 4 in all 8 words
+  movdqa      xmm7, [esp+40h]
+  psubw       xmm7,xmm5               ; xmm7 = col0 - col3 (Cb)
+  movdqa      xmm6,xmm0
+  pcmpgtw     xmm6,xmm1
+  movdqa      [esp+60h],xmm6          ; [esp+60h] = mask (tc > 0); overwrites the zero slot, zero stays in xmm1 for now
+  movdqa      xmm1, [esp+0D0h]        ; xmm1 = -tc
+  movdqa      xmm6,xmm3
+  psubw       xmm6,xmm2               ; col2 - col1
+  psllw       xmm6,2
+  paddw       xmm6,xmm7
+  paddw       xmm6,[esp+30h]
+  psraw       xmm6,3                  ; xmm6 = ((col2-col1)*4 + (col0-col3) + 4) >> 3 (Cb)
+  pmaxsw      xmm1,xmm6               ; lower clamp at -tc
+  movdqa      xmm7,[esp+50h]          ; xmm7 = beta
+  movdqa      [esp+20h],xmm0
+  movdqa      xmm6, [esp+20h]
+  pminsw      xmm6,xmm1               ; upper clamp at tc
+  movdqa      [esp+20h],xmm6          ; [esp+20h] = clipped Cb delta
+  movdqa      xmm6,xmm4
+  movdqa      xmm1,xmm2
+  psubw       xmm1,xmm3
+  pabsw       xmm1,xmm1
+  pcmpgtw     xmm6,xmm1               ; alpha > |col1-col2|
+  movdqa      xmm1, [esp+40h]
+  psubw       xmm1,xmm2
+  pabsw       xmm1,xmm1
+  pcmpgtw     xmm7,xmm1               ; beta > |col0-col1|
+  movdqa      xmm1, [esp+50h]
+  pand        xmm6,xmm7
+  movdqa      xmm7, [esp+50h]         ; xmm7 = beta again, used by the Cr compares below
+  psubw       xmm5,xmm3
+  pabsw       xmm5,xmm5
+  pcmpgtw     xmm1,xmm5               ; beta > |col3-col2|
+  movdqa      xmm5, [esp+0B0h]
+  psubw       xmm5,[esp+0E0h]         ; xmm5 = col0 - col3 (Cr)
+  pand        xmm6,xmm1
+  pand        xmm6, [esp+60h]         ; also require tc > 0
+  movdqa      xmm1, [esp+20h]
+  pand        xmm1,xmm6
+  movdqa      xmm6, [esp+0C0h]        ; xmm6 = col2 Cr
+  movdqa      [esp+40h],xmm1          ; [esp+40h] = masked Cb delta (slot reused)
+  movdqa      xmm1, [esp+0F0h]        ; xmm1 = col1 Cr
+  psubw       xmm6,xmm1
+  psllw       xmm6,2
+  paddw       xmm6,xmm5
+  paddw       xmm6, [esp+30h]
+  movdqa      xmm5, [esp+0D0h]        ; xmm5 = -tc
+  psraw       xmm6,3                  ; same delta formula, Cr half
+  pmaxsw      xmm5,xmm6
+  pminsw      xmm0,xmm5               ; xmm0 = clipped Cr delta
+  movdqa      xmm5,[esp+0C0h]         ; xmm5 = col2 Cr
+  movdqa      xmm6,xmm1
+  psubw       xmm6,xmm5
+  pabsw       xmm6,xmm6
+  pcmpgtw     xmm4,xmm6               ; alpha > |col1-col2| (Cr)
+  movdqa      xmm6,[esp+0B0h]
+  psubw       xmm6,xmm1
+  pabsw       xmm6,xmm6
+  pcmpgtw     xmm7,xmm6               ; beta > |col0-col1| (Cr)
+  movdqa      xmm6, [esp+0E0h]
+  pand        xmm4,xmm7
+  movdqa      xmm7, [esp+50h]
+  psubw       xmm6,xmm5
+  pabsw       xmm6,xmm6
+  pcmpgtw     xmm7,xmm6               ; beta > |col3-col2| (Cr)
+  pand        xmm4,xmm7
+  pand        xmm4,[esp+60h]          ; also require tc > 0
+  pand        xmm0,xmm4               ; xmm0 = masked Cr delta
+  movdqa      xmm4, [esp+40h]         ; xmm4 = masked Cb delta
+  paddw       xmm2,xmm4               ; col1 Cb += delta
+  paddw       xmm1,xmm0               ; col1 Cr += delta
+  psubw       xmm3,xmm4               ; col2 Cb -= delta
+  psubw       xmm5,xmm0               ; col2 Cr -= delta
+  packuswb    xmm2,xmm1               ; back to bytes with unsigned saturation: new col1
+  packuswb    xmm3,xmm5               ; new col2
+  movdqa      [esp+80h],xmm2          ; overwrite col1 in the scratch area
+  movdqa      [esp+90h],xmm3          ; overwrite col2 in the scratch area
+  mov         esi,dword [esp+1Ch]     ; esi = scratch area
+  movdqa      xmm0, [esi]             ; col0 (as read)
+  movdqa      xmm1, [esi+10h]         ; col1 (filtered)
+  movdqa      xmm2, [esi+20h]         ; col2 (filtered)
+  movdqa      xmm3, [esi+30h]         ; col3 (as read)
+  movdqa      xmm6,xmm0               ; --- byte transpose: columns -> rows ---
+  punpcklbw   xmm0,xmm1
+  punpckhbw   xmm6,xmm1
+  movdqa      xmm7,xmm2
+  punpcklbw   xmm2,xmm3
+  punpckhbw   xmm7,xmm3
+  movdqa      xmm4,xmm0
+  movdqa      xmm5,xmm6
+  punpcklwd   xmm0,xmm2
+  punpckhwd   xmm4,xmm2
+  punpcklwd   xmm6,xmm7
+  punpckhwd   xmm5,xmm7
+  movdqa      xmm1,xmm0
+  movdqa      xmm2,xmm4
+  punpckldq   xmm0,xmm6
+  punpckhdq   xmm1,xmm6
+  punpckldq   xmm4,xmm5
+  punpckhdq   xmm2,xmm5
+  movdqa      xmm5,xmm0
+  movdqa      xmm6,xmm1
+  punpcklqdq  xmm0,xmm4               ; xmm0 = {Cb r0, Cr r0, Cb r4, Cr r4}
+  punpckhqdq  xmm5,xmm4               ; xmm5 = {Cb r1, Cr r1, Cb r5, Cr r5}
+  punpcklqdq  xmm1,xmm2               ; xmm1 = {Cb r2, Cr r2, Cb r6, Cr r6}
+  punpckhqdq  xmm6,xmm2               ; xmm6 = {Cb r3, Cr r3, Cb r7, Cr r7}
+  mov         esi,dword [esp+14h]     ; esi = pPixCb-2
+  mov         ecx,dword [ebp+10h]     ; ecx = iStride
+  mov         edx,dword [esp+0Ch]     ; edx = 3*iStride
+  mov         edi,dword [esp+8]       ; edi = pPixCr-2
+  movd        dword [esi],xmm0        ; store Cb rows 0..3
+  movd        dword [esi+ecx],xmm5
+  movd        dword [esi+ecx*2],xmm1
+  movd        dword [esi+edx],xmm6
+  psrldq      xmm0,4                  ; shift the next 4-byte group into the low dword
+  psrldq      xmm5,4
+  psrldq      xmm1,4
+  psrldq      xmm6,4
+  mov         esi,dword [esp+18h]     ; esi = Cb row 4
+  movd        dword [edi],xmm0        ; store Cr rows 0..3
+  movd        dword [edi+ecx],xmm5
+  movd        dword [edi+ecx*2],xmm1
+  movd        dword [edi+edx],xmm6
+  psrldq      xmm0,4
+  psrldq      xmm5,4
+  psrldq      xmm1,4
+  psrldq      xmm6,4
+  movd        dword [esi],xmm0        ; store Cb rows 4..7
+  movd        dword [esi+ecx],xmm5
+  movd        dword [esi+ecx*2],xmm1
+  movd        dword [esi+edx],xmm6
+  psrldq      xmm0,4
+  psrldq      xmm5,4
+  psrldq      xmm1,4
+  psrldq      xmm6,4
+  mov         edi,dword [esp+10h]     ; edi = Cr row 4
+  movd        dword [edi],xmm0        ; store Cr rows 4..7
+  movd        dword [edi+ecx],xmm5
+  movd        dword [edi+ecx*2],xmm1
+  movd        dword [edi+edx],xmm6
+  pop         edi                     ; restore saved registers in reverse push order
+  pop         esi
+  mov         esp,ebp                 ; discard the aligned local area
+  pop         ebp
+  ret
+
+
+
+;*******************************************************************************
+;    void DeblockLumaLt4V_sse2(uint8_t * pPix, int32_t iStride, int32_t iAlpha,
+;                                 int32_t iBeta, int8_t * pTC)
+;*******************************************************************************
+; Works on 16 bytes per row. Names used below: p2,p1,p0 = rows at pPix-3*iStride, pPix-2*iStride, pPix-iStride; q0,q1,q2 = rows at pPix, pPix+iStride, pPix+2*iStride.
+; Rows are loaded and stored with movdqa, so every row address must be 16-byte aligned. Rows p1, p0, q0 and q1 are written back; p2 and q2 are only read.
+WELS_EXTERN  DeblockLumaLt4V_sse2
+; pTC supplies 4 signed bytes; pTC[i] applies to bytes 4i..4i+3 of each row. Only the low 16 bits of iAlpha and iBeta are read. Each half (bytes 0..7, bytes 8..15) is processed as 8 words.
+ALIGN  16
+; mask = alpha>|q0-p0| & beta>|q0-q1| & beta>|p0-p1| & tc>=0; mp = beta>|p0-p2|; mq = beta>|q0-q2|; d = clip(((q0-p0)*4+(p1-q1)+4)>>3, +/-(tc+mp+mq)); p0+=d; q0-=d; p1/q1 get clip((p2|q2 + avg(p0,q0) - 2*(p1|q1))>>1, +/-tc) under mp/mq.
+DeblockLumaLt4V_sse2:
+    push	ebp
+	mov	ebp, esp
+	and	esp, -16				; fffffff0H: round esp down to a 16-byte boundary
+	sub	esp, 420				; 000001a4H: reserve 420 bytes of locals
+	mov	eax, dword [ebp+8]	; eax = pPix (row q0)
+	mov	ecx, dword [ebp+12]	; ecx = iStride
+; Stack slots are written as [esp+bias-N]; the bias is 432 with ebx/esi/edi pushed and 424/428 where fewer registers are on the stack.
+	pxor	xmm0, xmm0
+	push	ebx
+	mov	edx, dword [ebp+24]	; edx = pTC
+	movdqa	[esp+424-384], xmm0	; zero vector; same slot as [esp+432-384] once esi and edi are pushed
+	push	esi
+
+	lea	esi, [ecx+ecx*2]	; esi = 3*iStride
+	push	edi
+	mov	edi, eax
+	sub	edi, esi	; edi = pPix - 3*iStride
+	movdqa	xmm0, [edi]	; xmm0 = p2 row
+
+	lea	esi, [ecx+ecx]	; esi = 2*iStride
+	movdqa	[esp+432-208], xmm0	; save p2 bytes
+	mov	edi, eax
+	sub	edi, esi	; edi = pPix - 2*iStride; kept until the final store of p1
+	movdqa	xmm0, [edi]	; xmm0 = p1 row
+	movdqa	[esp+448-208], xmm0	; save p1 bytes
+
+	mov	ebx, eax
+	sub	ebx, ecx	; ebx = pPix - iStride
+	movdqa	xmm0, [ebx]	; xmm0 = p0 row
+	movdqa	[esp+464-208], xmm0	; save p0 bytes
+
+	movdqa	xmm0, [eax]	; xmm0 = q0 row
+
+	add	ecx, eax	; ecx = pPix + iStride
+	movdqa	[esp+480-208], xmm0	; save q0 bytes
+	movdqa	xmm0, [ecx]	; xmm0 = q1 row
+	mov	dword [esp+432-404], ecx	; save the q1 row pointer for the final store
+
+	movsx	ecx, word [ebp+16]	; ecx = low 16 bits of iAlpha, sign-extended
+	movdqa	[esp+496-208], xmm0	; save q1 bytes
+	movdqa	xmm0, [esi+eax]	; xmm0 = q2 row (pPix + 2*iStride)
+
+	movsx	si, byte [edx]	; si = pTC[0], sign-extended to 16 bits
+	movdqa	[esp+512-208], xmm0	; save q2 bytes
+	movd	xmm0, ecx
+	movsx	ecx, word [ebp+20]	; ecx = low 16 bits of iBeta, sign-extended
+	movdqa	xmm1, xmm0
+	punpcklwd xmm1, xmm0
+	pshufd	xmm0, xmm1, 0	; xmm0 = alpha in all 8 words
+	movdqa	[esp+432-112], xmm0	; [esp+432-112] = alpha
+	movd	xmm0, ecx
+	movsx	cx, byte [edx+1]	; cx = pTC[1]
+	movdqa	xmm1, xmm0
+	punpcklwd xmm1, xmm0
+	mov	dword [esp+432-408], ebx	; save the p0 row pointer before ebx is reused
+	movzx	ebx, cx
+	pshufd	xmm0, xmm1, 0	; xmm0 = beta in all 8 words
+	movd	xmm1, ebx	; xmm1, xmm2, xmm4 (and ebx) = tc1
+	movzx	ebx, cx
+	movd	xmm2, ebx
+	movzx	ebx, cx
+	movzx	ecx, cx
+	movd	xmm4, ecx
+	movzx	ecx, si
+	movd	xmm5, ecx	; xmm5, xmm6, xmm7 = tc0
+	movzx	ecx, si
+	movd	xmm6, ecx
+	movzx	ecx, si
+	movd	xmm7, ecx
+	movzx	ecx, si
+	movdqa	[esp+432-336], xmm0	; [esp+432-336] = beta
+	movd	xmm0, ecx	; xmm0 = tc0
+
+	movsx	cx, byte [edx+3]	; cx = pTC[3]
+	movsx	dx, byte [edx+2]	; dx = pTC[2] (edx no longer holds the pointer)
+	movd	xmm3, ebx	; xmm3 = tc1
+	punpcklwd xmm0, xmm4
+	movzx	esi, cx
+	punpcklwd xmm6, xmm2
+	punpcklwd xmm5, xmm1
+	punpcklwd xmm0, xmm6
+	punpcklwd xmm7, xmm3
+	punpcklwd xmm7, xmm5
+	punpcklwd xmm0, xmm7	; xmm0 = {tc0 x4, tc1 x4}
+	movdqa	[esp+432-400], xmm0	; [esp+432-400] = tc words for bytes 0..7
+	movd	xmm0, esi	; xmm0, xmm2, xmm3, xmm4 = tc3
+	movzx	esi, cx
+	movd	xmm2, esi
+	movzx	esi, cx
+	movzx	ecx, cx
+	movd	xmm4, ecx
+	movzx	ecx, dx
+	movd	xmm3, esi
+	movd	xmm5, ecx	; xmm5 = tc2
+	punpcklwd xmm5, xmm0
+
+	movdqa	xmm0, [esp+432-384]	; xmm0 = zero vector (stays zero until the high-half -tc is formed)
+	movzx	ecx, dx
+	movd	xmm6, ecx
+	movzx	ecx, dx
+	movzx	edx, dx
+	punpcklwd xmm6, xmm2
+	movd	xmm7, ecx
+	movd	xmm1, edx
+
+	movdqa	xmm2, [esp+448-208]
+	punpcklbw xmm2, xmm0	; xmm2 = p1 bytes 0..7 as words
+
+	mov	ecx, 4
+	movsx	edx, cx	; edx = 4 (rounding constant, broadcast later)
+	punpcklwd xmm7, xmm3
+	punpcklwd xmm7, xmm5
+	movdqa	xmm5, [esp+496-208]
+	movdqa	xmm3, [esp+464-208]
+	punpcklbw xmm5, xmm0
+	movdqa	[esp+432-240], xmm5	; [esp+432-240] = q1 low words
+	movdqa	xmm5, [esp+512-208]
+	punpcklbw xmm5, xmm0
+	movdqa	[esp+432-352], xmm5	; [esp+432-352] = q2 low words
+	punpcklwd xmm1, xmm4
+	movdqa	xmm4, [esp+432-208]
+	punpcklwd xmm1, xmm6
+	movdqa	xmm6, [esp+480-208]
+	punpcklwd xmm1, xmm7	; xmm1 = {tc2 x4, tc3 x4}: tc words for bytes 8..15, kept in xmm1
+	punpcklbw xmm6, xmm0	; xmm6 = q0 low words
+	punpcklbw xmm3, xmm0	; xmm3 = p0 low words
+	punpcklbw xmm4, xmm0	; xmm4 = p2 low words
+	movdqa	xmm7, xmm3
+	psubw	xmm7, xmm4
+	pabsw	xmm7, xmm7	; |p0-p2|
+	movdqa	[esp+432-272], xmm4	; save p2 low
+	movdqa	xmm4, [esp+432-336]	; xmm4 = beta; stays in xmm4 through the high-half compares
+	movdqa	xmm5, xmm4
+	pcmpgtw	xmm5, xmm7
+	movdqa	[esp+432-288], xmm5	; mp = beta > |p0-p2| (low half)
+	movdqa	xmm7, xmm6
+	psubw	xmm7, [esp+432-352]
+	pabsw	xmm7, xmm7	; |q0-q2|
+	movdqa	xmm5, xmm4
+	pcmpgtw	xmm5, xmm7
+	movdqa	[esp+432-256], xmm5	; mq = beta > |q0-q2| (low half)
+	movdqa	xmm5, xmm3
+	pavgw	xmm5, xmm6
+	movdqa	[esp+432-304], xmm5	; avg = (p0+q0+1)>>1 (low half)
+	movdqa	xmm5, [esp+432-400]
+	psubw	xmm5, [esp+432-288]	; a true mask is -1, so each subtraction adds 1 to tc
+	psubw	xmm5, [esp+432-256]
+	movdqa	[esp+432-224], xmm5	; tc' = tc + mp + mq (low half)
+	movdqa	xmm5, xmm6
+	psubw	xmm5, xmm3	; xmm5 = q0-p0
+	movdqa	[esp+432-32], xmm6	; save q0 low
+	psubw	xmm6, [esp+432-240]	; xmm6 = q0-q1
+	movdqa	xmm7, xmm5
+	movdqa	[esp+432-384], xmm5	; save q0-p0 (reuses the zero slot; zero is still in xmm0)
+	movdqa	xmm5, [esp+432-112]
+	pabsw	xmm7, xmm7
+	pcmpgtw	xmm5, xmm7	; alpha > |q0-p0|
+	pabsw	xmm6, xmm6
+	movdqa	xmm7, xmm4
+	pcmpgtw	xmm7, xmm6	; beta > |q0-q1|
+
+	pand	xmm5, xmm7
+	movdqa	xmm6, xmm3
+	psubw	xmm6, xmm2
+	pabsw	xmm6, xmm6
+	movdqa	xmm7, xmm4
+	pcmpgtw	xmm7, xmm6	; beta > |p0-p1|
+	movdqa	xmm6, [esp+432-400]
+	pand	xmm5, xmm7
+	movdqa	xmm7, xmm6
+	pcmpeqw	xmm6, xmm0
+	pcmpgtw	xmm7, xmm0
+	por	xmm7, xmm6	; tc >= 0
+	pand	xmm5, xmm7
+	movdqa	[esp+432-320], xmm5	; filter mask (low half)
+	movd	xmm5, edx
+	movdqa	xmm6, xmm5
+	punpcklwd xmm6, xmm5
+	pshufd	xmm5, xmm6, 0
+	movdqa	[esp+432-336], xmm5	; [esp+432-336] = 4 in all words (reuses the beta slot)
+	movdqa	xmm5, [esp+432-224]
+	movdqa	[esp+432-368], xmm5	; [esp+432-368] = tc'
+	movdqa	xmm6, xmm0
+	psubw	xmm6, xmm5	; xmm6 = -tc'
+	movdqa	xmm5, [esp+432-384]
+	psllw	xmm5, 2	; (q0-p0)*4
+	movdqa	xmm7, xmm2
+	psubw	xmm7, [esp+432-240]	; p1-q1
+	paddw	xmm7, xmm5
+	paddw	xmm7, [esp+432-336]
+	movdqa	xmm5, [esp+432-368]
+	psraw	xmm7, 3	; ((q0-p0)*4 + (p1-q1) + 4) >> 3
+	pmaxsw	xmm6, xmm7
+	pminsw	xmm5, xmm6	; clip to [-tc', tc']
+
+	pand	xmm5, [esp+432-320]
+	movdqa	xmm6, [esp+432-400]
+	movdqa	[esp+432-64], xmm5	; [esp+432-64] = masked delta (low half)
+	movdqa	[esp+432-384], xmm6	; [esp+432-384] = tc (low)
+	movdqa	xmm5, xmm0
+	psubw	xmm5, xmm6
+	movdqa	[esp+432-368], xmm5	; [esp+432-368] = -tc (low)
+	movdqa	xmm6, xmm5
+	movdqa	xmm5, [esp+432-272]
+	paddw	xmm5, [esp+432-304]	; p2 + avg
+	movdqa	xmm7, xmm2
+	paddw	xmm7, xmm2	; 2*p1
+	psubw	xmm5, xmm7
+	psraw	xmm5, 1	; (p2 + avg - 2*p1) >> 1
+	pmaxsw	xmm6, xmm5
+	movdqa	xmm5, [esp+432-384]
+	pminsw	xmm5, xmm6	; clip to [-tc, tc]
+
+	pand	xmm5, [esp+432-320]
+	pand	xmm5, [esp+432-288]	; only where mask and mp hold
+	movdqa	xmm6, [esp+432-240]
+	movdqa	[esp+432-96], xmm5	; [esp+432-96] = p1 adjustment (low half)
+	movdqa	xmm5, [esp+432-352]
+	paddw	xmm5, [esp+432-304]	; q2 + avg
+	movdqa	xmm7, xmm6
+	paddw	xmm7, xmm6	; 2*q1
+	movdqa	xmm6, [esp+432-368]
+	psubw	xmm5, xmm7
+
+	movdqa	xmm7, [esp+496-208]	; q1 bytes, for the high-half unpack below
+	psraw	xmm5, 1	; (q2 + avg - 2*q1) >> 1
+	pmaxsw	xmm6, xmm5
+	movdqa	xmm5, [esp+432-400]
+	pminsw	xmm5, xmm6	; clip to [-tc, tc]
+	pand	xmm5, [esp+432-320]
+	pand	xmm5, [esp+432-256]	; only where mask and mq hold
+	movdqa	xmm6, [esp+448-208]
+	punpckhbw xmm7, xmm0
+	movdqa	[esp+432-352], xmm7	; [esp+432-352] = q1 high words (slot reused)
+
+	movdqa	xmm7, [esp+512-208]
+	punpckhbw xmm6, xmm0
+	movdqa	[esp+432-48], xmm5	; [esp+432-48] = q1 adjustment (low half)
+	movdqa	xmm5, [esp+432-208]
+	movdqa	[esp+432-368], xmm6	; [esp+432-368] = p1 high words (slot reused)
+	movdqa	xmm6, [esp+464-208]
+	punpckhbw xmm7, xmm0
+	punpckhbw xmm5, xmm0	; xmm5 = p2 high words
+	movdqa	[esp+432-384], xmm7	; [esp+432-384] = q2 high words (slot reused)
+	punpckhbw xmm6, xmm0
+	movdqa	[esp+432-400], xmm6	; [esp+432-400] = p0 high words (overwrites the low-half tc)
+; ---- high half (bytes 8..15): same steps, with tc taken from xmm1 ----
+	movdqa	xmm7, [esp+432-400]
+	movdqa	xmm6, [esp+480-208]
+	psubw	xmm7, xmm5
+	movdqa	[esp+432-16], xmm5	; save p2 high
+	pabsw	xmm7, xmm7
+	punpckhbw xmm6, xmm0	; xmm6 = q0 high words
+	movdqa	xmm5, xmm4
+	pcmpgtw	xmm5, xmm7
+	movdqa	[esp+432-288], xmm5	; mp (high half)
+
+	movdqa	xmm7, xmm6
+	psubw	xmm7, [esp+432-384]
+	pabsw	xmm7, xmm7
+	movdqa	xmm5, xmm4
+	pcmpgtw	xmm5, xmm7
+	movdqa	[esp+432-256], xmm5	; mq (high half)
+
+	movdqa	xmm5, [esp+432-400]
+	movdqa	[esp+432-80], xmm6	; save q0 high
+	pavgw	xmm5, xmm6
+	movdqa	[esp+432-304], xmm5	; avg (high half)
+
+	movdqa	xmm5, xmm1
+	psubw	xmm5, [esp+432-288]
+	psubw	xmm5, [esp+432-256]
+	movdqa	[esp+432-224], xmm5	; tc' (high half)
+	movdqa	xmm5, xmm6
+	psubw	xmm5, [esp+432-400]	; q0-p0
+	psubw	xmm6, [esp+432-352]	; q0-q1
+	movdqa	[esp+432-272], xmm5	; save q0-p0 (high)
+	movdqa	xmm7, xmm5
+	movdqa	xmm5, [esp+432-112]
+	pabsw	xmm7, xmm7
+	pcmpgtw	xmm5, xmm7	; alpha > |q0-p0|
+	movdqa	xmm7, xmm4
+	pabsw	xmm6, xmm6
+	pcmpgtw	xmm7, xmm6	; beta > |q0-q1|
+	movdqa	xmm6, [esp+432-368]	; xmm6 = p1 high
+
+	pand	xmm5, xmm7
+	movdqa	xmm7, [esp+432-400]
+	psubw	xmm7, xmm6	; p0-p1
+	psubw	xmm6, [esp+432-352]	; xmm6 = p1-q1 (high)
+	pabsw	xmm7, xmm7
+	pcmpgtw	xmm4, xmm7	; beta > |p0-p1|; beta in xmm4 is consumed here
+	pand	xmm5, xmm4
+
+	paddw	xmm2, [esp+432-96]	; new p1 (low half)
+	movdqa	xmm4, xmm1
+	pcmpgtw	xmm4, xmm0
+	movdqa	xmm7, xmm1
+	pcmpeqw	xmm7, xmm0
+	por	xmm4, xmm7	; tc >= 0 (high half)
+	pand	xmm5, xmm4
+	movdqa	xmm4, [esp+432-224]
+	movdqa	[esp+432-320], xmm5	; filter mask (high half)
+	movdqa	xmm5, [esp+432-272]
+	movdqa	xmm7, xmm0
+	psubw	xmm7, xmm4	; xmm7 = -tc'
+	psubw	xmm0, xmm1	; xmm0 = -tc (high); xmm0 is no longer zero
+	psllw	xmm5, 2
+	paddw	xmm6, xmm5
+	paddw	xmm6, [esp+432-336]	; + 4
+	movdqa	xmm5, [esp+432-368]	; xmm5 = p1 high
+	movdqa	[esp+432-336], xmm0	; [esp+432-336] = -tc (high); reuses the slot that held 4
+	psraw	xmm6, 3
+	pmaxsw	xmm7, xmm6
+	pminsw	xmm4, xmm7	; delta clipped to [-tc', tc']
+	pand	xmm4, [esp+432-320]
+	movdqa	xmm6, xmm0
+	movdqa	xmm0, [esp+432-16]
+	paddw	xmm0, [esp+432-304]	; p2 + avg
+	movdqa	[esp+432-272], xmm4	; [esp+432-272] = masked delta (high half)
+	movdqa	xmm4, [esp+432-368]
+	paddw	xmm4, xmm4	; 2*p1
+	psubw	xmm0, xmm4
+
+	movdqa	xmm4, [esp+432-64]	; xmm4 = masked delta (low half)
+	psraw	xmm0, 1
+	pmaxsw	xmm6, xmm0
+	movdqa	xmm0, [esp+432-400]	; xmm0 = p0 high
+	movdqa	xmm7, xmm1
+	pminsw	xmm7, xmm6	; p1 adjustment clipped to [-tc, tc]
+	movdqa	xmm6, [esp+432-320]	; xmm6 = filter mask (high half), reused for q1 below
+	pand	xmm7, xmm6
+	pand	xmm7, [esp+432-288]
+	paddw	xmm5, xmm7	; new p1 (high half)
+	packuswb xmm2, xmm5	; xmm2 = new p1 row, saturated to bytes
+	movdqa	xmm5, [esp+432-272]
+	paddw	xmm0, xmm5	; p0 high += delta
+	paddw	xmm3, xmm4	; p0 low += delta
+	packuswb xmm3, xmm0	; xmm3 = new p0 row
+
+	movdqa	xmm0, [esp+432-32]
+	psubw	xmm0, xmm4	; q0 low -= delta
+	movdqa	xmm4, [esp+432-80]
+	psubw	xmm4, xmm5	; q0 high -= delta
+
+	movdqa	xmm5, [esp+432-240]
+	paddw	xmm5, [esp+432-48]	; new q1 (low half)
+	packuswb xmm0, xmm4	; xmm0 = new q0 row
+	movdqa	xmm4, [esp+432-384]
+	paddw	xmm4, [esp+432-304]	; q2 + avg (high)
+	movdqa	[esp+480-208], xmm0	; park the new q0 row while xmm0 is reused
+	movdqa	xmm0, [esp+432-352]
+	movdqa	xmm7, xmm0	; xmm7 = q1 high
+	paddw	xmm0, xmm0	; 2*q1
+
+	mov	ecx, dword [esp+432-408]	; ecx = p0 row pointer
+
+	mov	edx, dword [esp+432-404]	; edx = q1 row pointer
+	psubw	xmm4, xmm0
+	movdqa	xmm0, [esp+432-336]	; xmm0 = -tc (high)
+	movdqa	[edi], xmm2	; store new p1 at pPix - 2*iStride
+	psraw	xmm4, 1
+	pmaxsw	xmm0, xmm4
+	pminsw	xmm1, xmm0	; q1 adjustment clipped to [-tc, tc]
+	movdqa	xmm0, [esp+480-208]	; xmm0 = new q0 row
+
+	pop	edi
+	pand	xmm1, xmm6
+	pand	xmm1, [esp+428-256]	; mq (high half); bias is 428 because edi has been popped
+	movdqa	[ecx], xmm3	; store new p0
+	paddw	xmm7, xmm1	; new q1 (high half)
+	pop	esi
+	packuswb xmm5, xmm7	; xmm5 = new q1 row
+	movdqa	[eax], xmm0	; store new q0 at pPix
+	movdqa	[edx], xmm5	; store new q1
+	pop	ebx
+	mov	esp, ebp	; discard the aligned local area
+	pop	ebp
+	ret
+
+
+;*******************************************************************************
+;    void DeblockLumaEq4V_sse2(uint8_t * pPix, int32_t iStride, int32_t iAlpha,
+;                                 int32_t iBeta)
+;*******************************************************************************
+
+WELS_EXTERN  DeblockLumaEq4V_sse2
+
+ALIGN  16
+
+DeblockLumaEq4V_sse2:
+
+	push	ebp
+	mov	ebp, esp
+	and	esp, -16				; fffffff0H
+	sub	esp, 628				; 00000274H
+	mov	eax, dword [ebp+8]
+	mov	ecx, dword [ebp+12]
+	push	ebx
+	push	esi
+
+	lea	edx, [ecx*4]
+	pxor	xmm0, xmm0
+	movdqa	xmm2, xmm0
+
+	movdqa	xmm0, [ecx+eax]
+	mov	esi, eax
+	sub	esi, edx
+	movdqa	xmm3, [esi]
+	movdqa	xmm5, [eax]
+	push	edi
+	lea	edi, [ecx+ecx]
+	lea	ebx, [ecx+ecx*2]
+	mov	dword [esp+640-600], edi
+	mov	esi, eax
+	sub	esi, edi
+	movdqa	xmm1, [esi]
+	movdqa	 [esp+720-272], xmm0
+	mov	edi, eax
+	sub	edi, ecx
+	movdqa	xmm4, [edi]
+	add	ecx, eax
+	mov	dword [esp+640-596], ecx
+
+	mov	ecx, dword [esp+640-600]
+	movdqa	xmm0, [ecx+eax]
+	movdqa	 [esp+736-272], xmm0
+
+	movdqa	xmm0, [eax+ebx]
+	mov	edx, eax
+	sub	edx, ebx
+
+	movsx	ebx, word [ebp+16]
+	movdqa	xmm6, [edx]
+	add	ecx, eax
+	movdqa	 [esp+752-272], xmm0
+	movd	xmm0, ebx
+
+	movsx	ebx, word [ebp+20]
+	movdqa	xmm7, xmm0
+	punpcklwd xmm7, xmm0
+	pshufd	xmm0, xmm7, 0
+	movdqa	 [esp+640-320], xmm0
+	movd	xmm0, ebx
+	movdqa	xmm7, xmm0
+	punpcklwd xmm7, xmm0
+	pshufd	xmm0, xmm7, 0
+
+	movdqa	xmm7, [esp+736-272]
+	punpcklbw xmm7, xmm2
+	movdqa	 [esp+640-416], xmm7
+	movdqa	 [esp+640-512], xmm0
+	movdqa	xmm0, xmm1
+	movdqa	 [esp+672-272], xmm1
+	movdqa	xmm1, xmm4
+	movdqa	 [esp+704-272], xmm5
+	punpcklbw xmm5, xmm2
+	punpcklbw xmm1, xmm2
+
+	movdqa	xmm7, xmm5
+	psubw	xmm7, xmm1
+	pabsw	xmm7, xmm7
+	movdqa	 [esp+640-560], xmm7
+	punpcklbw xmm0, xmm2
+	movdqa	 [esp+688-272], xmm4
+	movdqa	xmm4, [esp+720-272]
+	movdqa	 [esp+640-480], xmm0
+
+	movdqa	xmm7, xmm1
+	psubw	xmm7, xmm0
+
+	movdqa	xmm0, [esp+640-512]
+	pabsw	xmm7, xmm7
+	punpcklbw xmm4, xmm2
+	pcmpgtw	xmm0, xmm7
+	movdqa	 [esp+640-384], xmm4
+	movdqa	xmm7, xmm5
+	psubw	xmm7, xmm4
+	movdqa	xmm4, [esp+640-512]
+	movdqa	 [esp+656-272], xmm6
+	punpcklbw xmm6, xmm2
+	pabsw	xmm7, xmm7
+	movdqa	 [esp+640-48], xmm2
+	movdqa	 [esp+640-368], xmm6
+	movdqa	 [esp+640-144], xmm1
+	movdqa	 [esp+640-400], xmm5
+	pcmpgtw	xmm4, xmm7
+	pand	xmm0, xmm4
+	movdqa	xmm4, [esp+640-320]
+	pcmpgtw	xmm4, [esp+640-560]
+	pand	xmm0, xmm4
+
+	mov	ebx, 2
+	movsx	ebx, bx
+	movd	xmm4, ebx
+	movdqa	xmm7, xmm4
+	punpcklwd xmm7, xmm4
+	movdqa	xmm4, [esp+640-320]
+	psraw	xmm4, 2
+	pshufd	xmm7, xmm7, 0
+	paddw	xmm4, xmm7
+	movdqa	 [esp+640-576], xmm4
+	pcmpgtw	xmm4, [esp+640-560]
+	movdqa	 [esp+640-560], xmm4
+
+	movdqa	xmm4, [esp+640-512]
+	movdqa	 [esp+640-624], xmm7
+	movdqa	xmm7, xmm1
+	psubw	xmm7, xmm6
+	pabsw	xmm7, xmm7
+	pcmpgtw	xmm4, xmm7
+
+	pand	xmm4, [esp+640-560]
+	movdqa	 [esp+640-544], xmm4
+	movdqa	xmm4, [esp+640-512]
+	movdqa	xmm7, xmm5
+	psubw	xmm7, [esp+640-416]
+	pabsw	xmm7, xmm7
+	pcmpgtw	xmm4, xmm7
+
+	pand	xmm4, [esp+640-560]
+	movdqa	 [esp+640-560], xmm4
+
+	movdqa	xmm4, [esp+640-544]
+	pandn	xmm4, xmm6
+	movdqa	 [esp+640-16], xmm4
+	mov	ebx, 4
+	movsx	ebx, bx
+	movd	xmm4, ebx
+	movdqa	xmm7, xmm4
+	punpcklwd xmm7, xmm4
+	movdqa	xmm4, xmm3
+	punpcklbw xmm4, xmm2
+	psllw	xmm4, 1
+	paddw	xmm4, xmm6
+	paddw	xmm4, xmm6
+	paddw	xmm4, xmm6
+	paddw	xmm4, [esp+640-480]
+
+	movdqa	xmm6, [esp+640-560]
+	pshufd	xmm7, xmm7, 0
+	paddw	xmm4, xmm1
+	movdqa	 [esp+640-592], xmm7
+	paddw	xmm4, xmm5
+	paddw	xmm4, xmm7
+	movdqa	xmm7, [esp+640-416]
+	pandn	xmm6, xmm7
+	movdqa	 [esp+640-80], xmm6
+	movdqa	xmm6, [esp+752-272]
+	punpcklbw xmm6, xmm2
+	psllw	xmm6, 1
+	paddw	xmm6, xmm7
+	paddw	xmm6, xmm7
+	paddw	xmm6, xmm7
+	paddw	xmm6, [esp+640-384]
+
+	movdqa	xmm7, [esp+640-480]
+	paddw	xmm6, xmm5
+	paddw	xmm6, xmm1
+	paddw	xmm6, [esp+640-592]
+	psraw	xmm6, 3
+	pand	xmm6, [esp+640-560]
+	movdqa	 [esp+640-112], xmm6
+	movdqa	xmm6, [esp+640-544]
+	pandn	xmm6, xmm7
+	movdqa	 [esp+640-336], xmm6
+	movdqa	xmm6, [esp+640-544]
+	movdqa	 [esp+640-528], xmm6
+	movdqa	xmm6, [esp+640-368]
+	paddw	xmm6, xmm7
+	movdqa	xmm7, xmm1
+	psraw	xmm4, 3
+	pand	xmm4, [esp+640-544]
+	paddw	xmm7, xmm5
+	paddw	xmm6, xmm7
+	paddw	xmm6, [esp+640-624]
+	movdqa	xmm7, [esp+640-528]
+
+	paddw	xmm5, xmm1
+	psraw	xmm6, 2
+	pand	xmm7, xmm6
+
+	movdqa	xmm6, [esp+640-384]
+	movdqa	 [esp+640-64], xmm7
+	movdqa	xmm7, [esp+640-560]
+	pandn	xmm7, xmm6
+	movdqa	 [esp+640-304], xmm7
+	movdqa	xmm7, [esp+640-560]
+	movdqa	 [esp+640-528], xmm7
+	movdqa	xmm7, [esp+640-416]
+	paddw	xmm7, xmm6
+	paddw	xmm7, xmm5
+	paddw	xmm7, [esp+640-624]
+	movdqa	xmm5, [esp+640-528]
+	psraw	xmm7, 2
+	pand	xmm5, xmm7
+	movdqa	 [esp+640-32], xmm5
+
+	movdqa	xmm5, [esp+640-544]
+	movdqa	 [esp+640-528], xmm5
+	movdqa	xmm5, [esp+640-480]
+	movdqa	xmm7, xmm5
+	paddw	xmm7, xmm5
+	movdqa	xmm5, xmm1
+	paddw	xmm5, xmm6
+	paddw	xmm6, [esp+640-592]
+	paddw	xmm7, xmm5
+	paddw	xmm7, [esp+640-624]
+	movdqa	xmm5, [esp+640-528]
+	psraw	xmm7, 2
+	pandn	xmm5, xmm7
+	movdqa	xmm7, [esp+640-480]
+	paddw	xmm7, xmm1
+	paddw	xmm7, [esp+640-400]
+	movdqa	xmm1, [esp+640-544]
+	movdqa	 [esp+640-352], xmm5
+	movdqa	xmm5, [esp+640-368]
+	psllw	xmm7, 1
+	paddw	xmm7, xmm6
+	paddw	xmm5, xmm7
+
+	movdqa	xmm7, [esp+640-400]
+	psraw	xmm5, 3
+	pand	xmm1, xmm5
+	movdqa	xmm5, [esp+640-480]
+	movdqa	 [esp+640-96], xmm1
+	movdqa	xmm1, [esp+640-560]
+	movdqa	 [esp+640-528], xmm1
+	movdqa	xmm1, [esp+640-384]
+	movdqa	xmm6, xmm1
+	paddw	xmm6, xmm1
+	paddw	xmm1, [esp+640-400]
+	paddw	xmm1, [esp+640-144]
+	paddw	xmm7, xmm5
+	paddw	xmm5, [esp+640-592]
+	paddw	xmm6, xmm7
+	paddw	xmm6, [esp+640-624]
+	movdqa	xmm7, [esp+640-528]
+	psraw	xmm6, 2
+	psllw	xmm1, 1
+	paddw	xmm1, xmm5
+
+	movdqa	xmm5, [esp+656-272]
+	pandn	xmm7, xmm6
+	movdqa	xmm6, [esp+640-416]
+	paddw	xmm6, xmm1
+	movdqa	xmm1, [esp+640-560]
+	psraw	xmm6, 3
+	pand	xmm1, xmm6
+
+	movdqa	xmm6, [esp+704-272]
+	movdqa	 [esp+640-128], xmm1
+	movdqa	xmm1, [esp+672-