ref: 2d675cf741bebc04b6586548c2bf5d2578f6a59d
parent: 4062fa9d34c6e9bc3d5b7ea491bcf9096d323011
parent: 1c3c560a0557c83ceea7754091b3f5e1e800aa21
author: ruil2 <ruil2@cisco.com>
date: Thu Apr 17 11:19:29 EDT 2014
Merge pull request #704 from JuannyWang/sampleNow [Encoder UT] sample UT code refactor
--- /dev/null
+++ b/test/encoder/EncUT_Sample.cpp
@@ -1,0 +1,704 @@
+#include<gtest/gtest.h>
+#include<math.h>
+#include<stdlib.h>
+#include<time.h>
+
+#include "cpu_core.h"
+#include "cpu.h"
+#include "sample.h"
+#include "sad_common.h"
+
+using namespace WelsSVCEnc;
+
+// Guard for the second fixture allocation: when B is NULL, release A and abort the
+// calling SetUp() with a fatal gtest failure.
+// A is reset to NULL after it is freed because gtest still runs TearDown() after a
+// fatal failure in SetUp(), and TearDown() releases the same member again; leaving
+// the stale address in place would free the buffer twice.
+// Requirements: a CMemoryAlign* named pMemAlign must be in scope, and A must be an
+// assignable pointer (a fixture member in every use in this file).
+#define ASSERT_MEMORY_FAIL2X(A, B) \
+  if (NULL == (B)) { \
+    pMemAlign->WelsFree((A), "Sad_SrcA"); \
+    (A) = NULL; \
+    ASSERT_TRUE(0); \
+  }
+
+// Guard for the third fixture allocation: when C is NULL, release A and B and abort
+// the calling SetUp() with a fatal gtest failure.
+// A and B are reset to NULL after they are freed because gtest still runs TearDown()
+// after a fatal failure in SetUp(), and TearDown() releases the same members again;
+// leaving the stale addresses in place would free both buffers twice.
+// Requirements: a CMemoryAlign* named pMemAlign must be in scope, and A and B must be
+// assignable pointers (fixture members in every use in this file).
+#define ASSERT_MEMORY_FAIL3X(A, B, C) \
+  if (NULL == (C)) { \
+    pMemAlign->WelsFree((A), "Sad_SrcA"); \
+    (A) = NULL; \
+    pMemAlign->WelsFree((B), "Sad_SrcB"); \
+    (B) = NULL; \
+    ASSERT_TRUE(0); \
+  }
+
+// Base row stride, in bytes, of the pixel buffers allocated by the fixtures in this file:
+// the C fixture adds a random 0..255 to it, the assembly fixture uses it unchanged.
+#define PIXEL_STRIDE 32
+
+/**
+ * Fixture for the plain-C SAD/SATD kernel tests.
+ *
+ * Every test gets two pixel buffers of 32 rows each (stride << 5 bytes), whose strides
+ * are drawn independently from [PIXEL_STRIDE, PIXEL_STRIDE + 255], and a four-entry
+ * int32_t array that the WelsSampleSadFour* kernels write their results into.
+ */
+class SadSatdCFuncTest : public testing::Test {
+public:
+  // Start from an all-null state. gtest still calls TearDown() when SetUp() stops on a
+  // fatal assertion, so every pointer TearDown() inspects must hold a defined value.
+  SadSatdCFuncTest()
+    : m_pPixSrcA(NULL), m_pPixSrcB(NULL), m_iStrideA(0), m_iStrideB(0),
+      m_pSad(NULL), pMemAlign(NULL) {
+  }
+
+  // Picks the random strides and allocates the three buffers. A failed allocation raises
+  // a fatal failure; whatever was allocated before it is released by TearDown().
+  virtual void SetUp() {
+    pMemAlign = new CMemoryAlign(0);
+
+    srand((uint32_t)time(NULL));
+    m_iStrideA = rand()%256 + PIXEL_STRIDE;
+    m_iStrideB = rand()%256 + PIXEL_STRIDE;
+
+    m_pPixSrcA = (uint8_t *)pMemAlign->WelsMalloc(m_iStrideA<<5,"Sad_m_pPixSrcA");
+    ASSERT_TRUE(NULL != m_pPixSrcA);
+    m_pPixSrcB = (uint8_t *)pMemAlign->WelsMalloc(m_iStrideB<<5,"Sad_m_pPixSrcB");
+    ASSERT_TRUE(NULL != m_pPixSrcB);
+    m_pSad = (int32_t*)pMemAlign->WelsMalloc(4*sizeof(int32_t), "m_pSad");
+    ASSERT_TRUE(NULL != m_pSad);
+  }
+
+  // Releases only what SetUp() actually obtained, then resets the members so a pointer
+  // is never released twice.
+  virtual void TearDown() {
+    if (NULL == pMemAlign)
+      return;
+
+    if (NULL != m_pPixSrcA) {
+      pMemAlign->WelsFree(m_pPixSrcA,"Sad_m_pPixSrcA");
+      m_pPixSrcA = NULL;
+    }
+    if (NULL != m_pPixSrcB) {
+      pMemAlign->WelsFree(m_pPixSrcB,"Sad_m_pPixSrcB");
+      m_pPixSrcB = NULL;
+    }
+    if (NULL != m_pSad) {
+      pMemAlign->WelsFree(m_pSad,"m_pSad");
+      m_pSad = NULL;
+    }
+    delete pMemAlign;
+    pMemAlign = NULL;
+  }
+public:
+  uint8_t* m_pPixSrcA;   // first pixel buffer, 32 rows of m_iStrideA bytes
+  uint8_t* m_pPixSrcB;   // second pixel buffer, 32 rows of m_iStrideB bytes
+  int32_t m_iStrideA;    // row stride of m_pPixSrcA in bytes
+  int32_t m_iStrideB;    // row stride of m_pPixSrcB in bytes
+  int32_t* m_pSad;       // four int32_t results written by the SadFour kernels
+
+  CMemoryAlign* pMemAlign; // allocator that owns all three buffers
+};
+
+// Checks WelsSampleSad4x4_c against a directly computed sum of absolute differences
+// over a 4x4 block of random pixels.
+TEST_F(SadSatdCFuncTest, WelsSampleSad4x4_c) {
+  const int32_t kiBlockW = 4;
+  const int32_t kiBlockH = 4;
+  const int32_t kiFillRows = 4;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  // Reference value: walk both blocks row by row, each with its own stride.
+  int32_t iRefSad = 0;
+  for (int32_t iRow = 0; iRow < kiBlockH; ++iRow) {
+    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
+    const uint8_t* pRowB = m_pPixSrcB + iRow * m_iStrideB;
+    for (int32_t iCol = 0; iCol < kiBlockW; ++iCol)
+      iRefSad += abs(pRowA[iCol] - pRowB[iCol]);
+  }
+
+  EXPECT_EQ(WelsSampleSad4x4_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iRefSad);
+}
+
+// Checks WelsSampleSad8x8_c against a directly computed sum of absolute differences
+// over an 8x8 block of random pixels.
+TEST_F(SadSatdCFuncTest, WelsSampleSad8x8_c) {
+  const int32_t kiBlockW = 8;
+  const int32_t kiBlockH = 8;
+  const int32_t kiFillRows = 8;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  // Reference value: walk both blocks row by row, each with its own stride.
+  int32_t iRefSad = 0;
+  for (int32_t iRow = 0; iRow < kiBlockH; ++iRow) {
+    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
+    const uint8_t* pRowB = m_pPixSrcB + iRow * m_iStrideB;
+    for (int32_t iCol = 0; iCol < kiBlockW; ++iCol)
+      iRefSad += abs(pRowA[iCol] - pRowB[iCol]);
+  }
+
+  EXPECT_EQ(WelsSampleSad8x8_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iRefSad);
+}
+
+// Checks WelsSampleSad16x8_c against a directly computed sum of absolute differences
+// over a block 16 pixels wide and 8 rows high.
+TEST_F(SadSatdCFuncTest, WelsSampleSad16x8_c) {
+  const int32_t kiBlockW = 16;
+  const int32_t kiBlockH = 8;
+  const int32_t kiFillRows = 8;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  // Reference value: walk both blocks row by row, each with its own stride.
+  int32_t iRefSad = 0;
+  for (int32_t iRow = 0; iRow < kiBlockH; ++iRow) {
+    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
+    const uint8_t* pRowB = m_pPixSrcB + iRow * m_iStrideB;
+    for (int32_t iCol = 0; iCol < kiBlockW; ++iCol)
+      iRefSad += abs(pRowA[iCol] - pRowB[iCol]);
+  }
+
+  EXPECT_EQ(WelsSampleSad16x8_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iRefSad);
+}
+
+// Checks WelsSampleSad8x16_c against a directly computed sum of absolute differences
+// over a block 8 pixels wide and 16 rows high.
+TEST_F(SadSatdCFuncTest, WelsSampleSad8x16_c) {
+  const int32_t kiBlockW = 8;
+  const int32_t kiBlockH = 16;
+  const int32_t kiFillRows = 16;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  // Reference value: walk both blocks row by row, each with its own stride.
+  int32_t iRefSad = 0;
+  for (int32_t iRow = 0; iRow < kiBlockH; ++iRow) {
+    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
+    const uint8_t* pRowB = m_pPixSrcB + iRow * m_iStrideB;
+    for (int32_t iCol = 0; iCol < kiBlockW; ++iCol)
+      iRefSad += abs(pRowA[iCol] - pRowB[iCol]);
+  }
+
+  EXPECT_EQ(WelsSampleSad8x16_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iRefSad);
+}
+
+// Checks WelsSampleSad16x16_c against a directly computed sum of absolute differences
+// over a 16x16 block of random pixels.
+TEST_F(SadSatdCFuncTest, WelsSampleSad16x16_c) {
+  const int32_t kiBlockW = 16;
+  const int32_t kiBlockH = 16;
+  const int32_t kiFillRows = 16;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  // Reference value: walk both blocks row by row, each with its own stride.
+  int32_t iRefSad = 0;
+  for (int32_t iRow = 0; iRow < kiBlockH; ++iRow) {
+    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
+    const uint8_t* pRowB = m_pPixSrcB + iRow * m_iStrideB;
+    for (int32_t iCol = 0; iCol < kiBlockW; ++iCol)
+      iRefSad += abs(pRowA[iCol] - pRowB[iCol]);
+  }
+
+  EXPECT_EQ(WelsSampleSad16x16_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iRefSad);
+}
+
+// Checks WelsSampleSatd4x4_c against a reference built in three steps: the 4x4 pixel
+// differences, two add/subtract passes over them, and a rounded halving of the sum of
+// absolute values of the result.
+TEST_F(SadSatdCFuncTest, WelsSampleSatd4x4_c) {
+  const int32_t kiFillRows = 4;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  int32_t iDiff[16];   // A - B, stored row-major
+  int32_t iStage1[16]; // first pass: combines the four rows of each column
+  int32_t iStage2[16]; // second pass: combines each group of four first-pass values
+
+  for (int32_t iRow = 0; iRow < 4; ++iRow) {
+    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
+    const uint8_t* pRowB = m_pPixSrcB + iRow * m_iStrideB;
+    for (int32_t iCol = 0; iCol < 4; ++iCol)
+      iDiff[iRow * 4 + iCol] = pRowA[iCol] - pRowB[iCol];
+  }
+
+  // Sign patterns used by both passes: ++++, ++--, +--+, +-+-.
+  for (int32_t iCol = 0; iCol < 4; ++iCol) {
+    const int32_t iR0 = iDiff[iCol];
+    const int32_t iR1 = iDiff[4 + iCol];
+    const int32_t iR2 = iDiff[8 + iCol];
+    const int32_t iR3 = iDiff[12 + iCol];
+    iStage1[iCol]      = iR0 + iR1 + iR2 + iR3;
+    iStage1[4 + iCol]  = iR0 + iR1 - iR2 - iR3;
+    iStage1[8 + iCol]  = iR0 - iR1 - iR2 + iR3;
+    iStage1[12 + iCol] = iR0 - iR1 + iR2 - iR3;
+  }
+
+  for (int32_t iBase = 0; iBase < 16; iBase += 4) {
+    const int32_t iC0 = iStage1[iBase];
+    const int32_t iC1 = iStage1[iBase + 1];
+    const int32_t iC2 = iStage1[iBase + 2];
+    const int32_t iC3 = iStage1[iBase + 3];
+    iStage2[iBase]     = iC0 + iC1 + iC2 + iC3;
+    iStage2[iBase + 1] = iC0 + iC1 - iC2 - iC3;
+    iStage2[iBase + 2] = iC0 - iC1 - iC2 + iC3;
+    iStage2[iBase + 3] = iC0 - iC1 + iC2 - iC3;
+  }
+
+  int32_t iAbsSum = 0;
+  for (int32_t iIdx = 0; iIdx < 16; ++iIdx)
+    iAbsSum += abs(iStage2[iIdx]);
+
+  // The kernel is expected to return the absolute sum halved, rounding up.
+  EXPECT_EQ(WelsSampleSatd4x4_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), (iAbsSum+1)>>1);
+}
+
+// Checks WelsSampleSadFour16x16_c: the four values it writes must add up to the sum of
+// the 16x16 SADs between block A and block B shifted one pixel left, right, up and down.
+TEST_F(SadSatdCFuncTest, WelsSampleSadFour16x16_c) {
+  const int32_t kiBlockW = 16;
+  const int32_t kiBlockH = 16;
+  const int32_t kiFillRows = 32;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  // Block B starts one row into its buffer so the row above it is valid memory.
+  uint8_t* const pRefB = m_pPixSrcB + m_iStrideB;
+
+  int32_t iRefSad = 0;
+  for (int32_t iRow = 0; iRow < kiBlockH; ++iRow) {
+    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
+    const uint8_t* pRowB = pRefB + iRow * m_iStrideB;
+    for (int32_t iCol = 0; iCol < kiBlockW; ++iCol) {
+      const int32_t iCur = pRowA[iCol];
+      iRefSad += abs(iCur - pRowB[iCol - 1]);
+      iRefSad += abs(iCur - pRowB[iCol + 1]);
+      iRefSad += abs(iCur - pRowB[iCol - m_iStrideB]);
+      iRefSad += abs(iCur - pRowB[iCol + m_iStrideB]);
+    }
+  }
+
+  WelsSampleSadFour16x16_c(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
+
+  int32_t iSadTotal = 0;
+  for (int32_t iDir = 0; iDir < 4; ++iDir)
+    iSadTotal += m_pSad[iDir];
+  EXPECT_EQ(iSadTotal, iRefSad);
+}
+
+// Checks WelsSampleSadFour16x8_c: the four values it writes must add up to the sum of
+// the 16-wide, 8-high SADs between block A and block B shifted one pixel left, right,
+// up and down.
+TEST_F(SadSatdCFuncTest, WelsSampleSadFour16x8_c) {
+  const int32_t kiBlockW = 16;
+  const int32_t kiBlockH = 8;
+  const int32_t kiFillRows = 32;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  // Block B starts one row into its buffer so the row above it is valid memory.
+  uint8_t* const pRefB = m_pPixSrcB + m_iStrideB;
+
+  int32_t iRefSad = 0;
+  for (int32_t iRow = 0; iRow < kiBlockH; ++iRow) {
+    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
+    const uint8_t* pRowB = pRefB + iRow * m_iStrideB;
+    for (int32_t iCol = 0; iCol < kiBlockW; ++iCol) {
+      const int32_t iCur = pRowA[iCol];
+      iRefSad += abs(iCur - pRowB[iCol - 1]);
+      iRefSad += abs(iCur - pRowB[iCol + 1]);
+      iRefSad += abs(iCur - pRowB[iCol - m_iStrideB]);
+      iRefSad += abs(iCur - pRowB[iCol + m_iStrideB]);
+    }
+  }
+
+  WelsSampleSadFour16x8_c(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
+
+  int32_t iSadTotal = 0;
+  for (int32_t iDir = 0; iDir < 4; ++iDir)
+    iSadTotal += m_pSad[iDir];
+  EXPECT_EQ(iSadTotal, iRefSad);
+}
+
+// Checks WelsSampleSadFour8x16_c: the four values it writes must add up to the sum of
+// the 8-wide, 16-high SADs between block A and block B shifted one pixel left, right,
+// up and down.
+TEST_F(SadSatdCFuncTest, WelsSampleSadFour8x16_c) {
+  const int32_t kiBlockW = 8;
+  const int32_t kiBlockH = 16;
+  const int32_t kiFillRows = 32;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  // Block B starts one row into its buffer so the row above it is valid memory.
+  uint8_t* const pRefB = m_pPixSrcB + m_iStrideB;
+
+  int32_t iRefSad = 0;
+  for (int32_t iRow = 0; iRow < kiBlockH; ++iRow) {
+    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
+    const uint8_t* pRowB = pRefB + iRow * m_iStrideB;
+    for (int32_t iCol = 0; iCol < kiBlockW; ++iCol) {
+      const int32_t iCur = pRowA[iCol];
+      iRefSad += abs(iCur - pRowB[iCol - 1]);
+      iRefSad += abs(iCur - pRowB[iCol + 1]);
+      iRefSad += abs(iCur - pRowB[iCol - m_iStrideB]);
+      iRefSad += abs(iCur - pRowB[iCol + m_iStrideB]);
+    }
+  }
+
+  WelsSampleSadFour8x16_c(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
+
+  int32_t iSadTotal = 0;
+  for (int32_t iDir = 0; iDir < 4; ++iDir)
+    iSadTotal += m_pSad[iDir];
+  EXPECT_EQ(iSadTotal, iRefSad);
+}
+
+// Checks WelsSampleSadFour8x8_c: the four values it writes must add up to the sum of
+// the 8x8 SADs between block A and block B shifted one pixel left, right, up and down.
+TEST_F(SadSatdCFuncTest, WelsSampleSadFour8x8_c) {
+  const int32_t kiBlockW = 8;
+  const int32_t kiBlockH = 8;
+  const int32_t kiFillRows = 16;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  // Block B starts one row into its buffer so the row above it is valid memory.
+  uint8_t* const pRefB = m_pPixSrcB + m_iStrideB;
+
+  int32_t iRefSad = 0;
+  for (int32_t iRow = 0; iRow < kiBlockH; ++iRow) {
+    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
+    const uint8_t* pRowB = pRefB + iRow * m_iStrideB;
+    for (int32_t iCol = 0; iCol < kiBlockW; ++iCol) {
+      const int32_t iCur = pRowA[iCol];
+      iRefSad += abs(iCur - pRowB[iCol - 1]);
+      iRefSad += abs(iCur - pRowB[iCol + 1]);
+      iRefSad += abs(iCur - pRowB[iCol - m_iStrideB]);
+      iRefSad += abs(iCur - pRowB[iCol + m_iStrideB]);
+    }
+  }
+
+  WelsSampleSadFour8x8_c(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
+
+  int32_t iSadTotal = 0;
+  for (int32_t iDir = 0; iDir < 4; ++iDir)
+    iSadTotal += m_pSad[iDir];
+  EXPECT_EQ(iSadTotal, iRefSad);
+}
+
+// Checks WelsSampleSadFour4x4_c: the four values it writes must add up to the sum of
+// the 4x4 SADs between block A and block B shifted one pixel left, right, up and down.
+TEST_F(SadSatdCFuncTest, WelsSampleSadFour4x4_c) {
+  const int32_t kiBlockW = 4;
+  const int32_t kiBlockH = 4;
+  const int32_t kiFillRows = 8;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  // Block B starts one row into its buffer so the row above it is valid memory.
+  uint8_t* const pRefB = m_pPixSrcB + m_iStrideB;
+
+  int32_t iRefSad = 0;
+  for (int32_t iRow = 0; iRow < kiBlockH; ++iRow) {
+    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
+    const uint8_t* pRowB = pRefB + iRow * m_iStrideB;
+    for (int32_t iCol = 0; iCol < kiBlockW; ++iCol) {
+      const int32_t iCur = pRowA[iCol];
+      iRefSad += abs(iCur - pRowB[iCol - 1]);
+      iRefSad += abs(iCur - pRowB[iCol + 1]);
+      iRefSad += abs(iCur - pRowB[iCol - m_iStrideB]);
+      iRefSad += abs(iCur - pRowB[iCol + m_iStrideB]);
+    }
+  }
+
+  WelsSampleSadFour4x4_c(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
+
+  int32_t iSadTotal = 0;
+  for (int32_t iDir = 0; iDir < 4; ++iDir)
+    iSadTotal += m_pSad[iDir];
+  EXPECT_EQ(iSadTotal, iRefSad);
+}
+
+#ifdef X86_ASM
+/**
+ * Fixture for the x86 assembly SAD/SATD kernel tests.
+ *
+ * Records the CPU feature flags returned by WelsCPUFeatureDetect() so each test can
+ * skip itself when its instruction set is missing, and allocates two pixel buffers of
+ * 32 rows with a fixed stride of PIXEL_STRIDE bytes plus a four-entry int32_t array
+ * that the WelsSampleSadFour* kernels write their results into.
+ */
+class SadSatdAssemblyFuncTest : public testing::Test {
+public:
+  // Start from an all-null state. gtest still calls TearDown() when SetUp() stops on a
+  // fatal assertion, so every pointer TearDown() inspects must hold a defined value.
+  SadSatdAssemblyFuncTest()
+    : m_uiCpuFeatureFlag(0), m_pPixSrcA(NULL), m_pPixSrcB(NULL), m_iStrideA(0),
+      m_iStrideB(0), m_pSad(NULL), pMemAlign(NULL) {
+  }
+
+  // Detects the CPU features and allocates the three buffers. A failed allocation raises
+  // a fatal failure; whatever was allocated before it is released by TearDown().
+  virtual void SetUp() {
+    int32_t iCpuCores = 0;
+    m_uiCpuFeatureFlag = WelsCPUFeatureDetect(&iCpuCores);
+    pMemAlign = new CMemoryAlign(16);
+    m_iStrideA = m_iStrideB = PIXEL_STRIDE;
+
+    m_pPixSrcA = (uint8_t *)pMemAlign->WelsMalloc(m_iStrideA<<5,"Sad_m_pPixSrcA");
+    ASSERT_TRUE(NULL != m_pPixSrcA);
+    m_pPixSrcB = (uint8_t *)pMemAlign->WelsMalloc(m_iStrideB<<5,"Sad_m_pPixSrcB");
+    ASSERT_TRUE(NULL != m_pPixSrcB);
+    m_pSad = (int32_t*)pMemAlign->WelsMalloc(4*sizeof(int32_t), "m_pSad");
+    ASSERT_TRUE(NULL != m_pSad);
+  }
+
+  // Releases only what SetUp() actually obtained, then resets the members so a pointer
+  // is never released twice.
+  virtual void TearDown() {
+    if (NULL == pMemAlign)
+      return;
+
+    if (NULL != m_pPixSrcA) {
+      pMemAlign->WelsFree(m_pPixSrcA,"Sad_m_pPixSrcA");
+      m_pPixSrcA = NULL;
+    }
+    if (NULL != m_pPixSrcB) {
+      pMemAlign->WelsFree(m_pPixSrcB,"Sad_m_pPixSrcB");
+      m_pPixSrcB = NULL;
+    }
+    if (NULL != m_pSad) {
+      pMemAlign->WelsFree(m_pSad,"m_pSad");
+      m_pSad = NULL;
+    }
+    delete pMemAlign;
+    pMemAlign = NULL;
+  }
+public:
+  uint32_t m_uiCpuFeatureFlag; // value returned by WelsCPUFeatureDetect()
+  uint8_t* m_pPixSrcA;         // first pixel buffer, 32 rows of m_iStrideA bytes
+  uint8_t* m_pPixSrcB;         // second pixel buffer, 32 rows of m_iStrideB bytes
+  int32_t m_iStrideA;          // row stride of m_pPixSrcA in bytes
+  int32_t m_iStrideB;          // row stride of m_pPixSrcB in bytes
+  int32_t* m_pSad;             // four int32_t results written by the SadFour kernels
+
+  CMemoryAlign* pMemAlign;     // allocator that owns all three buffers
+};
+
+// Cross-checks WelsSampleSad4x4_mmx against WelsSampleSad4x4_c on random pixels.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSad4x4_mmx) {
+  // Nothing to verify when the CPU does not report the MMXEXT feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_MMXEXT) != 0;
+  if (!kbSupported)
+    return;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  const int32_t kiFillRows = 4;
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  EXPECT_EQ(WelsSampleSad4x4_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
+            WelsSampleSad4x4_mmx(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
+}
+
+
+// Cross-checks WelsSampleSad8x8_sse21 against WelsSampleSad8x8_c on random pixels.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSad8x8_sse21) {
+  // Nothing to verify when the CPU does not report the SSE2 feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_SSE2) != 0;
+  if (!kbSupported)
+    return;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  const int32_t kiFillRows = 8;
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  EXPECT_EQ(WelsSampleSad8x8_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
+            WelsSampleSad8x8_sse21(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
+}
+
+// Cross-checks WelsSampleSad8x16_sse2 against WelsSampleSad8x16_c on random pixels.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSad8x16_sse2) {
+  // Nothing to verify when the CPU does not report the SSE2 feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_SSE2) != 0;
+  if (!kbSupported)
+    return;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  const int32_t kiFillRows = 16;
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  EXPECT_EQ(WelsSampleSad8x16_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
+            WelsSampleSad8x16_sse2(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
+}
+
+// Cross-checks WelsSampleSad16x8_sse2 against WelsSampleSad16x8_c on random pixels.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSad16x8_sse2) {
+  // Nothing to verify when the CPU does not report the SSE2 feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_SSE2) != 0;
+  if (!kbSupported)
+    return;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  const int32_t kiFillRows = 8;
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  EXPECT_EQ(WelsSampleSad16x8_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
+            WelsSampleSad16x8_sse2(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
+}
+
+// Cross-checks WelsSampleSad16x16_sse2 against WelsSampleSad16x16_c on random pixels.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSad16x16_sse2) {
+  // Nothing to verify when the CPU does not report the SSE2 feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_SSE2) != 0;
+  if (!kbSupported)
+    return;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  const int32_t kiFillRows = 16;
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  EXPECT_EQ(WelsSampleSad16x16_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
+            WelsSampleSad16x16_sse2(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
+}
+
+// Cross-checks WelsSampleSatd4x4_sse2 against WelsSampleSatd4x4_c on random pixels.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd4x4_sse2) {
+  // Nothing to verify when the CPU does not report the SSE2 feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_SSE2) != 0;
+  if (!kbSupported)
+    return;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  const int32_t kiFillRows = 4;
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  EXPECT_EQ(WelsSampleSatd4x4_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
+            WelsSampleSatd4x4_sse2(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
+}
+
+// Cross-checks WelsSampleSatd8x8_sse2 against WelsSampleSatd8x8_c on random pixels.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd8x8_sse2) {
+  // Nothing to verify when the CPU does not report the SSE2 feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_SSE2) != 0;
+  if (!kbSupported)
+    return;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  const int32_t kiFillRows = 8;
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  EXPECT_EQ(WelsSampleSatd8x8_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
+            WelsSampleSatd8x8_sse2(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
+}
+
+// Cross-checks WelsSampleSatd8x16_sse2 against WelsSampleSatd8x16_c on random pixels.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd8x16_sse2) {
+  // Nothing to verify when the CPU does not report the SSE2 feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_SSE2) != 0;
+  if (!kbSupported)
+    return;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  const int32_t kiFillRows = 16;
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  EXPECT_EQ(WelsSampleSatd8x16_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
+            WelsSampleSatd8x16_sse2(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
+}
+
+// Cross-checks WelsSampleSatd16x8_sse2 against WelsSampleSatd16x8_c on random pixels.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd16x8_sse2) {
+  // Nothing to verify when the CPU does not report the SSE2 feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_SSE2) != 0;
+  if (!kbSupported)
+    return;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  const int32_t kiFillRows = 8;
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  EXPECT_EQ(WelsSampleSatd16x8_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
+            WelsSampleSatd16x8_sse2(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
+}
+
+// Cross-checks WelsSampleSatd16x16_sse2 against WelsSampleSatd16x16_c on random pixels.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd16x16_sse2) {
+  // Nothing to verify when the CPU does not report the SSE2 feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_SSE2) != 0;
+  if (!kbSupported)
+    return;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  const int32_t kiFillRows = 16;
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  EXPECT_EQ(WelsSampleSatd16x16_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
+            WelsSampleSatd16x16_sse2(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
+}
+
+// Cross-checks WelsSampleSatd4x4_sse41 against WelsSampleSatd4x4_c on random pixels.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd4x4_sse41) {
+  // Nothing to verify when the CPU does not report the SSE4.1 feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_SSE41) != 0;
+  if (!kbSupported)
+    return;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  const int32_t kiFillRows = 4;
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  EXPECT_EQ(WelsSampleSatd4x4_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
+            WelsSampleSatd4x4_sse41(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
+}
+
+// Cross-checks WelsSampleSatd8x8_sse41 against WelsSampleSatd8x8_c on random pixels.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd8x8_sse41) {
+  // Nothing to verify when the CPU does not report the SSE4.1 feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_SSE41) != 0;
+  if (!kbSupported)
+    return;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  const int32_t kiFillRows = 8;
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  EXPECT_EQ(WelsSampleSatd8x8_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
+            WelsSampleSatd8x8_sse41(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
+}
+
+// Cross-checks WelsSampleSatd8x16_sse41 against WelsSampleSatd8x16_c on random pixels.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd8x16_sse41) {
+  // Nothing to verify when the CPU does not report the SSE4.1 feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_SSE41) != 0;
+  if (!kbSupported)
+    return;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  const int32_t kiFillRows = 16;
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  EXPECT_EQ(WelsSampleSatd8x16_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
+            WelsSampleSatd8x16_sse41(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
+}
+
+// Cross-checks WelsSampleSatd16x8_sse41 against WelsSampleSatd16x8_c on random pixels.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd16x8_sse41) {
+  // Nothing to verify when the CPU does not report the SSE4.1 feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_SSE41) != 0;
+  if (!kbSupported)
+    return;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  const int32_t kiFillRows = 8;
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  EXPECT_EQ(WelsSampleSatd16x8_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
+            WelsSampleSatd16x8_sse41(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
+}
+
+// Cross-checks WelsSampleSatd16x16_sse41 against WelsSampleSatd16x16_c on random pixels.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd16x16_sse41) {
+  // Nothing to verify when the CPU does not report the SSE4.1 feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_SSE41) != 0;
+  if (!kbSupported)
+    return;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  const int32_t kiFillRows = 16;
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  EXPECT_EQ(WelsSampleSatd16x16_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
+            WelsSampleSatd16x16_sse41(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
+}
+
+// Checks WelsSampleSadFour16x16_sse2: the four values it writes must add up to the sum
+// of the 16x16 SADs between block A and block B shifted one pixel left, right, up and
+// down.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSadFour16x16_sse2) {
+  // Nothing to verify when the CPU does not report the SSE2 feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_SSE2) != 0;
+  if (!kbSupported)
+    return;
+
+  const int32_t kiBlockW = 16;
+  const int32_t kiBlockH = 16;
+  const int32_t kiFillRows = 32;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  // Block B starts one row into its buffer so the row above it is valid memory.
+  uint8_t* const pRefB = m_pPixSrcB + m_iStrideB;
+
+  int32_t iRefSad = 0;
+  for (int32_t iRow = 0; iRow < kiBlockH; ++iRow) {
+    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
+    const uint8_t* pRowB = pRefB + iRow * m_iStrideB;
+    for (int32_t iCol = 0; iCol < kiBlockW; ++iCol) {
+      const int32_t iCur = pRowA[iCol];
+      iRefSad += abs(iCur - pRowB[iCol - 1]);
+      iRefSad += abs(iCur - pRowB[iCol + 1]);
+      iRefSad += abs(iCur - pRowB[iCol - m_iStrideB]);
+      iRefSad += abs(iCur - pRowB[iCol + m_iStrideB]);
+    }
+  }
+
+  WelsSampleSadFour16x16_sse2(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
+
+  int32_t iSadTotal = 0;
+  for (int32_t iDir = 0; iDir < 4; ++iDir)
+    iSadTotal += m_pSad[iDir];
+  EXPECT_EQ(iSadTotal, iRefSad);
+}
+
+// Checks WelsSampleSadFour16x8_sse2: the four values it writes must add up to the sum
+// of the 16-wide, 8-high SADs between block A and block B shifted one pixel left,
+// right, up and down.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSadFour16x8_sse2) {
+  // Nothing to verify when the CPU does not report the SSE2 feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_SSE2) != 0;
+  if (!kbSupported)
+    return;
+
+  const int32_t kiBlockW = 16;
+  const int32_t kiBlockH = 8;
+  const int32_t kiFillRows = 32;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  // Block B starts one row into its buffer so the row above it is valid memory.
+  uint8_t* const pRefB = m_pPixSrcB + m_iStrideB;
+
+  int32_t iRefSad = 0;
+  for (int32_t iRow = 0; iRow < kiBlockH; ++iRow) {
+    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
+    const uint8_t* pRowB = pRefB + iRow * m_iStrideB;
+    for (int32_t iCol = 0; iCol < kiBlockW; ++iCol) {
+      const int32_t iCur = pRowA[iCol];
+      iRefSad += abs(iCur - pRowB[iCol - 1]);
+      iRefSad += abs(iCur - pRowB[iCol + 1]);
+      iRefSad += abs(iCur - pRowB[iCol - m_iStrideB]);
+      iRefSad += abs(iCur - pRowB[iCol + m_iStrideB]);
+    }
+  }
+
+  WelsSampleSadFour16x8_sse2(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
+
+  int32_t iSadTotal = 0;
+  for (int32_t iDir = 0; iDir < 4; ++iDir)
+    iSadTotal += m_pSad[iDir];
+  EXPECT_EQ(iSadTotal, iRefSad);
+}
+
+// Checks WelsSampleSadFour8x16_sse2: the four values it writes must add up to the sum
+// of the 8-wide, 16-high SADs between block A and block B shifted one pixel left,
+// right, up and down.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSadFour8x16_sse2) {
+  // Nothing to verify when the CPU does not report the SSE2 feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_SSE2) != 0;
+  if (!kbSupported)
+    return;
+
+  const int32_t kiBlockW = 8;
+  const int32_t kiBlockH = 16;
+  const int32_t kiFillRows = 32;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  // Block B starts one row into its buffer so the row above it is valid memory.
+  uint8_t* const pRefB = m_pPixSrcB + m_iStrideB;
+
+  int32_t iRefSad = 0;
+  for (int32_t iRow = 0; iRow < kiBlockH; ++iRow) {
+    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
+    const uint8_t* pRowB = pRefB + iRow * m_iStrideB;
+    for (int32_t iCol = 0; iCol < kiBlockW; ++iCol) {
+      const int32_t iCur = pRowA[iCol];
+      iRefSad += abs(iCur - pRowB[iCol - 1]);
+      iRefSad += abs(iCur - pRowB[iCol + 1]);
+      iRefSad += abs(iCur - pRowB[iCol - m_iStrideB]);
+      iRefSad += abs(iCur - pRowB[iCol + m_iStrideB]);
+    }
+  }
+
+  WelsSampleSadFour8x16_sse2(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
+
+  int32_t iSadTotal = 0;
+  for (int32_t iDir = 0; iDir < 4; ++iDir)
+    iSadTotal += m_pSad[iDir];
+  EXPECT_EQ(iSadTotal, iRefSad);
+}
+
+// Checks WelsSampleSadFour8x8_sse2: the four values it writes must add up to the sum
+// of the 8x8 SADs between block A and block B shifted one pixel left, right, up and
+// down.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSadFour8x8_sse2) {
+  // Nothing to verify when the CPU does not report the SSE2 feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_SSE2) != 0;
+  if (!kbSupported)
+    return;
+
+  const int32_t kiBlockW = 8;
+  const int32_t kiBlockH = 8;
+  const int32_t kiFillRows = 16;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  // Block B starts one row into its buffer so the row above it is valid memory.
+  uint8_t* const pRefB = m_pPixSrcB + m_iStrideB;
+
+  int32_t iRefSad = 0;
+  for (int32_t iRow = 0; iRow < kiBlockH; ++iRow) {
+    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
+    const uint8_t* pRowB = pRefB + iRow * m_iStrideB;
+    for (int32_t iCol = 0; iCol < kiBlockW; ++iCol) {
+      const int32_t iCur = pRowA[iCol];
+      iRefSad += abs(iCur - pRowB[iCol - 1]);
+      iRefSad += abs(iCur - pRowB[iCol + 1]);
+      iRefSad += abs(iCur - pRowB[iCol - m_iStrideB]);
+      iRefSad += abs(iCur - pRowB[iCol + m_iStrideB]);
+    }
+  }
+
+  WelsSampleSadFour8x8_sse2(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
+
+  int32_t iSadTotal = 0;
+  for (int32_t iDir = 0; iDir < 4; ++iDir)
+    iSadTotal += m_pSad[iDir];
+  EXPECT_EQ(iSadTotal, iRefSad);
+}
+
+
+// Checks WelsSampleSadFour4x4_sse2: the four values it writes must add up to the sum
+// of the 4x4 SADs between block A and block B shifted one pixel left, right, up and
+// down.
+TEST_F(SadSatdAssemblyFuncTest, WelsSampleSadFour4x4_sse2) {
+  // Nothing to verify when the CPU does not report the SSE2 feature bit.
+  const bool kbSupported = (m_uiCpuFeatureFlag & WELS_CPU_SSE2) != 0;
+  if (!kbSupported)
+    return;
+
+  const int32_t kiBlockW = 4;
+  const int32_t kiBlockH = 4;
+  const int32_t kiFillRows = 8;
+
+  // Randomise the first kiFillRows rows of both buffers (A first, then B).
+  srand((uint32_t)time(NULL));
+  for (int32_t iIdx = 0; iIdx < m_iStrideA * kiFillRows; ++iIdx)
+    m_pPixSrcA[iIdx] = rand() % 256;
+  for (int32_t iIdx = 0; iIdx < m_iStrideB * kiFillRows; ++iIdx)
+    m_pPixSrcB[iIdx] = rand() % 256;
+
+  // Block B starts one row into its buffer so the row above it is valid memory.
+  uint8_t* const pRefB = m_pPixSrcB + m_iStrideB;
+
+  int32_t iRefSad = 0;
+  for (int32_t iRow = 0; iRow < kiBlockH; ++iRow) {
+    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
+    const uint8_t* pRowB = pRefB + iRow * m_iStrideB;
+    for (int32_t iCol = 0; iCol < kiBlockW; ++iCol) {
+      const int32_t iCur = pRowA[iCol];
+      iRefSad += abs(iCur - pRowB[iCol - 1]);
+      iRefSad += abs(iCur - pRowB[iCol + 1]);
+      iRefSad += abs(iCur - pRowB[iCol - m_iStrideB]);
+      iRefSad += abs(iCur - pRowB[iCol + m_iStrideB]);
+    }
+  }
+
+  WelsSampleSadFour4x4_sse2(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
+
+  int32_t iSadTotal = 0;
+  for (int32_t iDir = 0; iDir < 4; ++iDir)
+    iSadTotal += m_pSad[iDir];
+  EXPECT_EQ(iSadTotal, iRefSad);
+}
+#endif
--- a/test/encoder/targets.mk
+++ b/test/encoder/targets.mk
@@ -8,6 +8,7 @@
$(ENCODER_UNITTEST_SRCDIR)/EncUT_GetIntraPredictor.cpp\
$(ENCODER_UNITTEST_SRCDIR)/EncUT_MemoryAlloc.cpp\
$(ENCODER_UNITTEST_SRCDIR)/EncUT_MotionEstimate.cpp\
+ $(ENCODER_UNITTEST_SRCDIR)/EncUT_Sample.cpp\
ENCODER_UNITTEST_OBJS += $(ENCODER_UNITTEST_CPP_SRCS:.cpp=.$(OBJ))