ref: 9d5bc6fd749f9e9352f4ff41486e8322f28b2ee0
dir: /test/encoder/EncUT_Sample.cpp/
#include <gtest/gtest.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include "cpu_core.h"
#include "cpu.h"
#include "sample.h"
#include "sad_common.h"
using namespace WelsSVCEnc;

// Allocation-failure helpers for the fixtures' SetUp().
// When the most recent allocation (last argument) is NULL they release the
// buffers allocated before it and raise a fatal gtest failure.  The released
// pointers are reset to NULL because gtest still runs TearDown() after a
// fatal failure in SetUp(), and TearDown() must not free them a second time.
#define ASSERT_MEMORY_FAIL2X(A, B) \
  if (NULL == (B)) { \
    pMemAlign->WelsFree((A), "Sad_SrcA"); \
    (A) = NULL; \
    ASSERT_TRUE(0); \
  }

#define ASSERT_MEMORY_FAIL3X(A, B, C) \
  if (NULL == (C)) { \
    pMemAlign->WelsFree((A), "Sad_SrcA"); \
    (A) = NULL; \
    pMemAlign->WelsFree((B), "Sad_SrcB"); \
    (B) = NULL; \
    ASSERT_TRUE(0); \
  }

#define PIXEL_STRIDE 32

namespace {

// Seeds rand() with the current time, then writes rand()%256 into the first
// iCountA bytes of pPixA followed by the first iCountB bytes of pPixB.
void UtRandomizePixels(uint8_t* pPixA, int32_t iCountA, uint8_t* pPixB, int32_t iCountB) {
  srand((uint32_t)time(NULL));
  for (int32_t i = 0; i < iCountA; i++)
    pPixA[i] = rand() % 256;
  for (int32_t i = 0; i < iCountB; i++)
    pPixB[i] = rand() % 256;
}

// Reference sum of absolute differences over an iWidth x iHeight block.
// pPixA / pPixB point at the top-left sample of each block; rows are
// iStrideA / iStrideB bytes apart.
int32_t UtRefSad(const uint8_t* pPixA, int32_t iStrideA, const uint8_t* pPixB, int32_t iStrideB,
                 int32_t iWidth, int32_t iHeight) {
  int32_t iSumSad = 0;
  for (int32_t iRow = 0; iRow < iHeight; iRow++) {
    for (int32_t iCol = 0; iCol < iWidth; iCol++)
      iSumSad += abs(pPixA[iCol] - pPixB[iCol]);
    pPixA += iStrideA;
    pPixB += iStrideB;
  }
  return iSumSad;
}

// Reference for the SadFour functions: the total of the four SADs between the
// block at pPixA and the block at pPixB shifted by one sample left, right, up
// and down.  pPixB therefore needs one readable row above and below the block
// and one readable sample to the left and right of each row.
int32_t UtRefSadFourSum(const uint8_t* pPixA, int32_t iStrideA, const uint8_t* pPixB, int32_t iStrideB,
                        int32_t iWidth, int32_t iHeight) {
  int32_t iSumSad = 0;
  for (int32_t iRow = 0; iRow < iHeight; iRow++) {
    for (int32_t iCol = 0; iCol < iWidth; iCol++) {
      iSumSad += abs(pPixA[iCol] - pPixB[iCol - 1]);
      iSumSad += abs(pPixA[iCol] - pPixB[iCol + 1]);
      iSumSad += abs(pPixA[iCol] - pPixB[iCol - iStrideB]);
      iSumSad += abs(pPixA[iCol] - pPixB[iCol + iStrideB]);
    }
    pPixA += iStrideA;
    pPixB += iStrideB;
  }
  return iSumSad;
}

// Reference 4x4 SATD: the 4x4 difference block is transformed vertically and
// then horizontally with the sign table below, the absolute values of the 16
// coefficients are summed, and the result is (sum + 1) >> 1.
int32_t UtRefSatd4x4(const uint8_t* pPixA, int32_t iStrideA, const uint8_t* pPixB, int32_t iStrideB) {
  static const int32_t kiSign[4][4] = {
    { 1,  1,  1,  1 },
    { 1,  1, -1, -1 },
    { 1, -1, -1,  1 },
    { 1, -1,  1, -1 }
  };

  // Pixel differences, iDiff[row][col].
  int32_t iDiff[4][4];
  for (int32_t iRow = 0; iRow < 4; iRow++) {
    for (int32_t iCol = 0; iCol < 4; iCol++)
      iDiff[iRow][iCol] = pPixA[iCol] - pPixB[iCol];
    pPixA += iStrideA;
    pPixB += iStrideB;
  }

  // Vertical pass: combine the four rows of every column.
  int32_t iVert[4][4];
  for (int32_t k = 0; k < 4; k++) {
    for (int32_t iCol = 0; iCol < 4; iCol++) {
      int32_t iAcc = 0;
      for (int32_t iRow = 0; iRow < 4; iRow++)
        iAcc += kiSign[k][iRow] * iDiff[iRow][iCol];
      iVert[k][iCol] = iAcc;
    }
  }

  // Horizontal pass and accumulation of absolute coefficients.
  int32_t iSumSatd = 0;
  for (int32_t k = 0; k < 4; k++) {
    for (int32_t m = 0; m < 4; m++) {
      int32_t iCoeff = 0;
      for (int32_t iCol = 0; iCol < 4; iCol++)
        iCoeff += kiSign[m][iCol] * iVert[k][iCol];
      iSumSatd += abs(iCoeff);
    }
  }
  return (iSumSatd + 1) >> 1;
}

} // namespace

// Fixture for the plain C SAD/SATD functions.  Both pixel buffers hold 32 rows
// with independent random strides in [PIXEL_STRIDE, PIXEL_STRIDE + 255].
class SadSatdCFuncTest : public testing::Test {
 public:
  // Members start out NULL/0 so that TearDown() is safe even when SetUp()
  // aborted part-way through.
  SadSatdCFuncTest()
    : m_pPixSrcA(NULL), m_pPixSrcB(NULL), m_iStrideA(0), m_iStrideB(0), m_pSad(NULL), pMemAlign(NULL) {
  }

  virtual void SetUp() {
    pMemAlign = new CMemoryAlign(0);

    srand((uint32_t)time(NULL));
    m_iStrideA = rand() % 256 + PIXEL_STRIDE;
    m_iStrideB = rand() % 256 + PIXEL_STRIDE;

    m_pPixSrcA = (uint8_t*)pMemAlign->WelsMalloc(m_iStrideA << 5, "Sad_m_pPixSrcA");
    ASSERT_TRUE(NULL != m_pPixSrcA);
    m_pPixSrcB = (uint8_t*)pMemAlign->WelsMalloc(m_iStrideB << 5, "Sad_m_pPixSrcB");
    ASSERT_MEMORY_FAIL2X(m_pPixSrcA, m_pPixSrcB)
    m_pSad = (int32_t*)pMemAlign->WelsMalloc(4 * sizeof(int32_t), "m_pSad");
    ASSERT_MEMORY_FAIL3X(m_pPixSrcA, m_pPixSrcB, m_pSad)
  }

  virtual void TearDown() {
    if (NULL == pMemAlign)
      return;
    // Only release what is still owned; SetUp() may have failed early.
    if (NULL != m_pPixSrcA)
      pMemAlign->WelsFree(m_pPixSrcA, "Sad_m_pPixSrcA");
    if (NULL != m_pPixSrcB)
      pMemAlign->WelsFree(m_pPixSrcB, "Sad_m_pPixSrcB");
    if (NULL != m_pSad)
      pMemAlign->WelsFree(m_pSad, "m_pSad");
    m_pPixSrcA = NULL;
    m_pPixSrcB = NULL;
    m_pSad = NULL;
    delete pMemAlign;
    pMemAlign = NULL;
  }

 public:
  uint8_t* m_pPixSrcA;      // first pixel buffer, 32 rows of m_iStrideA bytes
  uint8_t* m_pPixSrcB;      // second pixel buffer, 32 rows of m_iStrideB bytes
  int32_t m_iStrideA;
  int32_t m_iStrideB;
  int32_t* m_pSad;          // four-entry output array for the SadFour functions
  CMemoryAlign* pMemAlign;
};

TEST_F(SadSatdCFuncTest, WelsSampleSad4x4_c) {
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 2, m_pPixSrcB, m_iStrideB << 2);
  const int32_t iSumSad = UtRefSad(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB, 4, 4);
  EXPECT_EQ(WelsSampleSad4x4_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iSumSad);
}

TEST_F(SadSatdCFuncTest, WelsSampleSad8x8_c) {
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 3, m_pPixSrcB, m_iStrideB << 3);
  const int32_t iSumSad = UtRefSad(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB, 8, 8);
  EXPECT_EQ(WelsSampleSad8x8_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iSumSad);
}

TEST_F(SadSatdCFuncTest, WelsSampleSad16x8_c) {
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 3, m_pPixSrcB, m_iStrideB << 3);
  const int32_t iSumSad = UtRefSad(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB, 16, 8);
  EXPECT_EQ(WelsSampleSad16x8_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iSumSad);
}

TEST_F(SadSatdCFuncTest, WelsSampleSad8x16_c) {
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 4, m_pPixSrcB, m_iStrideB << 4);
  const int32_t iSumSad = UtRefSad(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB, 8, 16);
  EXPECT_EQ(WelsSampleSad8x16_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iSumSad);
}

TEST_F(SadSatdCFuncTest, WelsSampleSad16x16_c) {
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 4, m_pPixSrcB, m_iStrideB << 4);
  const int32_t iSumSad = UtRefSad(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB, 16, 16);
  EXPECT_EQ(WelsSampleSad16x16_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iSumSad);
}

TEST_F(SadSatdCFuncTest, WelsSampleSatd4x4_c) {
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 2, m_pPixSrcB, m_iStrideB << 2);
  const int32_t iRefSatd = UtRefSatd4x4(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB);
  EXPECT_EQ(WelsSampleSatd4x4_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iRefSatd);
}

// The SadFour tests place the reference block one row into buffer B so the
// "up" neighbour row exists; only the sum of the four outputs is compared.
TEST_F(SadSatdCFuncTest, WelsSampleSadFour16x16_c) {
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 5, m_pPixSrcB, m_iStrideB << 5);
  uint8_t* pRefB = m_pPixSrcB + m_iStrideB;
  const int32_t iSumSad = UtRefSadFourSum(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, 16, 16);
  WelsSampleSadFour16x16_c(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
  EXPECT_EQ(m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iSumSad);
}

TEST_F(SadSatdCFuncTest, WelsSampleSadFour16x8_c) {
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 5, m_pPixSrcB, m_iStrideB << 5);
  uint8_t* pRefB = m_pPixSrcB + m_iStrideB;
  const int32_t iSumSad = UtRefSadFourSum(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, 16, 8);
  WelsSampleSadFour16x8_c(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
  EXPECT_EQ(m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iSumSad);
}

TEST_F(SadSatdCFuncTest, WelsSampleSadFour8x16_c) {
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 5, m_pPixSrcB, m_iStrideB << 5);
  uint8_t* pRefB = m_pPixSrcB + m_iStrideB;
  const int32_t iSumSad = UtRefSadFourSum(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, 8, 16);
  WelsSampleSadFour8x16_c(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
  EXPECT_EQ(m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iSumSad);
}

TEST_F(SadSatdCFuncTest, WelsSampleSadFour8x8_c) {
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 4, m_pPixSrcB, m_iStrideB << 4);
  uint8_t* pRefB = m_pPixSrcB + m_iStrideB;
  const int32_t iSumSad = UtRefSadFourSum(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, 8, 8);
  WelsSampleSadFour8x8_c(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
  EXPECT_EQ(m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iSumSad);
}

TEST_F(SadSatdCFuncTest, WelsSampleSadFour4x4_c) {
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 3, m_pPixSrcB, m_iStrideB << 3);
  uint8_t* pRefB = m_pPixSrcB + m_iStrideB;
  const int32_t iSumSad = UtRefSadFourSum(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, 4, 4);
  WelsSampleSadFour4x4_c(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
  EXPECT_EQ(m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iSumSad);
}

#ifdef X86_ASM
// Fixture for the x86 assembly SAD/SATD functions.  Buffers come from a
// CMemoryAlign(16) allocator and both strides are fixed at PIXEL_STRIDE.
// Each test returns early (and so passes) when the CPU flag it checks is not
// reported by WelsCPUFeatureDetect().
class SadSatdAssemblyFuncTest : public testing::Test {
 public:
  // Members start out NULL/0 so that TearDown() is safe even when SetUp()
  // aborted part-way through.
  SadSatdAssemblyFuncTest()
    : m_uiCpuFeatureFlag(0), m_pPixSrcA(NULL), m_pPixSrcB(NULL), m_iStrideA(0), m_iStrideB(0),
      m_pSad(NULL), pMemAlign(NULL) {
  }

  virtual void SetUp() {
    int32_t iCpuCores = 0;
    m_uiCpuFeatureFlag = WelsCPUFeatureDetect(&iCpuCores);
    pMemAlign = new CMemoryAlign(16);
    m_iStrideA = m_iStrideB = PIXEL_STRIDE;

    m_pPixSrcA = (uint8_t*)pMemAlign->WelsMalloc(m_iStrideA << 5, "Sad_m_pPixSrcA");
    ASSERT_TRUE(NULL != m_pPixSrcA);
    m_pPixSrcB = (uint8_t*)pMemAlign->WelsMalloc(m_iStrideB << 5, "Sad_m_pPixSrcB");
    ASSERT_MEMORY_FAIL2X(m_pPixSrcA, m_pPixSrcB)
    m_pSad = (int32_t*)pMemAlign->WelsMalloc(4 * sizeof(int32_t), "m_pSad");
    ASSERT_MEMORY_FAIL3X(m_pPixSrcA, m_pPixSrcB, m_pSad)
  }

  virtual void TearDown() {
    if (NULL == pMemAlign)
      return;
    // Only release what is still owned; SetUp() may have failed early.
    if (NULL != m_pPixSrcA)
      pMemAlign->WelsFree(m_pPixSrcA, "Sad_m_pPixSrcA");
    if (NULL != m_pPixSrcB)
      pMemAlign->WelsFree(m_pPixSrcB, "Sad_m_pPixSrcB");
    if (NULL != m_pSad)
      pMemAlign->WelsFree(m_pSad, "m_pSad");
    m_pPixSrcA = NULL;
    m_pPixSrcB = NULL;
    m_pSad = NULL;
    delete pMemAlign;
    pMemAlign = NULL;
  }

 public:
  uint32_t m_uiCpuFeatureFlag;  // value returned by WelsCPUFeatureDetect()
  uint8_t* m_pPixSrcA;          // first pixel buffer, 32 rows of m_iStrideA bytes
  uint8_t* m_pPixSrcB;          // second pixel buffer, 32 rows of m_iStrideB bytes
  int32_t m_iStrideA;
  int32_t m_iStrideB;
  int32_t* m_pSad;              // four-entry output array for the SadFour functions
  CMemoryAlign* pMemAlign;
};

// --- SAD: assembly result must equal the C implementation -------------------

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSad4x4_mmx) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_MMXEXT))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 2, m_pPixSrcB, m_iStrideB << 2);
  EXPECT_EQ(WelsSampleSad4x4_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
            WelsSampleSad4x4_mmx(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
}

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSad8x8_sse21) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_SSE2))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 3, m_pPixSrcB, m_iStrideB << 3);
  EXPECT_EQ(WelsSampleSad8x8_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
            WelsSampleSad8x8_sse21(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
}

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSad8x16_sse2) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_SSE2))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 4, m_pPixSrcB, m_iStrideB << 4);
  EXPECT_EQ(WelsSampleSad8x16_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
            WelsSampleSad8x16_sse2(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
}

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSad16x8_sse2) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_SSE2))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 3, m_pPixSrcB, m_iStrideB << 3);
  EXPECT_EQ(WelsSampleSad16x8_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
            WelsSampleSad16x8_sse2(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
}

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSad16x16_sse2) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_SSE2))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 4, m_pPixSrcB, m_iStrideB << 4);
  EXPECT_EQ(WelsSampleSad16x16_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
            WelsSampleSad16x16_sse2(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
}

// --- SATD (SSE2): assembly result must equal the C implementation -----------

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd4x4_sse2) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_SSE2))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 2, m_pPixSrcB, m_iStrideB << 2);
  EXPECT_EQ(WelsSampleSatd4x4_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
            WelsSampleSatd4x4_sse2(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
}

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd8x8_sse2) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_SSE2))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 3, m_pPixSrcB, m_iStrideB << 3);
  EXPECT_EQ(WelsSampleSatd8x8_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
            WelsSampleSatd8x8_sse2(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
}

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd8x16_sse2) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_SSE2))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 4, m_pPixSrcB, m_iStrideB << 4);
  EXPECT_EQ(WelsSampleSatd8x16_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
            WelsSampleSatd8x16_sse2(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
}

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd16x8_sse2) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_SSE2))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 3, m_pPixSrcB, m_iStrideB << 3);
  EXPECT_EQ(WelsSampleSatd16x8_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
            WelsSampleSatd16x8_sse2(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
}

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd16x16_sse2) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_SSE2))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 4, m_pPixSrcB, m_iStrideB << 4);
  EXPECT_EQ(WelsSampleSatd16x16_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
            WelsSampleSatd16x16_sse2(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
}

// --- SATD (SSE4.1): assembly result must equal the C implementation ---------

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd4x4_sse41) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_SSE41))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 2, m_pPixSrcB, m_iStrideB << 2);
  EXPECT_EQ(WelsSampleSatd4x4_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
            WelsSampleSatd4x4_sse41(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
}

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd8x8_sse41) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_SSE41))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 3, m_pPixSrcB, m_iStrideB << 3);
  EXPECT_EQ(WelsSampleSatd8x8_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
            WelsSampleSatd8x8_sse41(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
}

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd8x16_sse41) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_SSE41))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 4, m_pPixSrcB, m_iStrideB << 4);
  EXPECT_EQ(WelsSampleSatd8x16_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
            WelsSampleSatd8x16_sse41(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
}

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd16x8_sse41) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_SSE41))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 3, m_pPixSrcB, m_iStrideB << 3);
  EXPECT_EQ(WelsSampleSatd16x8_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
            WelsSampleSatd16x8_sse41(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
}

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSatd16x16_sse41) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_SSE41))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 4, m_pPixSrcB, m_iStrideB << 4);
  EXPECT_EQ(WelsSampleSatd16x16_c(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB),
            WelsSampleSatd16x16_sse41(m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB));
}

// --- SadFour (SSE2): sum of the four outputs must equal the reference -------

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSadFour16x16_sse2) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_SSE2))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 5, m_pPixSrcB, m_iStrideB << 5);
  uint8_t* pRefB = m_pPixSrcB + m_iStrideB;
  const int32_t iSumSad = UtRefSadFourSum(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, 16, 16);
  WelsSampleSadFour16x16_sse2(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
  EXPECT_EQ(m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iSumSad);
}

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSadFour16x8_sse2) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_SSE2))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 5, m_pPixSrcB, m_iStrideB << 5);
  uint8_t* pRefB = m_pPixSrcB + m_iStrideB;
  const int32_t iSumSad = UtRefSadFourSum(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, 16, 8);
  WelsSampleSadFour16x8_sse2(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
  EXPECT_EQ(m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iSumSad);
}

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSadFour8x16_sse2) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_SSE2))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 5, m_pPixSrcB, m_iStrideB << 5);
  uint8_t* pRefB = m_pPixSrcB + m_iStrideB;
  const int32_t iSumSad = UtRefSadFourSum(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, 8, 16);
  WelsSampleSadFour8x16_sse2(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
  EXPECT_EQ(m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iSumSad);
}

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSadFour8x8_sse2) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_SSE2))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 4, m_pPixSrcB, m_iStrideB << 4);
  uint8_t* pRefB = m_pPixSrcB + m_iStrideB;
  const int32_t iSumSad = UtRefSadFourSum(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, 8, 8);
  WelsSampleSadFour8x8_sse2(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
  EXPECT_EQ(m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iSumSad);
}

TEST_F(SadSatdAssemblyFuncTest, WelsSampleSadFour4x4_sse2) {
  if (0 == (m_uiCpuFeatureFlag & WELS_CPU_SSE2))
    return;
  UtRandomizePixels(m_pPixSrcA, m_iStrideA << 3, m_pPixSrcB, m_iStrideB << 3);
  uint8_t* pRefB = m_pPixSrcB + m_iStrideB;
  const int32_t iSumSad = UtRefSadFourSum(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, 4, 4);
  WelsSampleSadFour4x4_sse2(m_pPixSrcA, m_iStrideA, pRefB, m_iStrideB, m_pSad);
  EXPECT_EQ(m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iSumSad);
}
#endif