ref: b9adbcf37cf478268ad4647d1028a9dbd0332797
parent: 8764231784b251029d9bb2a83383e98e9c42905c
author: Sindre Aamås <saamas@cisco.com>
date: Mon Jan 18 15:25:46 EST 2016
[UT] Add missing SSE2 4x4 IDCT test. IDCT input is defined in such a way that the intermediate values cannot legally overflow an int16_t. The use of random values as input causes such overflows. This results in implementation-dependent output, depending on which type is used to hold intermediate results. Use a template for the test reference implementation to test implementations with different intermediate representations.
--- a/test/encoder/EncUT_DecodeMbAux.cpp
+++ b/test/encoder/EncUT_DecodeMbAux.cpp
@@ -179,6 +179,7 @@
EXPECT_TRUE (ok);
}
#define FDEC_STRIDE 32
+template<typename clip_t>
void WelsIDctT4Anchor (uint8_t* p_dst, int16_t dct[16]) {
int16_t tmp[16];
int32_t iStridex2 = (FDEC_STRIDE << 1);
@@ -193,13 +194,13 @@
}
for (i = 0; i < 4; i++) {
uiDst = p_dst[i];
- p_dst[i] = WelsClip1 (uiDst + ((tmp[i] + tmp[4 + i] + tmp[8 + i] + (tmp[12 + i] >> 1) + 32) >> 6));
+ p_dst[i] = WelsClip1 (uiDst + (clip_t (tmp[i] + tmp[4 + i] + tmp[8 + i] + (tmp[12 + i] >> 1) + 32) >> 6));
uiDst = p_dst[i + FDEC_STRIDE];
- p_dst[i + FDEC_STRIDE] = WelsClip1 (uiDst + ((tmp[i] + (tmp[4 + i] >> 1) - tmp[8 + i] - tmp[12 + i] + 32) >> 6));
+ p_dst[i + FDEC_STRIDE] = WelsClip1 (uiDst + (clip_t (tmp[i] + (tmp[4 + i] >> 1) - tmp[8 + i] - tmp[12 + i] + 32) >> 6));
uiDst = p_dst[i + iStridex2];
- p_dst[i + iStridex2] = WelsClip1 (uiDst + ((tmp[i] - (tmp[4 + i] >> 1) - tmp[8 + i] + tmp[12 + i] + 32) >> 6));
+ p_dst[i + iStridex2] = WelsClip1 (uiDst + (clip_t (tmp[i] - (tmp[4 + i] >> 1) - tmp[8 + i] + tmp[12 + i] + 32) >> 6));
uiDst = p_dst[i + iStridex3];
- p_dst[i + iStridex3] = WelsClip1 (uiDst + ((tmp[i] - tmp[4 + i] + tmp[8 + i] - (tmp[12 + i] >> 1) + 32) >> 6));
+ p_dst[i + iStridex3] = WelsClip1 (uiDst + (clip_t (tmp[i] - tmp[4 + i] + tmp[8 + i] - (tmp[12 + i] >> 1) + 32) >> 6));
}
}
TEST (DecodeMbAuxTest, WelsIDctT4Rec_c) {
@@ -214,7 +215,7 @@
iPred[i * FDEC_STRIDE + j] = iRefDst[i * FDEC_STRIDE + j] = rand() & 255;
}
}
- WelsIDctT4Anchor (iRefDst, iRefDct);
+ WelsIDctT4Anchor<int32_t> (iRefDst, iRefDct);
WelsIDctT4Rec_c (iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);
int ok = -1;
for (int i = 0; i < 4; i++) {
@@ -257,13 +258,15 @@
}
}
#endif
+template<typename clip_t>
void WelsIDctT8Anchor (uint8_t* p_dst, int16_t dct[4][16]) {
- WelsIDctT4Anchor (&p_dst[0], dct[0]);
- WelsIDctT4Anchor (&p_dst[4], dct[1]);
- WelsIDctT4Anchor (&p_dst[4 * FDEC_STRIDE + 0], dct[2]);
- WelsIDctT4Anchor (&p_dst[4 * FDEC_STRIDE + 4], dct[3]);
+ WelsIDctT4Anchor<clip_t> (&p_dst[0], dct[0]);
+ WelsIDctT4Anchor<clip_t> (&p_dst[4], dct[1]);
+ WelsIDctT4Anchor<clip_t> (&p_dst[4 * FDEC_STRIDE + 0], dct[2]);
+ WelsIDctT4Anchor<clip_t> (&p_dst[4 * FDEC_STRIDE + 4], dct[3]);
}
-TEST (DecodeMbAuxTest, WelsIDctFourT4Rec_c) {
+template<typename clip_t>
+void TestIDctFourT4Rec (void (*func) (uint8_t* pRec, int32_t iStride, uint8_t* pPred, int32_t iPredStride, int16_t* pDct)) {
int16_t iRefDct[4][16];
uint8_t iRefDst[16 * FDEC_STRIDE];
ENFORCE_STACK_ALIGN_1D (int16_t, iDct, 64, 16);
@@ -277,8 +280,8 @@
for (int j = 0; j < 8; j++)
iPred[i * FDEC_STRIDE + j] = iRefDst[i * FDEC_STRIDE + j] = rand() & 255;
- WelsIDctT8Anchor (iRefDst, iRefDct);
- WelsIDctFourT4Rec_c (iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);
+ WelsIDctT8Anchor<clip_t> (iRefDst, iRefDct);
+ func (iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);
int ok = -1;
for (int i = 0; i < 8; i++) {
for (int j = 0; j < 8; j++) {
@@ -290,6 +293,9 @@
}
EXPECT_EQ (ok, -1);
}
+TEST (DecodeMbAuxTest, WelsIDctFourT4Rec_c) {
+ TestIDctFourT4Rec<int32_t> (WelsIDctFourT4Rec_c);
+}
void WelsIDctRecI16x4DcAnchor (uint8_t* p_dst, int16_t dct[4]) {
for (int i = 0; i < 4; i++, p_dst += FDEC_STRIDE) {
p_dst[0] = WelsClip1 (p_dst[0] + ((dct[0] + 32) >> 6));
@@ -345,6 +351,9 @@
EXPECT_EQ (ok, -1);
}
#if defined(X86_ASM)
+TEST (DecodeMbAuxTest, WelsIDctFourT4Rec_sse2) {
+ TestIDctFourT4Rec<int16_t> (WelsIDctFourT4Rec_sse2);
+}
TEST (DecodeMbAuxTest, WelsIDctRecI16x16Dc_sse2) {
int32_t iCpuCores = 0;
uint32_t uiCpuFeatureFlag = WelsCPUFeatureDetect (&iCpuCores);