ref: 005fc6970b4c1997d40b98d2ac01d34f39310606
parent: 4d8ebc9ec450fbbc049e19711c1a4f062459c23e
	author: Dmitry Kovalev <dkovalev@google.com>
	date: Thu Feb  6 06:54:15 EST 2014
	
Finally removing "short" from transform names. Change-Id: I5259b68dc1bcceb153e3ffe638a79a59a3019e9d
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -273,7 +273,7 @@
}
 void fht16x16_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
- vp9_short_fht16x16_c(in, out, stride, tx_type);
+ vp9_fht16x16_c(in, out, stride, tx_type);
}
 class Trans16x16TestBase {
@@ -507,10 +507,10 @@
INSTANTIATE_TEST_CASE_P(
C, Trans16x16HT,
::testing::Values(
- make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 0),
- make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 1),
- make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 2),
- make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 3)));
+ make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0),
+ make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1),
+ make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2),
+ make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3)));
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
@@ -521,9 +521,9 @@
INSTANTIATE_TEST_CASE_P(
SSE2, Trans16x16HT,
::testing::Values(
- make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 0),
- make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 1),
- make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 2),
- make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3)));
+ make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 0),
+ make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 1),
+ make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 2),
+ make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3)));
#endif
} // namespace
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -45,7 +45,7 @@
}
 void fht4x4_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
- vp9_short_fht4x4_c(in, out, stride, tx_type);
+ vp9_fht4x4_c(in, out, stride, tx_type);
}
 class Trans4x4TestBase {
@@ -281,10 +281,10 @@
INSTANTIATE_TEST_CASE_P(
C, Trans4x4HT,
::testing::Values(
- make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 0),
- make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 1),
- make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 2),
- make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 3)));
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0),
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1),
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2),
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3)));
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
@@ -295,10 +295,10 @@
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4HT,
::testing::Values(
- make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0),
- make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1),
- make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2),
- make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3)));
+ make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0),
+ make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1),
+ make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2),
+ make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3)));
#endif
} // namespace
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -44,7 +44,7 @@
}
 void fht8x8_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
- vp9_short_fht8x8_c(in, out, stride, tx_type);
+ vp9_fht8x8_c(in, out, stride, tx_type);
}
 class FwdTrans8x8TestBase {
@@ -308,10 +308,10 @@
INSTANTIATE_TEST_CASE_P(
C, FwdTrans8x8HT,
::testing::Values(
- make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 0),
- make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 1),
- make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 2),
- make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 3)));
+ make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0),
+ make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1),
+ make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2),
+ make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3)));
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
@@ -321,9 +321,9 @@
INSTANTIATE_TEST_CASE_P(
SSE2, FwdTrans8x8HT,
::testing::Values(
- make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0),
- make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1),
- make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2),
- make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3)));
+ make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0),
+ make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1),
+ make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2),
+ make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3)));
#endif
} // namespace
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -707,14 +707,14 @@
fi
# fdct functions
-prototype void vp9_short_fht4x4 "const int16_t *input, int16_t *output, int stride, int tx_type"
-specialize vp9_short_fht4x4 sse2 avx2
+prototype void vp9_fht4x4 "const int16_t *input, int16_t *output, int stride, int tx_type"
+specialize vp9_fht4x4 sse2 avx2
-prototype void vp9_short_fht8x8 "const int16_t *input, int16_t *output, int stride, int tx_type"
-specialize vp9_short_fht8x8 sse2 avx2
+prototype void vp9_fht8x8 "const int16_t *input, int16_t *output, int stride, int tx_type"
+specialize vp9_fht8x8 sse2 avx2
-prototype void vp9_short_fht16x16 "const int16_t *input, int16_t *output, int stride, int tx_type"
-specialize vp9_short_fht16x16 sse2 avx2
+prototype void vp9_fht16x16 "const int16_t *input, int16_t *output, int stride, int tx_type"
+specialize vp9_fht16x16 sse2 avx2
prototype void vp9_fwht4x4 "const int16_t *input, int16_t *output, int stride"
specialize vp9_fwht4x4
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -18,8 +18,6 @@
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_systemdependent.h"
-#include "vp9/encoder/vp9_dct.h"
-
 static INLINE int fdct_round_shift(int input) {
int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
assert(INT16_MIN <= rv && rv <= INT16_MAX);
@@ -157,32 +155,36 @@
   { fadst4, fadst4 }   // ADST_ADST = 3
};
-void vp9_short_fht4x4_c(const int16_t *input, int16_t *output,
-                        int stride, int tx_type) {
- int16_t out[4 * 4];
- int16_t *outptr = &out[0];
- int i, j;
- int16_t temp_in[4], temp_out[4];
- const transform_2d ht = FHT_4[tx_type];
+void vp9_fht4x4_c(const int16_t *input, int16_t *output,
+                  int stride, int tx_type) {
+  if (tx_type == DCT_DCT) {
+ vp9_fdct4x4_c(input, output, stride);
+  } else {
+ int16_t out[4 * 4];
+ int16_t *outptr = &out[0];
+ int i, j;
+ int16_t temp_in[4], temp_out[4];
+ const transform_2d ht = FHT_4[tx_type];
- // Columns
-  for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = input[j * stride + i] * 16;
- if (i == 0 && temp_in[0])
- temp_in[0] += 1;
- ht.cols(temp_in, temp_out);
- for (j = 0; j < 4; ++j)
- outptr[j * 4 + i] = temp_out[j];
- }
+ // Columns
+    for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = input[j * stride + i] * 16;
+ if (i == 0 && temp_in[0])
+ temp_in[0] += 1;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 4; ++j)
+ outptr[j * 4 + i] = temp_out[j];
+ }
- // Rows
-  for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = out[j + i * 4];
- ht.rows(temp_in, temp_out);
- for (j = 0; j < 4; ++j)
- output[j + i * 4] = (temp_out[j] + 1) >> 2;
+ // Rows
+    for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = out[j + i * 4];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < 4; ++j)
+ output[j + i * 4] = (temp_out[j] + 1) >> 2;
+ }
}
}
@@ -565,30 +567,34 @@
   { fadst8, fadst8 }   // ADST_ADST = 3
};
-void vp9_short_fht8x8_c(const int16_t *input, int16_t *output,
-                        int stride, int tx_type) {
- int16_t out[64];
- int16_t *outptr = &out[0];
- int i, j;
- int16_t temp_in[8], temp_out[8];
- const transform_2d ht = FHT_8[tx_type];
+void vp9_fht8x8_c(const int16_t *input, int16_t *output,
+                  int stride, int tx_type) {
+  if (tx_type == DCT_DCT) {
+ vp9_fdct8x8_c(input, output, stride);
+  } else {
+ int16_t out[64];
+ int16_t *outptr = &out[0];
+ int i, j;
+ int16_t temp_in[8], temp_out[8];
+ const transform_2d ht = FHT_8[tx_type];
- // Columns
-  for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j)
- temp_in[j] = input[j * stride + i] * 4;
- ht.cols(temp_in, temp_out);
- for (j = 0; j < 8; ++j)
- outptr[j * 8 + i] = temp_out[j];
- }
+ // Columns
+    for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j)
+ temp_in[j] = input[j * stride + i] * 4;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 8; ++j)
+ outptr[j * 8 + i] = temp_out[j];
+ }
- // Rows
-  for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j)
- temp_in[j] = out[j + i * 8];
- ht.rows(temp_in, temp_out);
- for (j = 0; j < 8; ++j)
- output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
+ // Rows
+    for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j)
+ temp_in[j] = out[j + i * 8];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < 8; ++j)
+ output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
+ }
}
}
@@ -958,31 +964,34 @@
   { fadst16, fadst16 }   // ADST_ADST = 3
};
-void vp9_short_fht16x16_c(const int16_t *input, int16_t *output,
-                          int stride, int tx_type) {
- int16_t out[256];
- int16_t *outptr = &out[0];
- int i, j;
- int16_t temp_in[16], temp_out[16];
- const transform_2d ht = FHT_16[tx_type];
+void vp9_fht16x16_c(const int16_t *input, int16_t *output,
+                    int stride, int tx_type) {
+  if (tx_type == DCT_DCT) {
+ vp9_fdct16x16_c(input, output, stride);
+  } else {
+ int16_t out[256];
+ int16_t *outptr = &out[0];
+ int i, j;
+ int16_t temp_in[16], temp_out[16];
+ const transform_2d ht = FHT_16[tx_type];
- // Columns
-  for (i = 0; i < 16; ++i) {
- for (j = 0; j < 16; ++j)
- temp_in[j] = input[j * stride + i] * 4;
- ht.cols(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
-// outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
- }
+ // Columns
+    for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j)
+ temp_in[j] = input[j * stride + i] * 4;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 16; ++j)
+ outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
+ }
- // Rows
-  for (i = 0; i < 16; ++i) {
- for (j = 0; j < 16; ++j)
- temp_in[j] = out[j + i * 16];
- ht.rows(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- output[j + i * 16] = temp_out[j];
+ // Rows
+    for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j)
+ temp_in[j] = out[j + i * 16];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < 16; ++j)
+ output[j + i * 16] = temp_out[j];
+ }
}
}
@@ -1374,28 +1383,4 @@
for (j = 0; j < 32; ++j)
out[j + i * 32] = temp_out[j];
}
-}
-
-void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output,
-                int stride) {
- if (tx_type == DCT_DCT)
- vp9_fdct4x4(input, output, stride);
- else
- vp9_short_fht4x4(input, output, stride, tx_type);
-}
-
-void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output,
-                int stride) {
- if (tx_type == DCT_DCT)
- vp9_fdct8x8(input, output, stride);
- else
- vp9_short_fht8x8(input, output, stride, tx_type);
-}
-
-void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output,
-                  int stride) {
- if (tx_type == DCT_DCT)
- vp9_fdct16x16(input, output, stride);
- else
- vp9_short_fht16x16(input, output, stride, tx_type);
}
--- a/vp9/encoder/vp9_dct.h
+++ /dev/null
@@ -1,32 +1,0 @@
-/*
- * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VP9_ENCODER_VP9_DCT_H_
-#define VP9_ENCODER_VP9_DCT_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output,
- int stride);
-
-void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output,
- int stride);
-
-void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output,
- int stride);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // VP9_ENCODER_VP9_DCT_H_
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -19,7 +19,6 @@
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_systemdependent.h"
-#include "vp9/encoder/vp9_dct.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_rdopt.h"
@@ -571,7 +570,7 @@
       if (!x->skip_recode) {
vp9_subtract_block(16, 16, src_diff, diff_stride,
src, p->src.stride, dst, pd->dst.stride);
- vp9_fht16x16(tx_type, src_diff, coeff, diff_stride);
+ vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan_order->scan,
@@ -591,7 +590,7 @@
       if (!x->skip_recode) {
vp9_subtract_block(8, 8, src_diff, diff_stride,
src, p->src.stride, dst, pd->dst.stride);
- vp9_fht8x8(tx_type, src_diff, coeff, diff_stride);
+ vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan_order->scan,
@@ -617,7 +616,7 @@
vp9_subtract_block(4, 4, src_diff, diff_stride,
src, p->src.stride, dst, pd->dst.stride);
if (tx_type != DCT_DCT)
- vp9_short_fht4x4(src_diff, coeff, diff_stride, tx_type);
+ vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
else
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1064,7 +1064,7 @@
so = &vp9_scan_orders[TX_4X4][tx_type];
if (tx_type != DCT_DCT)
- vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
+ vp9_fht4x4(src_diff, coeff, 8, tx_type);
else
x->fwd_txm4x4(src_diff, coeff, 8);
--- a/vp9/encoder/x86/vp9_dct_avx2.c
+++ b/vp9/encoder/x86/vp9_dct_avx2.c
@@ -244,32 +244,36 @@
transpose_4x4_avx2(in);
}
-void vp9_short_fht4x4_avx2(const int16_t *input, int16_t *output,
-                           int stride, int tx_type) {
+void vp9_fht4x4_avx2(const int16_t *input, int16_t *output,
+                     int stride, int tx_type) {
__m128i in[4];
- load_buffer_4x4_avx2(input, in, stride);
+
   switch (tx_type) {
- case 0: // DCT_DCT
- fdct4_avx2(in);
- fdct4_avx2(in);
+ case DCT_DCT:
+ vp9_fdct4x4_avx2(input, output, stride);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
+ load_buffer_4x4_avx2(input, in, stride);
fadst4_avx2(in);
fdct4_avx2(in);
+ write_buffer_4x4_avx2(output, in);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
+ load_buffer_4x4_avx2(input, in, stride);
fdct4_avx2(in);
fadst4_avx2(in);
+ write_buffer_4x4_avx2(output, in);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
+ load_buffer_4x4_avx2(input, in, stride);
fadst4_avx2(in);
fadst4_avx2(in);
+ write_buffer_4x4_avx2(output, in);
break;
default:
assert(0);
break;
}
- write_buffer_4x4_avx2(output, in);
}
 void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) {
@@ -1028,33 +1032,39 @@
array_transpose_8x8_avx2(in, in);
}
-void vp9_short_fht8x8_avx2(const int16_t *input, int16_t *output,
-                           int stride, int tx_type) {
+void vp9_fht8x8_avx2(const int16_t *input, int16_t *output,
+                     int stride, int tx_type) {
__m128i in[8];
- load_buffer_8x8_avx2(input, in, stride);
+
   switch (tx_type) {
- case 0: // DCT_DCT
- fdct8_avx2(in);
- fdct8_avx2(in);
+ case DCT_DCT:
+ vp9_fdct8x8_avx2(input, output, stride);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
+ load_buffer_8x8_avx2(input, in, stride);
fadst8_avx2(in);
fdct8_avx2(in);
+ right_shift_8x8_avx2(in, 1);
+ write_buffer_8x8_avx2(output, in, 8);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
+ load_buffer_8x8_avx2(input, in, stride);
fdct8_avx2(in);
fadst8_avx2(in);
+ right_shift_8x8_avx2(in, 1);
+ write_buffer_8x8_avx2(output, in, 8);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
+ load_buffer_8x8_avx2(input, in, stride);
fadst8_avx2(in);
fadst8_avx2(in);
+ right_shift_8x8_avx2(in, 1);
+ write_buffer_8x8_avx2(output, in, 8);
break;
default:
assert(0);
break;
}
- right_shift_8x8_avx2(in, 1);
- write_buffer_8x8_avx2(output, in, 8);
}
 void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
@@ -2534,36 +2544,39 @@
array_transpose_16x16_avx2(in0, in1);
}
-void vp9_short_fht16x16_avx2(const int16_t *input, int16_t *output,
-                             int stride, int tx_type) {
+void vp9_fht16x16_avx2(const int16_t *input, int16_t *output,
+                      int stride, int tx_type) {
__m128i in0[16], in1[16];
- load_buffer_16x16_avx2(input, in0, in1, stride);
+
   switch (tx_type) {
- case 0: // DCT_DCT
- fdct16_avx2(in0, in1);
- right_shift_16x16_avx2(in0, in1);
- fdct16_avx2(in0, in1);
+ case DCT_DCT:
+ vp9_fdct16x16_avx2(input, output, stride);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
+ load_buffer_16x16_avx2(input, in0, in1, stride);
fadst16_avx2(in0, in1);
right_shift_16x16_avx2(in0, in1);
fdct16_avx2(in0, in1);
+ write_buffer_16x16_avx2(output, in0, in1, 16);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
+ load_buffer_16x16_avx2(input, in0, in1, stride);
fdct16_avx2(in0, in1);
right_shift_16x16_avx2(in0, in1);
fadst16_avx2(in0, in1);
+ write_buffer_16x16_avx2(output, in0, in1, 16);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
+ load_buffer_16x16_avx2(input, in0, in1, stride);
fadst16_avx2(in0, in1);
right_shift_16x16_avx2(in0, in1);
fadst16_avx2(in0, in1);
+ write_buffer_16x16_avx2(output, in0, in1, 16);
break;
default:
assert(0);
break;
}
- write_buffer_16x16_avx2(output, in0, in1, 16);
}
#define FDCT32x32_2D_AVX2 vp9_fdct32x32_rd_avx2
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -242,32 +242,36 @@
transpose_4x4(in);
}
-void vp9_short_fht4x4_sse2(const int16_t *input, int16_t *output,
-                           int stride, int tx_type) {
+void vp9_fht4x4_sse2(const int16_t *input, int16_t *output,
+                     int stride, int tx_type) {
__m128i in[4];
- load_buffer_4x4(input, in, stride);
+
   switch (tx_type) {
- case 0: // DCT_DCT
- fdct4_sse2(in);
- fdct4_sse2(in);
+ case DCT_DCT:
+ vp9_fdct4x4_sse2(input, output, stride);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
+ load_buffer_4x4(input, in, stride);
fadst4_sse2(in);
fdct4_sse2(in);
+ write_buffer_4x4(output, in);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
+ load_buffer_4x4(input, in, stride);
fdct4_sse2(in);
fadst4_sse2(in);
+ write_buffer_4x4(output, in);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
+ load_buffer_4x4(input, in, stride);
fadst4_sse2(in);
fadst4_sse2(in);
+ write_buffer_4x4(output, in);
break;
- default:
- assert(0);
- break;
+ default:
+ assert(0);
+ break;
}
- write_buffer_4x4(output, in);
}
 void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
@@ -1026,33 +1030,39 @@
array_transpose_8x8(in, in);
}
-void vp9_short_fht8x8_sse2(const int16_t *input, int16_t *output,
-                           int stride, int tx_type) {
+void vp9_fht8x8_sse2(const int16_t *input, int16_t *output,
+                     int stride, int tx_type) {
__m128i in[8];
- load_buffer_8x8(input, in, stride);
+
   switch (tx_type) {
- case 0: // DCT_DCT
- fdct8_sse2(in);
- fdct8_sse2(in);
+ case DCT_DCT:
+ vp9_fdct8x8_sse2(input, output, stride);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
+ load_buffer_8x8(input, in, stride);
fadst8_sse2(in);
fdct8_sse2(in);
+ right_shift_8x8(in, 1);
+ write_buffer_8x8(output, in, 8);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
+ load_buffer_8x8(input, in, stride);
fdct8_sse2(in);
fadst8_sse2(in);
+ right_shift_8x8(in, 1);
+ write_buffer_8x8(output, in, 8);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
+ load_buffer_8x8(input, in, stride);
fadst8_sse2(in);
fadst8_sse2(in);
+ right_shift_8x8(in, 1);
+ write_buffer_8x8(output, in, 8);
break;
default:
assert(0);
break;
}
- right_shift_8x8(in, 1);
- write_buffer_8x8(output, in, 8);
}
 void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
@@ -2532,36 +2542,39 @@
array_transpose_16x16(in0, in1);
}
-void vp9_short_fht16x16_sse2(const int16_t *input, int16_t *output,
-                             int stride, int tx_type) {
+void vp9_fht16x16_sse2(const int16_t *input, int16_t *output,
+                       int stride, int tx_type) {
__m128i in0[16], in1[16];
- load_buffer_16x16(input, in0, in1, stride);
+
   switch (tx_type) {
- case 0: // DCT_DCT
- fdct16_sse2(in0, in1);
- right_shift_16x16(in0, in1);
- fdct16_sse2(in0, in1);
+ case DCT_DCT:
+ vp9_fdct16x16_sse2(input, output, stride);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
+ load_buffer_16x16(input, in0, in1, stride);
fadst16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fdct16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
+ load_buffer_16x16(input, in0, in1, stride);
fdct16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
+ load_buffer_16x16(input, in0, in1, stride);
fadst16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
break;
default:
assert(0);
break;
}
- write_buffer_16x16(output, in0, in1, 16);
}
#define FDCT32x32_2D vp9_fdct32x32_rd_sse2
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -19,7 +19,6 @@
VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
VP9_CX_SRCS-yes += encoder/vp9_dct.c
-VP9_CX_SRCS-yes += encoder/vp9_dct.h
VP9_CX_SRCS-yes += encoder/vp9_encodeframe.c
VP9_CX_SRCS-yes += encoder/vp9_encodeframe.h
VP9_CX_SRCS-yes += encoder/vp9_encodemb.c
--
⑨