ref: 70dc980703e5dbcff3dc35ab2f906e3711da7f18
parent: 9f28197d7c86497af8651c02afe13914af239160
parent: 1d5380787a30e8d37b3c925babaffd2d996ddea4
author: Jingning Han <jingning@google.com>
date: Thu Jul 12 13:29:43 EDT 2018
Merge "Add 32x32 Hadamard transform"
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -5822,7 +5822,7 @@
vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
dst_stride);
- vpx_fdct32x32(src_diff, coeff, bw);
+ vpx_hadamard_32x32(src_diff, bw, coeff);
intra_cost = vpx_satd(coeff, pix_num);
@@ -5879,7 +5879,7 @@
this_frame->y_buffer + mb_y_offset,
this_frame->y_stride, &predictor[0], bw);
#endif
- vpx_fdct32x32(src_diff, coeff, bw);
+ vpx_hadamard_32x32(src_diff, bw, coeff);
inter_cost = vpx_satd(coeff, pix_num);
--- a/vpx_dsp/avg.c
+++ b/vpx_dsp/avg.c
@@ -123,6 +123,37 @@
}
}
+void vpx_hadamard_32x32_c(const int16_t *src_diff, ptrdiff_t src_stride,
+ tran_low_t *coeff) { // 32x32 Hadamard: four 16x16 transforms, then one combining pass
+ int idx;
+ for (idx = 0; idx < 4; ++idx) {
+ // src_diff: 9 bit, dynamic range [-255, 255]; idx selects quadrant row (idx >> 1), column (idx & 1)
+ const int16_t *src_ptr =
+ src_diff + (idx >> 1) * 16 * src_stride + (idx & 0x01) * 16;
+ vpx_hadamard_16x16(src_ptr, src_stride, coeff + idx * 256); // quadrant output placed at coeff + idx * 256
+ }
+
+ // coeff: NOTE(review): presumably 16 bit, [-32640, 32640] here (the b0-b3 range below needs that; 15 bit input would give 14 bit b0-b3) - confirm against vpx_hadamard_16x16
+ for (idx = 0; idx < 256; ++idx) { // combine same-position coefficients of the four quadrants
+ tran_low_t a0 = coeff[0];
+ tran_low_t a1 = coeff[256];
+ tran_low_t a2 = coeff[512];
+ tran_low_t a3 = coeff[768];
+
+ tran_low_t b0 = (a0 + a1) >> 2; // (a0 + a1): 17 bit, [-65280, 65280] if a0/a1 are 16 bit (TODO confirm); >> 2 scales it back down
+ tran_low_t b1 = (a0 - a1) >> 2; // b0-b3: 15 bit, dynamic range
+ tran_low_t b2 = (a2 + a3) >> 2; // [-16320, 16320]
+ tran_low_t b3 = (a2 - a3) >> 2;
+
+ coeff[0] = b0 + b2; // 16 bit, [-32640, 32640]
+ coeff[256] = b1 + b3;
+ coeff[512] = b0 - b2;
+ coeff[768] = b1 - b3;
+
+ ++coeff; // step to the next position; the 256/512/768 offsets follow along
+ }
+}
+
// coeff: 16 bits, dynamic range [-32640, 32640].
// length: value range {16, 64, 256, 1024}.
int vpx_satd_c(const tran_low_t *coeff, int length) {
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -782,6 +782,9 @@
add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
specialize qw/vpx_hadamard_16x16 avx2 sse2 neon vsx/;
+ add_proto qw/void vpx_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
+ specialize qw/vpx_hadamard_32x32/;
+
add_proto qw/int vpx_satd/, "const tran_low_t *coeff, int length";
specialize qw/vpx_satd avx2 sse2 neon/;
} else {
@@ -790,6 +793,9 @@
add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
specialize qw/vpx_hadamard_16x16 avx2 sse2 neon msa vsx/;
+
+ add_proto qw/void vpx_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
+ specialize qw/vpx_hadamard_32x32/;
add_proto qw/int vpx_satd/, "const int16_t *coeff, int length";
specialize qw/vpx_satd avx2 sse2 neon msa/;