ref: ece4fd5d2247c9512b31a93dd593de567beaf928
parent: 2b5baea8fdb7086dc5eb654cd8fb62bef0635793
	author: Jingning Han <jingning@google.com>
	date: Wed Dec  9 05:10:17 EST 2015
	
Backport temporal filter approach to VP9. This commit enables the new temporal filter system for VP9. For speed 1, it improves the compression performance: derf 0.54%, stdhd 1.62%. Change-Id: I041760044def943e464345223790d4efad70b91e
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -135,15 +135,38 @@
   for (i = 0, k = 0; i < block_height; i++) {     for (j = 0; j < block_width; j++, k++) {- int src_byte = frame1[byte];
- int pixel_value = *frame2++;
+ int pixel_value = *frame2;
- modifier = src_byte - pixel_value;
- // This is an integer approximation of:
- // float coeff = (3.0 * modifer * modifier) / pow(2, strength);
- // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
- modifier *= modifier;
- modifier *= 3;
+ // non-local mean approach
+      int diff_sse[9] = { 0 };+ int idx, idy, index = 0;
+
+      for (idy = -1; idy <= 1; ++idy) {+        for (idx = -1; idx <= 1; ++idx) {+ int row = i + idy;
+ int col = j + idx;
+
+ if (row >= 0 && row < (int)block_height &&
+              col >= 0 && col < (int)block_width) {+ int diff = frame1[byte + idy * (int)stride + idx] -
+ frame2[idy * (int)block_width + idx];
+ diff_sse[index] = diff * diff;
+ ++index;
+ }
+ }
+ }
+
+ assert(index > 0);
+
+ modifier = 0;
+ for (idx = 0; idx < 9; ++idx)
+ modifier += diff_sse[idx];
+
+ modifier *= 3;
+ modifier /= index;
+
+ ++frame2;
+
modifier += rounding;
modifier >>= strength;
@@ -418,16 +441,17 @@
}
#else
// Apply the filter (YUV)
- vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
+ // TODO(jingning): Need SIMD optimization for this.
+ vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
predictor, 16, 16,
strength, filter_weight,
accumulator, count);
- vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
+ vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride,
predictor + 256,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 256,
count + 256);
- vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
+ vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride,
predictor + 512,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 512,
--
⑨