ref: 05437805f79780ad3f6afdff1a845512adf27201
parent: f7b4364cbbe3863426fce30af2095978749c71aa
	author: James Zern <jzern@google.com>
	date: Fri Feb  5 07:31:48 EST 2016
	
intrapred/d135: flatten border results before storing the results along the top and left border are then stored with a moving window into the vector. ~40-67% faster on ARM, ~40-77+% on x86 depending on the block size. Change-Id: Iab369aa2946a3ae4eb7290d512868fe5db92dbc8
--- a/vpx_dsp/intrapred.c
+++ b/vpx_dsp/intrapred.c
@@ -158,20 +158,29 @@
static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
                                   const uint8_t *above, const uint8_t *left) {- int r, c;
- dst[0] = AVG3(left[0], above[-1], above[0]);
- for (c = 1; c < bs; c++)
- dst[c] = AVG3(above[c - 2], above[c - 1], above[c]);
+ int i;
+#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ > 7
+ // silence a spurious -Warray-bounds warning, possibly related to:
+ // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56273
+ uint8_t border[69];
+#else
+ uint8_t border[32 + 32 - 1]; // outer border from bottom-left to top-right
+#endif
- dst[stride] = AVG3(above[-1], left[0], left[1]);
- for (r = 2; r < bs; ++r)
- dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]);
+ // dst(bs, bs - 2)[0], i.e., border starting at bottom-left
+  for (i = 0; i < bs - 2; ++i) {+ border[i] = AVG3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]);
+ }
+ border[bs - 2] = AVG3(above[-1], left[0], left[1]);
+ border[bs - 1] = AVG3(left[0], above[-1], above[0]);
+ border[bs - 0] = AVG3(above[-1], above[0], above[1]);
+ // dst[0][2, size), i.e., remaining top border ascending
+  for (i = 0; i < bs - 2; ++i) {+ border[bs + 1 + i] = AVG3(above[i], above[i + 1], above[i + 2]);
+ }
- dst += stride;
-  for (r = 1; r < bs; ++r) {- for (c = 1; c < bs; c++)
- dst[c] = dst[-stride + c - 1];
- dst += stride;
+  for (i = 0; i < bs; ++i) {+ memcpy(dst + i * stride, border + bs - 1 - i, bs);
}
}
--
⑨