ref: 6903efa93db09df21670fb534d7127993032c574
parent: ceaa3c37a91c6e15af73f3c0d1b85de12a7c5c8e
 parent: b6c5dbe9ef38c8e90c7b3930ab8e336051b67708
	author: Jim Bankoski <jimbankoski@google.com>
	date: Sun Sep 29 06:31:09 EDT 2013
	
Merge "mips dsp-ase r2 vp9 decoder extend module optimizations"
--- a/build/make/rtcd.sh
+++ b/build/make/rtcd.sh
@@ -290,8 +290,10 @@
 {$(set_function_pointers c $ALL_ARCHS)
#if HAVE_DSPR2
+#if CONFIG_VP8
void dsputil_static_init();
dsputil_static_init();
+#endif
#endif
}
#endif
--- /dev/null
+++ b/vpx_scale/mips/dspr2/yv12extend_dspr2.c
@@ -1,0 +1,149 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "./vpx_config.h"
+#include "vpx_scale/yv12config.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vpx_scale/vpx_scale.h"
+
+#if HAVE_DSPR2
+/* Extends one image plane in place by replicating its edge pixels into the
+ * surrounding border area.
+ *
+ * src             points at the top-left pixel of the visible area.
+ * src_stride      byte offset between the starts of consecutive rows.
+ * width, height   size of the visible area, in pixels.
+ * extend_top, extend_left, extend_bottom, extend_right
+ *                 number of border pixels to fill on each side.
+ *
+ * The left and right borders of every visible row are filled first. The
+ * first and last visible rows (including their freshly written side borders)
+ * are then copied into the top and bottom borders.
+ */
+static void extend_plane(uint8_t *const src, int src_stride,
+                         int width, int height,
+                         int extend_top, int extend_left,
+                         int extend_bottom, int extend_right) {
+  int i, j;
+  uint8_t *left_src, *right_src;
+  uint8_t *left_dst_start, *right_dst_start;
+  uint8_t *left_dst, *right_dst;
+  uint8_t *top_src, *bot_src;
+  uint8_t *top_dst, *bot_dst;
+  uint32_t left_pix;
+  uint32_t right_pix;
+  uint32_t linesize;
+
+  /* copy the left and right most columns out */
+  left_src = src;
+  right_src = src + width - 1;
+  left_dst_start = src - extend_left;
+  right_dst_start = src + width;
+
+  for (i = height; i--; ) {
+    left_dst = left_dst_start;
+    right_dst = right_dst_start;
+
+    /* Load the two edge pixels of this row; replv.qb replicates the low
+     * byte of each register into all four byte lanes so that one word
+     * store writes four border pixels.
+     */
+    __asm__ __volatile__ (
+        "lb %[left_pix], 0(%[left_src]) \n\t"
+        "lb %[right_pix], 0(%[right_src]) \n\t"
+        "replv.qb %[left_pix], %[left_pix] \n\t"
+        "replv.qb %[right_pix], %[right_pix] \n\t"
+
+        : [left_pix] "=&r" (left_pix), [right_pix] "=&r" (right_pix)
+        : [left_src] "r" (left_src), [right_src] "r" (right_src)
+        : "memory"
+    );
+
+    /* NOTE(review): sw expects a 4-byte-aligned address on MIPS; the
+     * alignment of the border start addresses is not checked here --
+     * confirm that callers guarantee it.
+     */
+
+    /* Left border: whole words first, then the remaining 0..3 bytes. */
+    for (j = extend_left / 4; j--; ) {
+      __asm__ __volatile__ (
+          "sw %[left_pix], 0(%[left_dst]) \n\t"
+
+          :
+          : [left_dst] "r" (left_dst), [left_pix] "r" (left_pix)
+          : "memory"
+      );
+
+      left_dst += 4;
+    }
+
+    for (j = extend_left % 4; j--; ) {
+      __asm__ __volatile__ (
+          "sb %[left_pix], 0(%[left_dst]) \n\t"
+
+          :
+          : [left_dst] "r" (left_dst), [left_pix] "r" (left_pix)
+          : "memory"
+      );
+
+      left_dst += 1;
+    }
+
+    /* Right border: driven by extend_right, which may differ from
+     * extend_left, so it needs its own word/byte counts.
+     */
+    for (j = extend_right / 4; j--; ) {
+      __asm__ __volatile__ (
+          "sw %[right_pix], 0(%[right_dst]) \n\t"
+
+          :
+          : [right_dst] "r" (right_dst), [right_pix] "r" (right_pix)
+          : "memory"
+      );
+
+      right_dst += 4;
+    }
+
+    for (j = extend_right % 4; j--; ) {
+      __asm__ __volatile__ (
+          "sb %[right_pix], 0(%[right_dst]) \n\t"
+
+          :
+          : [right_dst] "r" (right_dst), [right_pix] "r" (right_pix)
+          : "memory"
+      );
+
+      right_dst += 1;
+    }
+
+    left_src += src_stride;
+    right_src += src_stride;
+    left_dst_start += src_stride;
+    right_dst_start += src_stride;
+  }
+
+  /* Now copy the top and bottom lines into each line of the respective
+   * borders
+   */
+  top_src = src - extend_left;
+  bot_src = src + src_stride * (height - 1) - extend_left;
+  top_dst = src + src_stride * (-extend_top) - extend_left;
+  bot_dst = src + src_stride * (height) - extend_left;
+  linesize = extend_left + extend_right + width;
+
+  for (i = 0; i < extend_top; i++) {
+    vpx_memcpy(top_dst, top_src, linesize);
+    top_dst += src_stride;
+  }
+
+  for (i = 0; i < extend_bottom; i++) {
+    vpx_memcpy(bot_dst, bot_src, linesize);
+    bot_dst += src_stride;
+  }
+}
+
+/* Extends the borders of all three planes (Y, U, V) of a frame buffer.
+ *
+ * ybf            frame buffer whose planes are extended in place.
+ * subsampling_x  horizontal chroma shift applied to widths and borders.
+ * subsampling_y  vertical chroma shift applied to heights and borders.
+ * ext_size       border size in luma pixels for the top and left sides; the
+ *                bottom and right sides additionally cover the difference
+ *                between y_height/y_width and y_crop_height/y_crop_width.
+ */
+static void extend_frame(YV12_BUFFER_CONFIG *const ybf,
+                         int subsampling_x, int subsampling_y,
+                         int ext_size) {
+  /* Luma bottom/right extents: requested border plus the uncropped slack. */
+  const int y_bottom = ext_size + ybf->y_height - ybf->y_crop_height;
+  const int y_right = ext_size + ybf->y_width - ybf->y_crop_width;
+
+  /* Chroma geometry: sizes and bottom/right extents add the shift amount
+   * before shifting, while top/left extents are shifted directly.
+   */
+  const int uv_width = (ybf->y_crop_width + subsampling_x) >> subsampling_x;
+  const int uv_height = (ybf->y_crop_height + subsampling_y) >> subsampling_y;
+  const int uv_top = ext_size >> subsampling_y;
+  const int uv_left = ext_size >> subsampling_x;
+  const int uv_bottom = (y_bottom + subsampling_y) >> subsampling_y;
+  const int uv_right = (y_right + subsampling_x) >> subsampling_x;
+
+  assert(ybf->y_height - ybf->y_crop_height < 16);
+  assert(ybf->y_width - ybf->y_crop_width < 16);
+  assert(ybf->y_height - ybf->y_crop_height >= 0);
+  assert(ybf->y_width - ybf->y_crop_width >= 0);
+
+  /* Luma plane. */
+  extend_plane(ybf->y_buffer, ybf->y_stride,
+               ybf->y_crop_width, ybf->y_crop_height,
+               ext_size, ext_size, y_bottom, y_right);
+
+  /* Both chroma planes share the same stride and geometry. */
+  extend_plane(ybf->u_buffer, ybf->uv_stride,
+               uv_width, uv_height,
+               uv_top, uv_left, uv_bottom, uv_right);
+
+  extend_plane(ybf->v_buffer, ybf->uv_stride,
+               uv_width, uv_height,
+               uv_top, uv_left, uv_bottom, uv_right);
+}
+
+/* Extends every plane of ybf by the buffer's full border size (ybf->border).
+ * subsampling_x / subsampling_y are the chroma shifts forwarded to
+ * extend_frame().
+ */
+void vp9_extend_frame_borders_dspr2(YV12_BUFFER_CONFIG *ybf,
+                                    int subsampling_x, int subsampling_y) {
+  const int full_border = ybf->border;
+
+  extend_frame(ybf, subsampling_x, subsampling_y, full_border);
+}
+
+/* Extends every plane of ybf by at most VP9INNERBORDERINPIXELS pixels: the
+ * border size used is ybf->border, capped at VP9INNERBORDERINPIXELS.
+ * subsampling_x / subsampling_y are the chroma shifts forwarded to
+ * extend_frame().
+ */
+void vp9_extend_frame_inner_borders_dspr2(YV12_BUFFER_CONFIG *ybf,
+                                          int subsampling_x,
+                                          int subsampling_y) {
+  int border_px = ybf->border;
+
+  /* Never extend further than the inner-border limit. */
+  if (border_px > VP9INNERBORDERINPIXELS) {
+    border_px = VP9INNERBORDERINPIXELS;
+  }
+
+  extend_frame(ybf, subsampling_x, subsampling_y, border_px);
+}
+#endif
--- a/vpx_scale/vpx_scale.mk
+++ b/vpx_scale/vpx_scale.mk
@@ -16,6 +16,9 @@
SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_extendframeborders_neon$(ASM)
SCALE_SRCS-$(HAVE_NEON) += arm/neon/yv12extend_arm.c
+#mips(dspr2)
+SCALE_SRCS-$(HAVE_DSPR2) += mips/dspr2/yv12extend_dspr2.c
+
SCALE_SRCS-no += $(SCALE_SRCS_REMOVE-yes)
$(eval $(call asm_offsets_template,\
--- a/vpx_scale/vpx_scale_rtcd.sh
+++ b/vpx_scale/vpx_scale_rtcd.sh
@@ -27,8 +27,8 @@
if [ "$CONFIG_VP9" = "yes" ]; then
prototype void vp9_extend_frame_borders "struct yv12_buffer_config *ybf, int subsampling_x, int subsampling_y"
- specialize vp9_extend_frame_borders
+ specialize vp9_extend_frame_borders dspr2
prototype void vp9_extend_frame_inner_borders "struct yv12_buffer_config *ybf, int subsampling_x, int subsampling_y"
- specialize vp9_extend_frame_inner_borders_c
+ specialize vp9_extend_frame_inner_borders dspr2
fi
--
⑨