shithub: libvpx

--- a/configure

+++ b/configure

@@ -242,7 +242,6 @@

     superblocks

     pred_filter

     lossless

-    newbestrefmv

     subpelrefmv

     new_mvref

     implicit_segmentation

--- a/test/boolcoder_test.cc

+++ /dev/null

@@ -1,90 +1,0 @@

-/*

- *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.

- *

- *  Use of this source code is governed by a BSD-style license

- *  that can be found in the LICENSE file in the root of the source

- *  tree. An additional intellectual property rights grant can be found

- *  in the file PATENTS.  All contributing project authors may

- *  be found in the AUTHORS file in the root of the source tree.

- */

-extern "C" {

-#include "vp8/encoder/boolhuff.h"

-#include "vp8/decoder/dboolhuff.h"

-}

-#include <math.h>

-#include <stddef.h>

-#include <stdio.h>

-#include <stdlib.h>

-#include <string.h>

-#include <sys/types.h>

-#include "test/acm_random.h"

-#include "third_party/googletest/src/include/gtest/gtest.h"

-#include "vpx/vpx_integer.h"

-namespace {

-const int num_tests = 10;

-}  // namespace

-using libvpx_test::ACMRandom;

-TEST(VP8, TestBitIO) {

-  ACMRandom rnd(ACMRandom::DeterministicSeed());

-  for (int n = 0; n < num_tests; ++n) {

-    for (int method = 0; method <= 7; ++method) {   // we generate various proba

-      const int bits_to_test = 1000;

-      uint8_t probas[bits_to_test];

-      for (int i = 0; i < bits_to_test; ++i) {

-        const int parity = i & 1;

-        probas[i] =

-            (method == 0) ? 0 : (method == 1) ? 255 :

-            (method == 2) ? 128 :

-            (method == 3) ? rnd.Rand8() :

-            (method == 4) ? (parity ? 0 : 255) :

-            // alternate between low and high proba:

-            (method == 5) ? (parity ? rnd(128) : 255 - rnd(128)) :

-            (method == 6) ?

-                (parity ? rnd(64) : 255 - rnd(64)) :

-                (parity ? rnd(32) : 255 - rnd(32));

-      }

-      for (int bit_method = 0; bit_method <= 3; ++bit_method) {

-        const int random_seed = 6432;

-        const int buffer_size = 10000;

-        ACMRandom bit_rnd(random_seed);

-        BOOL_CODER bw;

-        uint8_t bw_buffer[buffer_size];

-        vp8_start_encode(&bw, bw_buffer, bw_buffer + buffer_size);

-        int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;

-        for (int i = 0; i < bits_to_test; ++i) {

-          if (bit_method == 2) {

-            bit = (i & 1);

-          } else if (bit_method == 3) {

-            bit = bit_rnd(2);

-          }

-          vp8_encode_bool(&bw, bit, static_cast<int>(probas[i]));

-        }

-        vp8_stop_encode(&bw);

-        BOOL_DECODER br;

-        vp8dx_start_decode(&br, bw_buffer, buffer_size);

-        bit_rnd.Reset(random_seed);

-        for (int i = 0; i < bits_to_test; ++i) {

-          if (bit_method == 2) {

-            bit = (i & 1);

-          } else if (bit_method == 3) {

-            bit = bit_rnd(2);

-          }

-          GTEST_ASSERT_EQ(vp8dx_decode_bool(&br, probas[i]), bit)

-              << "pos: "<< i << " / " << bits_to_test

-              << " bit_method: " << bit_method

-              << " method: " << method;

-        }

-      }

-    }

-  }

-}

--- a/test/dct16x16_test.cc

+++ b/test/dct16x16_test.cc

@@ -17,7 +17,7 @@

 extern "C" {

 #include "vp9/common/entropy.h"

 #include "vp9/common/idct.h"

-#include "vp9/encoder/dct.h"

+#include "vpx_rtcd.h"

 #include "acm_random.h"

@@ -256,7 +256,7 @@

-TEST(VP8Idct16x16Test, AccuracyCheck) {

+TEST(VP9Idct16x16Test, AccuracyCheck) {

   ACMRandom rnd(ACMRandom::DeterministicSeed());

   const int count_test_block = 1000;

   for (int i = 0; i < count_test_block; ++i) {

@@ -271,7 +271,7 @@

     reference_16x16_dct_2d(in, out_r);

     for (int j = 0; j < 256; j++)

       coeff[j] = round(out_r[j]);

-    vp8_short_idct16x16_c(coeff, out_c, 32);

+    vp9_short_idct16x16_c(coeff, out_c, 32);

     for (int j = 0; j < 256; ++j) {

       const int diff = out_c[j] - in[j];

       const int error = diff * diff;

@@ -280,7 +280,7 @@

           << " at index " << j;

-    vp8_short_fdct16x16_c(in, out_c, 32);

+    vp9_short_fdct16x16_c(in, out_c, 32);

     for (int j = 0; j < 256; ++j) {

       const double diff = coeff[j] - out_c[j];

       const double error = diff * diff;

@@ -291,7 +291,7 @@

-TEST(VP8Fdct16x16Test, AccuracyCheck) {

+TEST(VP9Fdct16x16Test, AccuracyCheck) {

   ACMRandom rnd(ACMRandom::DeterministicSeed());

   int max_error = 0;

   double total_error = 0;

@@ -306,8 +306,8 @@

       test_input_block[j] = rnd.Rand8() - rnd.Rand8();

     const int pitch = 32;

-    vp8_short_fdct16x16_c(test_input_block, test_temp_block, pitch);

-    vp8_short_idct16x16_c(test_temp_block, test_output_block, pitch);

+    vp9_short_fdct16x16_c(test_input_block, test_temp_block, pitch);

+    vp9_short_idct16x16_c(test_temp_block, test_output_block, pitch);

     for (int j = 0; j < 256; ++j) {

       const int diff = test_input_block[j] - test_output_block[j];

@@ -325,7 +325,7 @@

       << "Error: 16x16 FDCT/IDCT has average roundtrip error > 1/10 per block";

-TEST(VP8Fdct16x16Test, CoeffSizeCheck) {

+TEST(VP9Fdct16x16Test, CoeffSizeCheck) {

   ACMRandom rnd(ACMRandom::DeterministicSeed());

   const int count_test_block = 1000;

   for (int i = 0; i < count_test_block; ++i) {

@@ -342,8 +342,8 @@

         input_extreme_block[j] = 255;

     const int pitch = 32;

-    vp8_short_fdct16x16_c(input_block, output_block, pitch);

-    vp8_short_fdct16x16_c(input_extreme_block, output_extreme_block, pitch);

+    vp9_short_fdct16x16_c(input_block, output_block, pitch);

+    vp9_short_fdct16x16_c(input_extreme_block, output_extreme_block, pitch);

     // The minimum quant value is 4.

     for (int j = 0; j < 256; ++j) {

--- a/test/fdct4x4_test.cc

+++ b/test/fdct4x4_test.cc

@@ -16,7 +16,7 @@

 extern "C" {

 #include "vp9/common/idct.h"

-#include "vp9/encoder/dct.h"

+#include "vpx_rtcd.h"

 #include "acm_random.h"

@@ -26,7 +26,7 @@

 namespace {

-TEST(Vp8FdctTest, SignBiasCheck) {

+TEST(Vp9FdctTest, SignBiasCheck) {

   ACMRandom rnd(ACMRandom::DeterministicSeed());

   int16_t test_input_block[16];

   int16_t test_output_block[16];

@@ -43,7 +43,7 @@

     // TODO(Yaowu): this should be converted to a parameterized test

     // to test optimized versions of this function.

-    vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch);

+    vp9_short_fdct4x4_c(test_input_block, test_output_block, pitch);

     for (int j = 0; j < 16; ++j) {

       if (test_output_block[j] < 0)

@@ -70,7 +70,7 @@

     // TODO(Yaowu): this should be converted to a parameterized test

     // to test optimized versions of this function.

-    vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch);

+    vp9_short_fdct4x4_c(test_input_block, test_output_block, pitch);

     for (int j = 0; j < 16; ++j) {

       if (test_output_block[j] < 0)

@@ -89,7 +89,7 @@

};

-TEST(Vp8FdctTest, RoundTripErrorCheck) {

+TEST(Vp9FdctTest, RoundTripErrorCheck) {

   ACMRandom rnd(ACMRandom::DeterministicSeed());

   int max_error = 0;

   double total_error = 0;

@@ -106,7 +106,7 @@

     // TODO(Yaowu): this should be converted to a parameterized test

     // to test optimized versions of this function.

     const int pitch = 8;

-    vp8_short_fdct4x4_c(test_input_block, test_temp_block, pitch);

+    vp9_short_fdct4x4_c(test_input_block, test_temp_block, pitch);

     for (int j = 0; j < 16; ++j) {

         if(test_temp_block[j] > 0) {

@@ -121,7 +121,7 @@

     // Because the bitstream is not frozen yet, use the idct in the codebase.

-    vp8_short_idct4x4llm_c(test_temp_block, test_output_block, pitch);

+    vp9_short_idct4x4llm_c(test_temp_block, test_output_block, pitch);

     for (int j = 0; j < 16; ++j) {

       const int diff = test_input_block[j] - test_output_block[j];

--- a/test/fdct8x8_test.cc

+++ b/test/fdct8x8_test.cc

@@ -15,8 +15,8 @@

 #include "third_party/googletest/src/include/gtest/gtest.h"

 extern "C" {

-#include "vp9/encoder/dct.h"

 #include "vp9/common/idct.h"

+#include "vpx_rtcd.h"

 #include "acm_random.h"

@@ -26,7 +26,7 @@

 namespace {

-TEST(VP8Fdct8x8Test, SignBiasCheck) {

+TEST(VP9Fdct8x8Test, SignBiasCheck) {

   ACMRandom rnd(ACMRandom::DeterministicSeed());

   int16_t test_input_block[64];

   int16_t test_output_block[64];

@@ -41,7 +41,7 @@

     for (int j = 0; j < 64; ++j)

       test_input_block[j] = rnd.Rand8() - rnd.Rand8();

-    vp8_short_fdct8x8_c(test_input_block, test_output_block, pitch);

+    vp9_short_fdct8x8_c(test_input_block, test_output_block, pitch);

     for (int j = 0; j < 64; ++j) {

       if (test_output_block[j] < 0)

@@ -66,7 +66,7 @@

     for (int j = 0; j < 64; ++j)

       test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);

-    vp8_short_fdct8x8_c(test_input_block, test_output_block, pitch);

+    vp9_short_fdct8x8_c(test_input_block, test_output_block, pitch);

     for (int j = 0; j < 64; ++j) {

       if (test_output_block[j] < 0)

@@ -85,7 +85,7 @@

};

-TEST(VP8Fdct8x8Test, RoundTripErrorCheck) {

+TEST(VP9Fdct8x8Test, RoundTripErrorCheck) {

   ACMRandom rnd(ACMRandom::DeterministicSeed());

   int max_error = 0;

   double total_error = 0;

@@ -100,7 +100,7 @@

       test_input_block[j] = rnd.Rand8() - rnd.Rand8();

     const int pitch = 16;

-    vp8_short_fdct8x8_c(test_input_block, test_temp_block, pitch);

+    vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);

     for (int j = 0; j < 64; ++j){

         if(test_temp_block[j] > 0) {

           test_temp_block[j] += 2;

@@ -112,7 +112,7 @@

           test_temp_block[j] *= 4;

-    vp8_short_idct8x8_c(test_temp_block, test_output_block, pitch);

+    vp9_short_idct8x8_c(test_temp_block, test_output_block, pitch);

     for (int j = 0; j < 64; ++j) {

       const int diff = test_input_block[j] - test_output_block[j];

@@ -130,7 +130,7 @@

       << "Error: 8x8 FDCT/IDCT has average roundtrip error > 1/5 per block";

};

-TEST(VP8Fdct8x8Test, ExtremalCheck) {

+TEST(VP9Fdct8x8Test, ExtremalCheck) {

   ACMRandom rnd(ACMRandom::DeterministicSeed());

   int max_error = 0;

   double total_error = 0;

@@ -145,8 +145,8 @@

       test_input_block[j] = rnd.Rand8() % 2 ? 255 : -255;

     const int pitch = 16;

-    vp8_short_fdct8x8_c(test_input_block, test_temp_block, pitch);

-    vp8_short_idct8x8_c(test_temp_block, test_output_block, pitch);

+    vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);

+    vp9_short_idct8x8_c(test_temp_block, test_output_block, pitch);

     for (int j = 0; j < 64; ++j) {

       const int diff = test_input_block[j] - test_output_block[j];

--- a/test/idct8x8_test.cc

+++ b/test/idct8x8_test.cc

@@ -15,8 +15,8 @@

 #include "third_party/googletest/src/include/gtest/gtest.h"

 extern "C" {

-#include "vp9/encoder/dct.h"

 #include "vp9/common/idct.h"

+#include "vpx_rtcd.h"

 #include "acm_random.h"

@@ -99,7 +99,7 @@

     output[i] = round(out2[i]/32);

-TEST(VP8Idct8x8Test, AccuracyCheck) {

+TEST(VP9Idct8x8Test, AccuracyCheck) {

   ACMRandom rnd(ACMRandom::DeterministicSeed());

   const int count_test_block = 10000;

   for (int i = 0; i < count_test_block; ++i) {

@@ -112,7 +112,7 @@

       input[j] = rnd.Rand8() - rnd.Rand8();

     const int pitch = 16;

-    vp8_short_fdct8x8_c(input, output_c, pitch);

+    vp9_short_fdct8x8_c(input, output_c, pitch);

     reference_dct_2d(input, output_r);

     for (int j = 0; j < 64; ++j) {

@@ -140,7 +140,7 @@

     reference_dct_2d(input, output_r);

     for (int j = 0; j < 64; ++j)

       coeff[j] = round(output_r[j]);

-    vp8_short_idct8x8_c(coeff, output_c, pitch);

+    vp9_short_idct8x8_c(coeff, output_c, pitch);

     for (int j = 0; j < 64; ++j) {

       const int diff = output_c[j] -input[j];

       const int error = diff * diff;

--- a/test/test.mk

+++ b/test/test.mk

@@ -33,9 +33,12 @@

##

 ifeq ($(CONFIG_SHARED),)

+## VP8

+ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)

 # These tests require both the encoder and decoder to be built.

 ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),yesyes)

-LIBVPX_TEST_SRCS-yes                   += boolcoder_test.cc

+LIBVPX_TEST_SRCS-yes                   += vp8_boolcoder_test.cc

 endif

 LIBVPX_TEST_SRCS-yes                   += idctllm_test.cc

@@ -47,13 +50,22 @@

 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc

 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc

-# VP9 tests

+endif # VP8

+## VP9

+ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),)

+# These tests require both the encoder and decoder to be built.

+ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),yesyes)

+LIBVPX_TEST_SRCS-yes                   += vp9_boolcoder_test.cc

+endif

 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc

 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc

-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc

-ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),)

+#LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc

 LIBVPX_TEST_SRCS-yes += idct8x8_test.cc

-endif

+endif # VP9

 endif

--- /dev/null

+++ b/test/vp8_boolcoder_test.cc

@@ -1,0 +1,90 @@

+/*

+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+extern "C" {

+#include "vp8/encoder/boolhuff.h"

+#include "vp8/decoder/dboolhuff.h"

+}

+#include <math.h>

+#include <stddef.h>

+#include <stdio.h>

+#include <stdlib.h>

+#include <string.h>

+#include <sys/types.h>

+#include "test/acm_random.h"

+#include "third_party/googletest/src/include/gtest/gtest.h"

+#include "vpx/vpx_integer.h"

+namespace {

+const int num_tests = 10;

+}  // namespace

+using libvpx_test::ACMRandom;

+TEST(VP8, TestBitIO) {

+  ACMRandom rnd(ACMRandom::DeterministicSeed());

+  for (int n = 0; n < num_tests; ++n) {

+    for (int method = 0; method <= 7; ++method) {   // we generate various proba

+      const int bits_to_test = 1000;

+      uint8_t probas[bits_to_test];

+      for (int i = 0; i < bits_to_test; ++i) {

+        const int parity = i & 1;

+        probas[i] =

+            (method == 0) ? 0 : (method == 1) ? 255 :

+            (method == 2) ? 128 :

+            (method == 3) ? rnd.Rand8() :

+            (method == 4) ? (parity ? 0 : 255) :

+            // alternate between low and high proba:

+            (method == 5) ? (parity ? rnd(128) : 255 - rnd(128)) :

+            (method == 6) ?

+                (parity ? rnd(64) : 255 - rnd(64)) :

+                (parity ? rnd(32) : 255 - rnd(32));

+      }

+      for (int bit_method = 0; bit_method <= 3; ++bit_method) {

+        const int random_seed = 6432;

+        const int buffer_size = 10000;

+        ACMRandom bit_rnd(random_seed);

+        BOOL_CODER bw;

+        uint8_t bw_buffer[buffer_size];

+        vp8_start_encode(&bw, bw_buffer, bw_buffer + buffer_size);

+        int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;

+        for (int i = 0; i < bits_to_test; ++i) {

+          if (bit_method == 2) {

+            bit = (i & 1);

+          } else if (bit_method == 3) {

+            bit = bit_rnd(2);

+          }

+          vp8_encode_bool(&bw, bit, static_cast<int>(probas[i]));

+        }

+        vp8_stop_encode(&bw);

+        BOOL_DECODER br;

+        vp8dx_start_decode(&br, bw_buffer, buffer_size);

+        bit_rnd.Reset(random_seed);

+        for (int i = 0; i < bits_to_test; ++i) {

+          if (bit_method == 2) {

+            bit = (i & 1);

+          } else if (bit_method == 3) {

+            bit = bit_rnd(2);

+          }

+          GTEST_ASSERT_EQ(vp8dx_decode_bool(&br, probas[i]), bit)

+              << "pos: "<< i << " / " << bits_to_test

+              << " bit_method: " << bit_method

+              << " method: " << method;

+        }

+      }

+    }

+  }

+}

--- /dev/null

+++ b/test/vp9_boolcoder_test.cc

@@ -1,0 +1,88 @@

+/*

+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#include <math.h>

+#include <stdlib.h>

+#include <string.h>

+#include "third_party/googletest/src/include/gtest/gtest.h"

+extern "C" {

+#include "vp9/encoder/boolhuff.h"

+#include "vp9/decoder/dboolhuff.h"

+}

+#include "acm_random.h"

+#include "vpx/vpx_integer.h"

+using libvpx_test::ACMRandom;

+namespace {

+const int num_tests = 10;

+}  // namespace

+TEST(VP9, TestBitIO) {

+  ACMRandom rnd(ACMRandom::DeterministicSeed());

+  for (int n = 0; n < num_tests; ++n) {

+    for (int method = 0; method <= 7; ++method) {   // we generate various proba

+      const int bits_to_test = 1000;

+      uint8_t probas[bits_to_test];

+      for (int i = 0; i < bits_to_test; ++i) {

+        const int parity = i & 1;

+        probas[i] =

+          (method == 0) ? 0 : (method == 1) ? 255 :

+          (method == 2) ? 128 :

+          (method == 3) ? rnd.Rand8() :

+          (method == 4) ? (parity ? 0 : 255) :

+            // alternate between low and high proba:

+            (method == 5) ? (parity ? rnd(128) : 255 - rnd(128)) :

+            (method == 6) ?

+            (parity ? rnd(64) : 255 - rnd(64)) :

+            (parity ? rnd(32) : 255 - rnd(32));

+      }

+      for (int bit_method = 0; bit_method <= 3; ++bit_method) {

+        const int random_seed = 6432;

+        const int buffer_size = 10000;

+        ACMRandom bit_rnd(random_seed);

+        BOOL_CODER bw;

+        uint8_t bw_buffer[buffer_size];

+        vp9_start_encode(&bw, bw_buffer);

+        int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;

+        for (int i = 0; i < bits_to_test; ++i) {

+          if (bit_method == 2) {

+            bit = (i & 1);

+          } else if (bit_method == 3) {

+            bit = bit_rnd(2);

+          }

+          encode_bool(&bw, bit, static_cast<int>(probas[i]));

+        }

+        vp9_stop_encode(&bw);

+        BOOL_DECODER br;

+        vp9_start_decode(&br, bw_buffer, buffer_size);

+        bit_rnd.Reset(random_seed);

+        for (int i = 0; i < bits_to_test; ++i) {

+          if (bit_method == 2) {

+            bit = (i & 1);

+          } else if (bit_method == 3) {

+            bit = bit_rnd(2);

+          }

+          GTEST_ASSERT_EQ(decode_bool(&br, probas[i]), bit)

+              << "pos: " << i << " / " << bits_to_test

+              << " bit_method: " << bit_method

+              << " method: " << method;

+        }

+      }

+    }

+  }

+}

--- a/vp9/common/blockd.h

+++ b/vp9/common/blockd.h

@@ -44,9 +44,7 @@

 /* Segment Feature Masks */

 #define SEGMENT_DELTADATA   0

 #define SEGMENT_ABSDATA     1

-#if CONFIG_NEWBESTREFMV || CONFIG_NEW_MVREF

 #define MAX_MV_REFS 19

-#endif

 typedef struct {

   int r, c;

@@ -216,9 +214,7 @@

   MV_REFERENCE_FRAME ref_frame, second_ref_frame;

   TX_SIZE txfm_size;

   int_mv mv[2]; // for each reference frame used

-#if CONFIG_NEWBESTREFMV || CONFIG_NEW_MVREF

   int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REFS];

-#endif

   SPLITMV_PARTITIONING_TYPE partitioning;

   unsigned char mb_skip_coeff;                                /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */

@@ -280,7 +276,7 @@

   DECLARE_ALIGNED(16, unsigned char,  predictor[384]);

   DECLARE_ALIGNED(16, short, qcoeff[400]);

   DECLARE_ALIGNED(16, short, dqcoeff[400]);

-  DECLARE_ALIGNED(16, char,  eobs[25]);

+  DECLARE_ALIGNED(16, unsigned short,  eobs[25]);

   /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */

   BLOCKD block[25];

@@ -467,7 +463,10 @@

   TX_TYPE tx_type = DCT_DCT;

   if (xd->mode_info_context->mbmi.mode == I8X8_PRED &&

       xd->q_index < ACTIVE_HT8) {

-    tx_type = txfm_map(pred_mode_conv(b->bmi.as_mode.first));

+    // TODO(rbultje): MB_PREDICTION_MODE / B_PREDICTION_MODE should be merged

+    // or the relationship otherwise modified to address this type conversion.

+    tx_type = txfm_map(pred_mode_conv(

+                  (MB_PREDICTION_MODE)b->bmi.as_mode.first));

   return tx_type;

@@ -483,7 +482,7 @@

 static TX_TYPE get_tx_type(const MACROBLOCKD *xd, const BLOCKD *b) {

   TX_TYPE tx_type = DCT_DCT;

-  int ib = (b - xd->block);

+  int ib = (int)(b - xd->block);

   if (ib >= 16)

     return tx_type;

   if (xd->mode_info_context->mbmi.txfm_size == TX_16X16) {

--- a/vp9/common/findnearmv.c

+++ b/vp9/common/findnearmv.c

@@ -11,6 +11,7 @@

 #include "findnearmv.h"

 #include "vp9/common/sadmxn.h"

+#include "vp9/common/subpelvar.h"

 #include <limits.h>

 const unsigned char vp9_mbsplit_offset[4][16] = {

@@ -167,7 +168,6 @@

   return p;

-#if CONFIG_NEWBESTREFMV

 #define SP(x) (((x) & 7) << 1)

 unsigned int vp9_sad3x16_c(

   const unsigned char *src_ptr,

@@ -186,6 +186,76 @@

   return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 3);

+#if CONFIG_SUBPELREFMV

+unsigned int vp9_variance2x16_c(const unsigned char *src_ptr,

+                                const int  source_stride,

+                                const unsigned char *ref_ptr,

+                                const int  recon_stride,

+                                unsigned int *sse) {

+  unsigned int var;

+  int avg;

+  variance(src_ptr, source_stride, ref_ptr, recon_stride, 2, 16, &var, &avg);

+  *sse = var;

+  return (var - ((avg * avg) >> 5));

+}

+unsigned int vp9_variance16x2_c(const unsigned char *src_ptr,

+                                const int  source_stride,

+                                const unsigned char *ref_ptr,

+                                const int  recon_stride,

+                                unsigned int *sse) {

+  unsigned int var;

+  int avg;

+  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 2, &var, &avg);

+  *sse = var;

+  return (var - ((avg * avg) >> 5));

+}

+unsigned int vp9_sub_pixel_variance16x2_c(const unsigned char  *src_ptr,

+                                          const int  src_pixels_per_line,

+                                          const int  xoffset,

+                                          const int  yoffset,

+                                          const unsigned char *dst_ptr,

+                                          const int dst_pixels_per_line,

+                                          unsigned int *sse) {

+  unsigned short FData3[16 * 3];  // Temp data buffer used in filtering

+  unsigned char  temp2[20 * 16];

+  const short *HFilter, *VFilter;

+  HFilter = vp9_bilinear_filters[xoffset];

+  VFilter = vp9_bilinear_filters[yoffset];

+  var_filter_block2d_bil_first_pass(src_ptr, FData3,

+                                    src_pixels_per_line, 1, 3, 16, HFilter);

+  var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 2, 16, VFilter);

+  return vp9_variance16x2_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);

+}

+unsigned int vp9_sub_pixel_variance2x16_c(const unsigned char  *src_ptr,

+                                          const int  src_pixels_per_line,

+                                          const int  xoffset,

+                                          const int  yoffset,

+                                          const unsigned char *dst_ptr,

+                                          const int dst_pixels_per_line,

+                                          unsigned int *sse) {

+  unsigned short FData3[2 * 17];  // Temp data buffer used in filtering

+  unsigned char  temp2[2 * 16];

+  const short *HFilter, *VFilter;

+  HFilter = vp9_bilinear_filters[xoffset];

+  VFilter = vp9_bilinear_filters[yoffset];

+  var_filter_block2d_bil_first_pass(src_ptr, FData3,

+                                    src_pixels_per_line, 1, 17, 2, HFilter);

+  var_filter_block2d_bil_second_pass(FData3, temp2, 2, 2, 16, 2, VFilter);

+  return vp9_variance2x16_c(temp2, 2, dst_ptr, dst_pixels_per_line, sse);

+}

+#endif

 /* check a list of motion vectors by sad score using a number rows of pixels

  * above and a number cols of pixels in the left to select the one with best

  * score to use as ref motion vector

@@ -323,5 +393,3 @@

   // Copy back the re-ordered mv list

   vpx_memcpy(mvlist, sorted_mvs, sizeof(sorted_mvs));

-#endif  // CONFIG_NEWBESTREFMV

--- a/vp9/common/findnearmv.h

+++ b/vp9/common/findnearmv.h

@@ -18,7 +18,6 @@

 #include "treecoder.h"

 #include "onyxc_int.h"

-#if CONFIG_NEWBESTREFMV

 /* check a list of motion vectors by sad score using a number rows of pixels

  * above and a number cols of pixels in the left to select the one with best

  * score to use as ref motion vector

@@ -30,7 +29,6 @@

                            int_mv *best_mv,

                            int_mv *nearest,

                            int_mv *near);

-#endif

 static void mv_bias(int refmb_ref_frame_sign_bias, int refframe, int_mv *mvp, const int *ref_frame_sign_bias) {

   MV xmv;

--- a/vp9/common/idctllm.c

+++ b/vp9/common/idctllm.c

@@ -1013,6 +1013,8 @@

 #endif

+#define TEST_INT_16x16_IDCT 1

+#if !TEST_INT_16x16_IDCT

 static const double C1 = 0.995184726672197;

 static const double C2 = 0.98078528040323;

 static const double C3 = 0.956940335732209;

@@ -1273,3 +1275,235 @@

   vp9_clear_system_state(); // Make it simd safe : __asm emms;

+#else

+static const int16_t C1 = 16305;

+static const int16_t C2 = 16069;

+static const int16_t C3 = 15679;

+static const int16_t C4 = 15137;

+static const int16_t C5 = 14449;

+static const int16_t C6 = 13623;

+static const int16_t C7 = 12665;

+static const int16_t C8 = 11585;

+static const int16_t C9 = 10394;

+static const int16_t C10 = 9102;

+static const int16_t C11 = 7723;

+static const int16_t C12 = 6270;

+static const int16_t C13 = 4756;

+static const int16_t C14 = 3196;

+static const int16_t C15 = 1606;

+#define INITIAL_SHIFT 2

+#define INITIAL_ROUNDING (1 << (INITIAL_SHIFT - 1))

+#define RIGHT_SHIFT 14

+#define RIGHT_ROUNDING (1 << (RIGHT_SHIFT - 1))

+static void butterfly_16x16_idct_1d(int16_t input[16], int16_t output[16],

+                                    int last_shift_bits) {

+    int16_t step[16];

+    int intermediate[16];

+    int temp1, temp2;

+    int step1_shift = RIGHT_SHIFT + INITIAL_SHIFT;

+    int step1_rounding = 1 << (step1_shift - 1);

+    int last_rounding = 0;

+    if (last_shift_bits > 0)

+      last_rounding = 1 << (last_shift_bits - 1);

+    // step 1 and 2

+    step[ 0] = (input[0] + input[8] + INITIAL_ROUNDING) >> INITIAL_SHIFT;

+    step[ 1] = (input[0] - input[8] + INITIAL_ROUNDING) >> INITIAL_SHIFT;

+    temp1 = input[4] * C12;

+    temp2 = input[12] * C4;

+    temp1 = (temp1 - temp2 +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp1  *= C8;

+    step[ 2] = (2 * (temp1) + step1_rounding) >> step1_shift;

+    temp1 = input[4] * C4;

+    temp2 = input[12] * C12;

+    temp1 = (temp1 + temp2 +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp1 *= C8;

+    step[ 3] = (2 * (temp1) + step1_rounding) >> step1_shift;

+    temp1 = input[2] * C8;

+    temp1 = (2 * (temp1) +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp2 = input[6] + input[10];

+    step[ 4] = (temp1 + temp2 + INITIAL_ROUNDING) >> INITIAL_SHIFT;

+    step[ 5] = (temp1 - temp2 + INITIAL_ROUNDING) >> INITIAL_SHIFT;

+    temp1 = input[14] * C8;

+    temp1 = (2 * (temp1) +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp2 = input[6] - input[10];

+    step[ 6] = (temp2 - temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT;

+    step[ 7] = (temp2 + temp1 + INITIAL_ROUNDING) >> INITIAL_SHIFT;

+    // for odd input

+    temp1 = input[3] * C12;

+    temp2 = input[13] * C4;

+    temp1 = (temp1 + temp2 +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp1 *= C8;

+    intermediate[ 8] = (2 * (temp1) +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp1 = input[3] * C4;

+    temp2 = input[13] * C12;

+    temp2 = (temp2 - temp1 + RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp2 *= C8;

+    intermediate[ 9] = (2 * (temp2) +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    intermediate[10] = (2 * (input[9] * C8) + RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    intermediate[11] = input[15] - input[1];

+    intermediate[12] = input[15] + input[1];

+    intermediate[13] = (2 * (input[7] * C8) + RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp1 = input[11] * C12;

+    temp2 = input[5] * C4;

+    temp2 = (temp2 - temp1 +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp2 *= C8;

+    intermediate[14] = (2 * (temp2) +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp1 = input[11] * C4;

+    temp2 = input[5] * C12;

+    temp1 = (temp1 + temp2 +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp1 *= C8;

+    intermediate[15] = (2 * (temp1) +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    step[ 8] = (intermediate[ 8] + intermediate[14] + INITIAL_ROUNDING)

+        >> INITIAL_SHIFT;

+    step[ 9] = (intermediate[ 9] + intermediate[15] + INITIAL_ROUNDING)

+        >> INITIAL_SHIFT;

+    step[10] = (intermediate[10] + intermediate[11] + INITIAL_ROUNDING)

+        >> INITIAL_SHIFT;

+    step[11] = (intermediate[10] - intermediate[11] + INITIAL_ROUNDING)

+        >> INITIAL_SHIFT;

+    step[12] = (intermediate[12] + intermediate[13] + INITIAL_ROUNDING)

+        >> INITIAL_SHIFT;

+    step[13] = (intermediate[12] - intermediate[13] + INITIAL_ROUNDING)

+        >> INITIAL_SHIFT;

+    step[14] = (intermediate[ 8] - intermediate[14] + INITIAL_ROUNDING)

+        >> INITIAL_SHIFT;

+    step[15] = (intermediate[ 9] - intermediate[15] + INITIAL_ROUNDING)

+        >> INITIAL_SHIFT;

+    // step 3

+    output[0] = step[ 0] + step[ 3];

+    output[1] = step[ 1] + step[ 2];

+    output[2] = step[ 1] - step[ 2];

+    output[3] = step[ 0] - step[ 3];

+    temp1 = step[ 4] * C14;

+    temp2 = step[ 7] * C2;

+    output[4] =  (temp1 - temp2 +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp1 = step[ 4] * C2;

+    temp2 = step[ 7] * C14;

+    output[7] =  (temp1 + temp2 +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp1 = step[ 5] * C10;

+    temp2 = step[ 6] * C6;

+    output[5] =  (temp1 - temp2 +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp1 = step[ 5] * C6;

+    temp2 = step[ 6] * C10;

+    output[6] =  (temp1 + temp2 +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    output[8] = step[ 8] + step[11];

+    output[9] = step[ 9] + step[10];

+    output[10] = step[ 9] - step[10];

+    output[11] = step[ 8] - step[11];

+    output[12] = step[12] + step[15];

+    output[13] = step[13] + step[14];

+    output[14] = step[13] - step[14];

+    output[15] = step[12] - step[15];

+    // step 4

+    step[ 0] = output[0] + output[7];

+    step[ 1] = output[1] + output[6];

+    step[ 2] = output[2] + output[5];

+    step[ 3] = output[3] + output[4];

+    step[ 4] = output[3] - output[4];

+    step[ 5] = output[2] - output[5];

+    step[ 6] = output[1] - output[6];

+    step[ 7] = output[0] - output[7];

+    temp1 = output[8] * C7;

+    temp2 = output[15] * C9;

+    step[ 8] = (temp1 - temp2 +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp1 = output[9] * C11;

+    temp2 = output[14] * C5;

+    step[ 9] = (temp1 + temp2 +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp1 = output[10] * C3;

+    temp2 = output[13] * C13;

+    step[10] = (temp1 - temp2 +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp1 = output[11] * C15;

+    temp2 = output[12] * C1;

+    step[11] = (temp1 + temp2 +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp1 = output[11] * C1;

+    temp2 = output[12] * C15;

+    step[12] = (temp2 - temp1 +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp1 = output[10] * C13;

+    temp2 = output[13] * C3;

+    step[13] = (temp1 + temp2 +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp1 = output[9] * C5;

+    temp2 = output[14] * C11;

+    step[14] = (temp2 - temp1 +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    temp1 = output[8] * C9;

+    temp2 = output[15] * C7;

+    step[15] = (temp1 + temp2 +   RIGHT_ROUNDING) >> RIGHT_SHIFT;

+    // step 5

+    output[0] = (step[0] + step[15] + last_rounding) >> last_shift_bits;

+    output[1] = (step[1] + step[14] + last_rounding) >> last_shift_bits;

+    output[2] = (step[2] + step[13] + last_rounding) >> last_shift_bits;

+    output[3] = (step[3] + step[12] + last_rounding) >> last_shift_bits;

+    output[4] = (step[4] + step[11] + last_rounding) >> last_shift_bits;

+    output[5] = (step[5] + step[10] + last_rounding) >> last_shift_bits;

+    output[6] = (step[6] + step[ 9] + last_rounding) >> last_shift_bits;

+    output[7] = (step[7] + step[ 8] + last_rounding) >> last_shift_bits;

+    output[15] = (step[0] - step[15] + last_rounding) >> last_shift_bits;

+    output[14] = (step[1] - step[14] + last_rounding) >> last_shift_bits;

+    output[13] = (step[2] - step[13] + last_rounding) >> last_shift_bits;

+    output[12] = (step[3] - step[12] + last_rounding) >> last_shift_bits;

+    output[11] = (step[4] - step[11] + last_rounding) >> last_shift_bits;

+    output[10] = (step[5] - step[10] + last_rounding) >> last_shift_bits;

+    output[9] = (step[6] - step[ 9] + last_rounding) >> last_shift_bits;

+    output[8] = (step[7] - step[ 8] + last_rounding) >> last_shift_bits;

+}

+void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) {

+    int16_t out[16 * 16];

+    int16_t *outptr = &out[0];

+    const int short_pitch = pitch >> 1;

+    int i, j;

+    int16_t temp_in[16], temp_out[16];

+    // First transform rows

+    for (i = 0; i < 16; ++i) {

+      butterfly_16x16_idct_1d(input, outptr, 0);

+      input += short_pitch;

+      outptr += 16;

+    }

+    // Then transform columns

+    for (i = 0; i < 16; ++i) {

+      for (j = 0; j < 16; ++j)

+        temp_in[j] = out[j * 16 + i];

+      butterfly_16x16_idct_1d(temp_in, temp_out, 3);

+      for (j = 0; j < 16; ++j)

+        output[j * 16 + i] = temp_out[j];

+    }

+}

+#undef INITIAL_SHIFT

+#undef INITIAL_ROUNDING

+#undef RIGHT_SHIFT

+#undef RIGHT_ROUNDING

+#endif

--- a/vp9/common/mv.h

+++ b/vp9/common/mv.h

@@ -18,9 +18,9 @@

   short col;

 } MV;

-typedef union {

-  uint32_t  as_int;

-  MV        as_mv;

-} int_mv;        /* facilitates faster equality tests and copies */

+typedef union int_mv {

+  uint32_t as_int;

+  MV as_mv;

+} int_mv; /* facilitates faster equality tests and copies */

 #endif

--- a/vp9/common/mvref_common.c

+++ b/vp9/common/mvref_common.c

@@ -10,8 +10,6 @@

 #include "mvref_common.h"

-#if CONFIG_NEWBESTREFMV

 #define MVREF_NEIGHBOURS 8

 static int mv_ref_search[MVREF_NEIGHBOURS][2] =

   { {0,-1},{-1,0},{-1,-1},{0,-2},{-2,0},{-1,-2},{-2,-1},{-2,-2} };

@@ -338,5 +336,3 @@

   // Copy over the candidate list.

   vpx_memcpy(mv_ref_list, candidate_mvs, sizeof(candidate_mvs));

-#endif

--- a/vp9/common/mvref_common.h

+++ b/vp9/common/mvref_common.h

@@ -11,8 +11,6 @@

 #include "onyxc_int.h"

 #include "blockd.h"

-// MR reference entropy header file.

-#if CONFIG_NEWBESTREFMV

 #ifndef __INC_MVREF_COMMON_H

 #define __INC_MVREF_COMMON_H

@@ -28,4 +26,3 @@

 #endif

-#endif

--- a/vp9/common/postproc.c

+++ b/vp9/common/postproc.c

@@ -12,6 +12,7 @@

 #include "vpx_ports/config.h"

 #include "vpx_scale/yv12config.h"

 #include "postproc.h"

+#include "vp9/common/textblit.h"

 #include "vpx_scale/vpxscale.h"

 #include "systemdependent.h"

@@ -126,10 +127,6 @@

};

-extern void vp9_blit_text(const char *msg, unsigned char *address,

-                          const int pitch);

-extern void vp9_blit_line(int x0, int x1, int y0, int y1,

-                          unsigned char *image, const int pitch);

 /****************************************************************************

*/

 void vp9_post_proc_down_and_across_c(unsigned char *src_ptr,

--- a/vp9/common/quant_common.c

+++ b/vp9/common/quant_common.c

@@ -30,9 +30,9 @@

     last_val = current_val;

     ac_val = ac_qlookup[i];

-    dc_qlookup[i] = (0.000000305 * ac_val * ac_val * ac_val) +

-                    (-0.00065 * ac_val * ac_val) +

-                    (0.9 * ac_val) + 0.5;

+    dc_qlookup[i] = (int)((0.000000305 * ac_val * ac_val * ac_val) +

+                          (-0.00065 * ac_val * ac_val) +

+                          (0.9 * ac_val) + 0.5);

     if (dc_qlookup[i] < ACDC_MIN)

       dc_qlookup[i] = ACDC_MIN;

--- a/vp9/common/reconintra.h

+++ b/vp9/common/reconintra.h

@@ -13,6 +13,6 @@

 #include "blockd.h"

-extern void init_intra_left_above_pixels(MACROBLOCKD *xd);

+extern void vp9_recon_intra_mbuv(MACROBLOCKD *xd);

 #endif  // __INC_RECONINTRA_H

--- a/vp9/common/rtcd_defs.sh

+++ b/vp9/common/rtcd_defs.sh

@@ -11,6 +11,8 @@

 struct macroblock;

 struct variance_vtable;

+#define DEC_MVCOSTS int *mvjcost, int *mvcost[2]

 /* Encoder forward decls */

 struct variance_vtable;

 union int_mv;

@@ -43,18 +45,24 @@

 prototype void vp9_dequantize_b_2x2 "struct blockd *x"

 specialize vp9_dequantize_b_2x2

-prototype void vp9_dequant_dc_idct_add_y_block_8x8 "short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, char *eobs, short *dc, struct macroblockd *xd"

+prototype void vp9_dequant_dc_idct_add_y_block_8x8 "short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs, short *dc, struct macroblockd *xd"

 specialize vp9_dequant_dc_idct_add_y_block_8x8

-prototype void vp9_dequant_idct_add_y_block_8x8 "short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, char *eobs, struct macroblockd *xd"

+prototype void vp9_dequant_idct_add_y_block_8x8 "short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs, struct macroblockd *xd"

 specialize vp9_dequant_idct_add_y_block_8x8

-prototype void vp9_dequant_idct_add_uv_block_8x8 "short *q, short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, int stride, char *eobs, struct macroblockd *xd"

+prototype void vp9_dequant_idct_add_uv_block_8x8 "short *q, short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, int stride, unsigned short *eobs, struct macroblockd *xd"

 specialize vp9_dequant_idct_add_uv_block_8x8

 prototype void vp9_dequant_idct_add_16x16 "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride"

 specialize vp9_dequant_idct_add_16x16

+prototype void vp9_dequant_idct_add_8x8 "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride"

+specialize vp9_dequant_idct_add_8x8

+prototype void vp9_dequant_dc_idct_add_8x8 "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc"

+specialize vp9_dequant_dc_idct_add_8x8

 prototype void vp9_dequant_idct_add "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride"

 specialize vp9_dequant_idct_add

@@ -61,13 +69,13 @@

 prototype void vp9_dequant_dc_idct_add "short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc"

 specialize vp9_dequant_dc_idct_add

-prototype void vp9_dequant_dc_idct_add_y_block "short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, char *eobs, short *dc"

+prototype void vp9_dequant_dc_idct_add_y_block "short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs, short *dc"

 specialize vp9_dequant_dc_idct_add_y_block mmx

-prototype void vp9_dequant_idct_add_y_block "short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, char *eobs"

+prototype void vp9_dequant_idct_add_y_block "short *q, short *dq, unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs"

 specialize vp9_dequant_idct_add_y_block mmx

-prototype void vp9_dequant_idct_add_uv_block "short *q, short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, int stride, char *eobs"

+prototype void vp9_dequant_idct_add_uv_block "short *q, short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, int stride, unsigned short *eobs"

 specialize vp9_dequant_idct_add_uv_block mmx

@@ -219,13 +227,11 @@

 # sad 16x3, 3x16

-if [ "$CONFIG_NEWBESTREFMV" = "yes" ]; then

 prototype unsigned int vp9_sad16x3 "const unsigned char *src_ptr, int  src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad"

 specialize vp9_sad16x3 sse2

 prototype unsigned int vp9_sad3x16 "const unsigned char *src_ptr, int  src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad"

 specialize vp9_sad3x16 sse2

-fi

 # Encoder functions below this point.

@@ -368,22 +374,22 @@

 prototype void vp9_sad4x4x8 "const unsigned char *src_ptr, int  src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array"

 specialize vp9_sad4x4x8 sse4

-prototype void vp9_sad32x32x4d "const unsigned char *src_ptr, int  src_stride, unsigned char *ref_ptr[], int  ref_stride, unsigned int *sad_array"

+prototype void vp9_sad32x32x4d "const unsigned char *src_ptr, int  src_stride, const unsigned char* const ref_ptr[], int  ref_stride, unsigned int *sad_array"

 specialize vp9_sad32x32x4d

-prototype void vp9_sad16x16x4d "const unsigned char *src_ptr, int  src_stride, unsigned char *ref_ptr[], int  ref_stride, unsigned int *sad_array"

+prototype void vp9_sad16x16x4d "const unsigned char *src_ptr, int  src_stride, const unsigned char* const ref_ptr[], int  ref_stride, unsigned int *sad_array"

 specialize vp9_sad16x16x4d sse3

-prototype void vp9_sad16x8x4d "const unsigned char *src_ptr, int  src_stride, unsigned char *ref_ptr[], int  ref_stride, unsigned int *sad_array"

+prototype void vp9_sad16x8x4d "const unsigned char *src_ptr, int  src_stride, const unsigned char* const ref_ptr[], int  ref_stride, unsigned int *sad_array"

 specialize vp9_sad16x8x4d sse3

-prototype void vp9_sad8x16x4d "const unsigned char *src_ptr, int  src_stride, unsigned char *ref_ptr[], int  ref_stride, unsigned int *sad_array"

+prototype void vp9_sad8x16x4d "const unsigned char *src_ptr, int  src_stride, const unsigned char* const ref_ptr[], int  ref_stride, unsigned int *sad_array"

 specialize vp9_sad8x16x4d sse3

-prototype void vp9_sad8x8x4d "const unsigned char *src_ptr, int  src_stride, unsigned char *ref_ptr[], int  ref_stride, unsigned int *sad_array"

+prototype void vp9_sad8x8x4d "const unsigned char *src_ptr, int  src_stride, const unsigned char* const ref_ptr[], int  ref_stride, unsigned int *sad_array"

 specialize vp9_sad8x8x4d sse3

-prototype void vp9_sad4x4x4d "const unsigned char *src_ptr, int  src_stride, unsigned char *ref_ptr[], int  ref_stride, unsigned int *sad_array"

+prototype void vp9_sad4x4x4d "const unsigned char *src_ptr, int  src_stride, const unsigned char* const ref_ptr[], int  ref_stride, unsigned int *sad_array"

 specialize vp9_sad4x4x4d sse3

@@ -477,6 +483,21 @@

 prototype void vp9_short_walsh8x4_x8 "short *InputData, short *OutputData, int pitch"

 specialize vp9_short_walsh8x4_x8

+#

+# Motion search

+#

+prototype int vp9_full_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv"

+specialize vp9_full_search_sad sse3 sse4_1

+vp9_full_search_sad_sse3=vp9_full_search_sadx3

+vp9_full_search_sad_sse4_1=vp9_full_search_sadx8

+prototype int vp9_refining_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv"

+specialize vp9_refining_search_sad sse3

+vp9_refining_search_sad_sse3=vp9_refining_search_sadx4

+prototype int vp9_diamond_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, DEC_MVCOSTS, union int_mv *center_mv"

+vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4

fi

 # end encoder functions

--- /dev/null

+++ b/vp9/common/subpelvar.h

@@ -1,0 +1,147 @@

+/*

+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#include "vp9/common/filter.h"

+static void variance(const unsigned char *src_ptr,  // first block

+                     int  source_stride,  // elements between rows of src_ptr

+                     const unsigned char *ref_ptr,  // second block

+                     int  recon_stride,  // elements between rows of ref_ptr

+                     int  w,  // samples compared per row

+                     int  h,  // rows compared

+                     unsigned int *sse,  // out: sum of squared differences

+                     int *sum) {  // out: sum of signed differences

+  int i, j;

+  int diff;

+  *sum = 0;  // both outputs are reset here, then accumulated over the w x h block

+  *sse = 0;  // note: no variance is computed in this function, only sum and sse

+  for (i = 0; i < h; i++) {

+    for (j = 0; j < w; j++) {

+      diff = src_ptr[j] - ref_ptr[j];  // signed difference (operands promote to int)

+      *sum += diff;

+      *sse += diff * diff;

+    }

+    src_ptr += source_stride;  // step both blocks to their next row

+    ref_ptr += recon_stride;

+  }

+}

+/****************************************************************************

+ *

+ *  ROUTINE       : var_filter_block2d_bil_first_pass

+ *

+ *  INPUTS        : UINT8  *src_ptr          : Pointer to source block.

+ *                  UINT32 src_pixels_per_line : Stride of input block.

+ *                  INT32  pixel_step        : Offset between filter input samples (see notes).

+ *                  UINT32 output_height     : Input block height.

+ *                  UINT32 output_width      : Input block width.

+ *                  INT16  *vp9_filter          : Array of 2 bi-linear filter taps.

+ *

+ *  OUTPUTS       : UINT16 *output_ptr       : Pointer to filtered block.

+ *

+ *  RETURNS       : void

+ *

+ *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in

+ *                  either horizontal or vertical direction to produce the

+ *                  filtered output block. Used to implement first-pass

+ *                  of 2-D separable filter.

+ *

+ *  SPECIAL NOTES : Produces UINT16 output to retain precision for next pass.

+ *                  Two filter taps should sum to VP9_FILTER_WEIGHT.

+ *                  pixel_step defines whether the filter is applied

+ *                  horizontally (pixel_step=1) or vertically (pixel_step=stride).

+ *                  It defines the offset required to move from one input

+ *                  to the next.

+ *

+ ****************************************************************************/

+static void var_filter_block2d_bil_first_pass(const unsigned char *src_ptr,  // first source sample

+                                              unsigned short *output_ptr,  // out: output_height rows of output_width values

+                                              unsigned int src_pixels_per_line,  // source row stride

+                                              int pixel_step,  // offset from a sample to the second tap's input

+                                              unsigned int output_height,

+                                              unsigned int output_width,

+                                              const short *vp9_filter) {  // [0] weights src_ptr[0], [1] weights src_ptr[pixel_step]

+  unsigned int i, j;

+  for (i = 0; i < output_height; i++) {

+    for (j = 0; j < output_width; j++) {

+      // Apply bilinear filter; WEIGHT / 2 is added before the shift (presumably to round to nearest)

+      output_ptr[j] = (((int)src_ptr[0]          * vp9_filter[0]) +

+                       ((int)src_ptr[pixel_step] * vp9_filter[1]) +

+                       (VP9_FILTER_WEIGHT / 2)) >> VP9_FILTER_SHIFT;

+      src_ptr++;

+    }

+    // Next row... (src_ptr has already advanced output_width samples within this row)

+    src_ptr    += src_pixels_per_line - output_width;

+    output_ptr += output_width;  // output rows are stored back to back

+  }

+}

+/****************************************************************************

+ *

+ *  ROUTINE       : var_filter_block2d_bil_second_pass

+ *

+ *  INPUTS        : UINT16 *src_ptr          : Pointer to source block.

+ *                  UINT32 src_pixels_per_line : Stride of input block.

+ *                  UINT32 pixel_step        : Offset between filter input samples (see notes).

+ *                  UINT32 output_height     : Input block height.

+ *                  UINT32 output_width      : Input block width.

+ *                  INT16  *vp9_filter          : Array of 2 bi-linear filter taps.

+ *

+ *  OUTPUTS       : UINT8  *output_ptr       : Pointer to filtered block.

+ *

+ *  RETURNS       : void

+ *

+ *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in

+ *                  either horizontal or vertical direction to produce the

+ *                  filtered output block. Used to implement second-pass

+ *                  of 2-D separable filter.

+ *

+ *  SPECIAL NOTES : Requires 16-bit input as produced by var_filter_block2d_bil_first_pass.

+ *                  Two filter taps should sum to VP9_FILTER_WEIGHT.

+ *                  pixel_step defines whether the filter is applied

+ *                  horizontally (pixel_step=1) or vertically (pixel_step=stride).

+ *                  It defines the offset required to move from one input

+ *                  to the next.

+ *

+ ****************************************************************************/

+static void var_filter_block2d_bil_second_pass(const unsigned short *src_ptr,  // first source sample (16-bit)

+                                               unsigned char *output_ptr,  // out: output_height rows of output_width bytes

+                                               unsigned int src_pixels_per_line,  // source row stride

+                                               unsigned int pixel_step,  // offset from a sample to the second tap's input

+                                               unsigned int output_height,

+                                               unsigned int output_width,

+                                               const short *vp9_filter) {  // [0] weights src_ptr[0], [1] weights src_ptr[pixel_step]

+  unsigned int  i, j;

+  int  Temp;  // weighted sum before the final shift

+  for (i = 0; i < output_height; i++) {

+    for (j = 0; j < output_width; j++) {

+      // Apply filter; WEIGHT / 2 is added before the shift (presumably to round to nearest)

+      Temp = ((int)src_ptr[0]         * vp9_filter[0]) +

+             ((int)src_ptr[pixel_step] * vp9_filter[1]) +

+             (VP9_FILTER_WEIGHT / 2);

+      output_ptr[j] = (unsigned int)(Temp >> VP9_FILTER_SHIFT);  // narrowed to unsigned char on store

+      src_ptr++;

+    }

+    // Next row... (src_ptr has already advanced output_width samples within this row)

+    src_ptr    += src_pixels_per_line - output_width;

+    output_ptr += output_width;  // output rows are stored back to back

+  }

+}

--- a/vp9/common/textblit.c

+++ b/vp9/common/textblit.c

@@ -10,6 +10,7 @@

 #include <stdlib.h>

+#include "vp9/common/textblit.h"

 void vp9_blit_text(const char *msg, unsigned char *address, const int pitch) {

   int letter_bitmap;

--- /dev/null

+++ b/vp9/common/textblit.h

@@ -1,0 +1,19 @@

+/*

+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#ifndef __INC_TEXTBLIT_H

+#define __INC_TEXTBLIT_H

+extern void vp9_blit_text(const char *msg, unsigned char *address,

+                          const int pitch);

+extern void vp9_blit_line(int x0, int x1, int y0, int y1,

+                          unsigned char *image, const int pitch);

+#endif  // __INC_TEXTBLIT_H

--- a/vp9/common/x86/loopfilter_x86.c

+++ b/vp9/common/x86/loopfilter_x86.c

@@ -97,13 +97,17 @@

   DECLARE_ALIGNED(16, unsigned char, flat_oq1[16]);

   DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]);

   __m128i mask, hev, flat;

-  __m128i thresh, limit, blimit;

   const __m128i zero = _mm_set1_epi16(0);

   __m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4;

-  thresh = _mm_shuffle_epi32(_mm_cvtsi32_si128(_thresh[0] * 0x01010101), 0);

-  limit = _mm_shuffle_epi32(_mm_cvtsi32_si128(_limit[0] * 0x01010101), 0);

-  blimit = _mm_shuffle_epi32(_mm_cvtsi32_si128(_blimit[0] * 0x01010101), 0);

+  const unsigned int extended_thresh = _thresh[0] * 0x01010101u;

+  const unsigned int extended_limit  = _limit[0]  * 0x01010101u;

+  const unsigned int extended_blimit = _blimit[0] * 0x01010101u;

+  const __m128i thresh =

+      _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_thresh), 0);

+  const __m128i limit =

+      _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_limit), 0);

+  const __m128i blimit =

+      _mm_shuffle_epi32(_mm_cvtsi32_si128((int)extended_blimit), 0);

   p4 = _mm_loadu_si128((__m128i *)(s - 5 * p));

   p3 = _mm_loadu_si128((__m128i *)(s - 4 * p));

--- a/vp9/common/x86/sadmxn_x86.c

+++ b/vp9/common/x86/sadmxn_x86.c

@@ -13,9 +13,6 @@

 #include "./vpx_rtcd.h"

-#if CONFIG_NEWBESTREFMV

 #if HAVE_SSE2

 unsigned int vp9_sad16x3_sse2(

   const unsigned char *src_ptr,

@@ -89,4 +86,3 @@

 #endif

-#endif  // CONFIG_NEWBESTREFMV

--- a/vp9/decoder/arm/armv6/idct_blk_v6.c

+++ b/vp9/decoder/arm/armv6/idct_blk_v6.c

@@ -12,9 +12,10 @@

 #include "vp9/common/idct.h"

 #include "vp9/decoder/dequantize.h"

-void vp8_dequant_dc_idct_add_y_block_v6

-(short *q, short *dq, unsigned char *pre,

- unsigned char *dst, int stride, char *eobs, short *dc) {

+void vp8_dequant_dc_idct_add_y_block_v6(short *q, short *dq,

+                                        unsigned char *pre,

+                                        unsigned char *dst, int stride,

+                                        unsigned short *eobs, short *dc) {

   int i;

   for (i = 0; i < 4; i++) {

@@ -46,9 +47,9 @@

-void vp8_dequant_idct_add_y_block_v6

-(short *q, short *dq, unsigned char *pre,

- unsigned char *dst, int stride, char *eobs) {

+void vp8_dequant_idct_add_y_block_v6(short *q, short *dq, unsigned char *pre,

+                                     unsigned char *dst, int stride,

+                                     unsigned short *eobs) {

   int i;

   for (i = 0; i < 4; i++) {

@@ -87,9 +88,9 @@

-void vp8_dequant_idct_add_uv_block_v6

-(short *q, short *dq, unsigned char *pre,

- unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) {

+void vp8_dequant_idct_add_uv_block_v6(short *q, short *dq, unsigned char *pre,

+                                      unsigned char *dstu, unsigned char *dstv,

+                                      int stride, unsigned short *eobs) {

   int i;

   for (i = 0; i < 2; i++) {

--- a/vp9/decoder/arm/neon/idct_blk_neon.c

+++ b/vp9/decoder/arm/neon/idct_blk_neon.c

@@ -27,9 +27,10 @@

 (short *q, short dq, unsigned char *pre, int pitch,

  unsigned char *dst, int stride);

-void vp8_dequant_dc_idct_add_y_block_neon

-(short *q, short *dq, unsigned char *pre,

- unsigned char *dst, int stride, char *eobs, short *dc) {

+void vp8_dequant_dc_idct_add_y_block_neon(short *q, short *dq,

+                                          unsigned char *pre,

+                                          unsigned char *dst, int stride,

+                                          unsigned short *eobs, short *dc) {

   int i;

   for (i = 0; i < 4; i++) {

@@ -51,9 +52,9 @@

-void vp8_dequant_idct_add_y_block_neon

-(short *q, short *dq, unsigned char *pre,

- unsigned char *dst, int stride, char *eobs) {

+void vp8_dequant_idct_add_y_block_neon(short *q, short *dq, unsigned char *pre,

+                                       unsigned char *dst, int stride,

+                                       unsigned short *eobs) {

   int i;

   for (i = 0; i < 4; i++) {

@@ -74,9 +75,11 @@

-void vp8_dequant_idct_add_uv_block_neon

-(short *q, short *dq, unsigned char *pre,

- unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) {

+void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq,

+                                        unsigned char *pre,

+                                        unsigned char *dstu,

+                                        unsigned char *dstv, int stride,

+                                        unsigned short *eobs) {

   if (((short *)eobs)[0] & 0xfefe)

     idct_dequant_full_2x_neon(q, dq, pre, dstu, 8, stride);

   else

--- a/vp9/decoder/dboolhuff.h

+++ b/vp9/decoder/dboolhuff.h

@@ -56,7 +56,7 @@

{ \

     int shift = VP9_BD_VALUE_SIZE - 8 - ((_count) + 8); \

     int loop_end, x; \

-    size_t bits_left = ((_bufend)-(_bufptr))*CHAR_BIT; \

+    int bits_left = (int)(((_bufend)-(_bufptr))*CHAR_BIT); \

     x = shift + CHAR_BIT - bits_left; \

     loop_end = 0; \

--- a/vp9/decoder/decodemv.c

+++ b/vp9/decoder/decodemv.c

@@ -654,7 +654,7 @@

   const int mis = pbi->common.mode_info_stride;

   MACROBLOCKD *const xd  = &pbi->mb;

-  int_mv *const mv = &mbmi->mv;

+  int_mv *const mv = &mbmi->mv[0];

   int mb_to_left_edge;

   int mb_to_right_edge;

   int mb_to_top_edge;

@@ -712,17 +712,13 @@

     int_mv nearest_second, nearby_second, best_mv_second;

     vp9_prob mv_ref_p [VP9_MVREFS - 1];

-#if CONFIG_NEWBESTREFMV

     int recon_y_stride, recon_yoffset;

     int recon_uv_stride, recon_uvoffset;

-#endif

     vp9_find_near_mvs(xd, mi,

                       prev_mi,

                       &nearest, &nearby, &best_mv, rct,

                       mbmi->ref_frame, cm->ref_frame_sign_bias);

-#if CONFIG_NEWBESTREFMV

       int ref_fb_idx;

       MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame;

@@ -755,7 +751,6 @@

                             mbmi->ref_mvs[ref_frame],

                             &best_mv, &nearest, &nearby);

-#endif

     vp9_mv_ref_probs(&pbi->common, mv_ref_p, rct);

@@ -808,7 +803,6 @@

       mbmi->second_ref_frame = mbmi->ref_frame + 1;

       if (mbmi->second_ref_frame == 4)

         mbmi->second_ref_frame = 1;

-#if CONFIG_NEWBESTREFMV

       if (mbmi->second_ref_frame) {

         int second_ref_fb_idx;

         /* Select the appropriate reference frame for this MB */

@@ -845,13 +839,7 @@

                               &nearest_second,

                               &nearby_second);

-#else

-      vp9_find_near_mvs(xd, mi, prev_mi,

-                        &nearest_second, &nearby_second, &best_mv_second,

-                        rct,

-                        mbmi->second_ref_frame,

-                        pbi->common.ref_frame_sign_bias);

-#endif

     } else {

       mbmi->second_ref_frame = 0;

@@ -1172,7 +1160,7 @@

-void vp9_decode_mode_mvs_init(VP9D_COMP *pbi, BOOL_DECODER* const bc) {

+void vp9_decode_mode_mvs_init(VP9D_COMP* const pbi, BOOL_DECODER* const bc) {

   VP9_COMMON *cm = &pbi->common;

   vpx_memset(cm->mbskip_pred_probs, 0, sizeof(cm->mbskip_pred_probs));

@@ -1184,8 +1172,8 @@

   mb_mode_mv_init(pbi, bc);

-void vp9_decode_mb_mode_mv(VP9D_COMP *pbi,

-                           MACROBLOCKD *xd,

+void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi,

+                           MACROBLOCKD* const xd,

                            int mb_row,

                            int mb_col,

                            BOOL_DECODER* const bc) {

--- a/vp9/decoder/decodframe.c

+++ b/vp9/decoder/decodframe.c

@@ -14,6 +14,7 @@

 #include "vp9/common/reconintra.h"

 #include "vp9/common/reconintra4x4.h"

 #include "vp9/common/reconinter.h"

+#include "vp9/decoder/decodframe.h"

 #include "detokenize.h"

 #include "vp9/common/invtrans.h"

 #include "vp9/common/alloccommon.h"

@@ -728,7 +729,8 @@

                        "%d length", 1);

-  if (vp9_start_decode(bool_decoder, partition, partition_size))

+  if (vp9_start_decode(bool_decoder,

+                       partition, (unsigned int)partition_size))

     vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,

                        "Failed to allocate bool decoder %d", 1);

@@ -985,7 +987,8 @@

   init_frame(pbi);

-  if (vp9_start_decode(&header_bc, data, first_partition_length_in_bytes))

+  if (vp9_start_decode(&header_bc, data,

+                       (unsigned int)first_partition_length_in_bytes))

     vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,

                        "Failed to allocate bool decoder 0");

   if (pc->frame_type == KEY_FRAME) {

--- /dev/null

+++ b/vp9/decoder/decodframe.h

@@ -1,0 +1,19 @@

+/*

+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#ifndef __INC_DECODFRAME_H

+#define __INC_DECODFRAME_H

+struct VP9Decompressor;

+extern void vp9_init_de_quantizer(struct VP9Decompressor *pbi);

+#endif  // __INC_DECODFRAME_H

--- a/vp9/decoder/dequantize.c

+++ b/vp9/decoder/dequantize.c

@@ -9,23 +9,11 @@

*/

-#include "vpx_ports/config.h"

+#include "vpx_rtcd.h"

 #include "dequantize.h"

 #include "vp9/common/idct.h"

 #include "vpx_mem/vpx_mem.h"

 #include "onyxd_int.h"

-extern void vp9_short_idct4x4llm_c(short *input, short *output, int pitch);

-extern void vp9_short_idct4x4llm_1_c(short *input, short *output, int pitch);

-extern void vp9_short_idct8x8_c(short *input, short *output, int pitch);

-extern void vp9_short_idct8x8_1_c(short *input, short *output, int pitch);

-#if CONFIG_LOSSLESS

-extern void vp9_short_inv_walsh4x4_x8_c(short *input, short *output,

-                                        int pitch);

-extern void vp9_short_inv_walsh4x4_1_x8_c(short *input, short *output,

-                                          int pitch);

-#endif

 #ifdef DEC_DEBUG

 extern int dec_debug;

--- a/vp9/decoder/dequantize.h

+++ b/vp9/decoder/dequantize.h

@@ -25,17 +25,20 @@

 extern void vp9_dequant_dc_idct_add_y_block_lossless_c(short *q, short *dq,

                                                        unsigned char *pre,

                                                        unsigned char *dst,

-                                                       int stride, char *eobs,

+                                                       int stride,

+                                                       unsigned short *eobs,

                                                        short *dc);

 extern void vp9_dequant_idct_add_y_block_lossless_c(short *q, short *dq,

                                                     unsigned char *pre,

                                                     unsigned char *dst,

-                                                    int stride, char *eobs);

+                                                    int stride,

+                                                    unsigned short *eobs);

 extern void vp9_dequant_idct_add_uv_block_lossless_c(short *q, short *dq,

                                                      unsigned char *pre,

                                                      unsigned char *dst_u,

                                                      unsigned char *dst_v,

-                                                     int stride, char *eobs);

+                                                     int stride,

+                                                     unsigned short *eobs);

 #endif

 typedef void (*vp9_dequant_idct_add_fn_t)(short *input, short *dq,

@@ -44,12 +47,13 @@

     unsigned char *pred, unsigned char *output, int pitch, int stride, int dc);

 typedef void(*vp9_dequant_dc_idct_add_y_block_fn_t)(short *q, short *dq,

-    unsigned char *pre, unsigned char *dst, int stride, char *eobs, short *dc);

+    unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs,

+    short *dc);

 typedef void(*vp9_dequant_idct_add_y_block_fn_t)(short *q, short *dq,

-    unsigned char *pre, unsigned char *dst, int stride, char *eobs);

+    unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs);

 typedef void(*vp9_dequant_idct_add_uv_block_fn_t)(short *q, short *dq,

     unsigned char *pre, unsigned char *dst_u, unsigned char *dst_v, int stride,

-    char *eobs);

+    unsigned short *eobs);

 void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq,

                                     unsigned char *pred, unsigned char *dest,

@@ -66,12 +70,14 @@

 #if CONFIG_SUPERBLOCKS

 void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, short *dq,

                                                    unsigned char *dst,

-                                                   int stride, char *eobs,

+                                                   int stride,

+                                                   unsigned short *eobs,

                                                    short *dc, MACROBLOCKD *xd);

 void vp9_dequant_idct_add_uv_block_8x8_inplace_c(short *q, short *dq,

                                                  unsigned char *dstu,

                                                  unsigned char *dstv,

-                                                 int stride, char *eobs,

+                                                 int stride,

+                                                 unsigned short *eobs,

                                                  MACROBLOCKD *xd);

 #endif

--- a/vp9/decoder/detokenize.c

+++ b/vp9/decoder/detokenize.c

@@ -98,7 +98,7 @@

 static const unsigned char cat6_prob[14] =

 { 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };

-void vp9_reset_mb_tokens_context(MACROBLOCKD *xd) {

+void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) {

   /* Clear entropy contexts for Y2 blocks */

   if ((xd->mode_info_context->mbmi.mode != B_PRED &&

       xd->mode_info_context->mbmi.mode != I8X8_PRED &&

@@ -414,12 +414,13 @@

   return c;

-int vp9_decode_mb_tokens_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd,

+int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi,

+                               MACROBLOCKD* const xd,

                                BOOL_DECODER* const bc) {

   ENTROPY_CONTEXT* const A = (ENTROPY_CONTEXT *)xd->above_context;

   ENTROPY_CONTEXT* const L = (ENTROPY_CONTEXT *)xd->left_context;

-  char* const eobs = xd->eobs;

+  unsigned short* const eobs = xd->eobs;

   PLANE_TYPE type;

   int c, i, eobtotal = 0, seg_eob;

   const int segment_id = xd->mode_info_context->mbmi.segment_id;

@@ -477,12 +478,13 @@

   return eobtotal;

-int vp9_decode_mb_tokens_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,

+int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi,

+                             MACROBLOCKD* const xd,

                              BOOL_DECODER* const bc) {

   ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context;

   ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context;

-  char *const eobs = xd->eobs;

+  unsigned short *const eobs = xd->eobs;

   PLANE_TYPE type;

   int c, i, eobtotal = 0, seg_eob;

   const int segment_id = xd->mode_info_context->mbmi.segment_id;

@@ -571,12 +573,13 @@

-int vp9_decode_mb_tokens(VP9D_COMP *dx, MACROBLOCKD *xd,

+int vp9_decode_mb_tokens(VP9D_COMP* const dx,

+                         MACROBLOCKD* const xd,

                          BOOL_DECODER* const bc) {

   ENTROPY_CONTEXT *const A = (ENTROPY_CONTEXT *)xd->above_context;

   ENTROPY_CONTEXT *const L = (ENTROPY_CONTEXT *)xd->left_context;

-  char *const eobs = xd->eobs;

+  unsigned short *const eobs = xd->eobs;

   const int *scan = vp9_default_zig_zag1d;

   PLANE_TYPE type;

   int c, i, eobtotal = 0, seg_eob = 16;

--- a/vp9/decoder/idct_blk.c

+++ b/vp9/decoder/idct_blk.c

@@ -8,30 +8,13 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

-#include "vpx_ports/config.h"

+#include "vpx_rtcd.h"

 #include "vp9/common/idct.h"

-#include "dequantize.h"

-void vp9_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred,

-                               unsigned char *dest, int pitch, int stride,

-                               int Dc);

-void vp9_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,

-                            unsigned char *dest, int pitch, int stride);

-void vp9_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,

-                            unsigned char *dst_ptr, int pitch, int stride);

-#if CONFIG_LOSSLESS

-void vp9_dequant_idct_add_lossless_c(short *input, short *dq,

-                                     unsigned char *pred, unsigned char *dest,

-                                     int pitch, int stride);

-void vp9_dc_only_idct_add_lossless_c(short input_dc, unsigned char *pred_ptr,

-                                     unsigned char *dst_ptr,

-                                     int pitch, int stride);

-#endif

 void vp9_dequant_dc_idct_add_y_block_c(short *q, short *dq,

                                        unsigned char *pre,

                                        unsigned char *dst,

-                                       int stride, char *eobs,

+                                       int stride, unsigned short *eobs,

                                        short *dc) {

   int i, j;

@@ -56,7 +39,7 @@

 void vp9_dequant_idct_add_y_block_c(short *q, short *dq,

                                     unsigned char *pre,

                                     unsigned char *dst,

-                                    int stride, char *eobs) {

+                                    int stride, unsigned short *eobs) {

   int i, j;

   for (i = 0; i < 4; i++) {

@@ -80,7 +63,7 @@

 void vp9_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *pre,

                                      unsigned char *dstu, unsigned char *dstv,

-                                     int stride, char *eobs) {

+                                     int stride, unsigned short *eobs) {

   int i, j;

   for (i = 0; i < 2; i++) {

@@ -124,7 +107,8 @@

 void vp9_dequant_dc_idct_add_y_block_8x8_c(short *q, short *dq,

                                            unsigned char *pre,

                                            unsigned char *dst,

-                                           int stride, char *eobs, short *dc,

+                                           int stride, unsigned short *eobs,

+                                           short *dc,

                                            MACROBLOCKD *xd) {

   vp9_dequant_dc_idct_add_8x8_c(q, dq, pre, dst, 16, stride, dc[0]);

   vp9_dequant_dc_idct_add_8x8_c(&q[64], dq, pre + 8, dst + 8, 16, stride, dc[1]);

@@ -137,7 +121,8 @@

 #if CONFIG_SUPERBLOCKS

 void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, short *dq,

                                                    unsigned char *dst,

-                                                   int stride, char *eobs,

+                                                   int stride,

+                                                   unsigned short *eobs,

                                                    short *dc, MACROBLOCKD *xd) {

   vp9_dequant_dc_idct_add_8x8_c(q, dq, dst, dst, stride, stride, dc[0]);

   vp9_dequant_dc_idct_add_8x8_c(&q[64], dq, dst + 8,

@@ -152,7 +137,7 @@

 void vp9_dequant_idct_add_y_block_8x8_c(short *q, short *dq,

                                         unsigned char *pre,

                                         unsigned char *dst,

-                                        int stride, char *eobs,

+                                        int stride, unsigned short *eobs,

                                         MACROBLOCKD *xd) {

   unsigned char *origdest = dst;

   unsigned char *origpred = pre;

@@ -170,7 +155,7 @@

                                          unsigned char *pre,

                                          unsigned char *dstu,

                                          unsigned char *dstv,

-                                         int stride, char *eobs,

+                                         int stride, unsigned short *eobs,

                                          MACROBLOCKD *xd) {

   vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride);

@@ -184,7 +169,8 @@

 void vp9_dequant_idct_add_uv_block_8x8_inplace_c(short *q, short *dq,

                                                  unsigned char *dstu,

                                                  unsigned char *dstv,

-                                                 int stride, char *eobs,

+                                                 int stride,

+                                                 unsigned short *eobs,

                                                  MACROBLOCKD *xd) {

   vp9_dequant_idct_add_8x8_c(q, dq, dstu, dstu, stride, stride);

@@ -198,7 +184,8 @@

 void vp9_dequant_dc_idct_add_y_block_lossless_c(short *q, short *dq,

                                                 unsigned char *pre,

                                                 unsigned char *dst,

-                                                int stride, char *eobs,

+                                                int stride,

+                                                unsigned short *eobs,

                                                 short *dc) {

   int i, j;

@@ -223,7 +210,7 @@

 void vp9_dequant_idct_add_y_block_lossless_c(short *q, short *dq,

                                              unsigned char *pre,

                                              unsigned char *dst,

-                                             int stride, char *eobs) {

+                                             int stride, unsigned short *eobs) {

   int i, j;

   for (i = 0; i < 4; i++) {

@@ -249,7 +236,8 @@

                                               unsigned char *pre,

                                               unsigned char *dstu,

                                               unsigned char *dstv,

-                                              int stride, char *eobs) {

+                                              int stride,

+                                              unsigned short *eobs) {

   int i, j;

   for (i = 0; i < 2; i++) {

--- a/vp9/decoder/onyxd_if.c

+++ b/vp9/decoder/onyxd_if.c

@@ -26,12 +26,12 @@

 #include "vpx_scale/vpxscale.h"

 #include "vp9/common/systemdependent.h"

 #include "vpx_ports/vpx_timer.h"

+#include "vp9/decoder/decodframe.h"

 #include "detokenize.h"

 #if ARCH_ARM

 #include "vpx_ports/arm.h"

 #endif

-extern void vp9_init_de_quantizer(VP9D_COMP *pbi);

 static int get_free_fb(VP9_COMMON *cm);

 static void ref_cnt_fb(int *buf, int *idx, int new_idx);

--- a/vp9/decoder/x86/idct_blk_mmx.c

+++ b/vp9/decoder/x86/idct_blk_mmx.c

@@ -15,7 +15,8 @@

 void vp9_dequant_dc_idct_add_y_block_mmx(short *q, short *dq,

                                          unsigned char *pre,

                                          unsigned char *dst,

-                                         int stride, char *eobs, short *dc) {

+                                         int stride, unsigned short *eobs,

+                                         short *dc) {

   int i;

   for (i = 0; i < 4; i++) {

@@ -53,7 +54,7 @@

 void vp9_dequant_idct_add_y_block_mmx(short *q, short *dq,

                                       unsigned char *pre,

                                       unsigned char *dst,

-                                      int stride, char *eobs) {

+                                      int stride, unsigned short *eobs) {

   int i;

   for (i = 0; i < 4; i++) {

@@ -96,7 +97,7 @@

                                        unsigned char *pre,

                                        unsigned char *dstu,

                                        unsigned char *dstv,

-                                       int stride, char *eobs) {

+                                       int stride, unsigned short *eobs) {

   int i;

   for (i = 0; i < 2; i++) {

--- a/vp9/decoder/x86/idct_blk_sse2.c

+++ b/vp9/decoder/x86/idct_blk_sse2.c

@@ -31,7 +31,8 @@

 void vp9_dequant_dc_idct_add_y_block_sse2(short *q, short *dq,

                                           unsigned char *pre,

                                           unsigned char *dst,

-                                          int stride, char *eobs, short *dc) {

+                                          int stride, unsigned short *eobs,

+                                          short *dc) {

   int i;

   for (i = 0; i < 4; i++) {

@@ -57,7 +58,7 @@

 void vp9_dequant_idct_add_y_block_sse2(short *q, short *dq,

                                        unsigned char *pre, unsigned char *dst,

-                                       int stride, char *eobs) {

+                                       int stride, unsigned short *eobs) {

   int i;

   for (i = 0; i < 4; i++) {

@@ -82,7 +83,7 @@

                                         unsigned char *pre,

                                         unsigned char *dstu,

                                         unsigned char *dstv,

-                                        int stride, char *eobs) {

+                                        int stride, unsigned short *eobs) {

   if (((short *)(eobs))[0] & 0xfefe)

     vp9_idct_dequant_full_2x_sse2(q, dq, pre, dstu, stride, 8);

   else

--- a/vp9/encoder/bitstream.c

+++ b/vp9/encoder/bitstream.c

@@ -29,10 +29,7 @@

 #include "vp9/common/entropy.h"

 #include "vp9/encoder/encodemv.h"

 #include "vp9/common/entropymv.h"

-#if CONFIG_NEWBESTREFMV

 #include "vp9/common/mvref_common.h"

-#endif

 #if defined(SECTIONBITS_OUTPUT)

 unsigned __int64 Sectionbits[500];

@@ -186,7 +183,6 @@

 void vp9_update_skip_probs(VP9_COMP *cpi) {

   VP9_COMMON *const pc = &cpi->common;

-  int prob_skip_false[3] = {0, 0, 0};

   int k;

   for (k = 0; k < MBSKIP_CONTEXTS; ++k) {

@@ -218,7 +214,6 @@

 static void update_refpred_stats(VP9_COMP *cpi) {

   VP9_COMMON *const cm = &cpi->common;

   int i;

-  int tot_count;

   vp9_prob new_pred_probs[PREDICTION_PROBS];

   int old_cost, new_cost;

@@ -884,7 +879,6 @@

 static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) {

-  int i;

   VP9_COMMON *const pc = &cpi->common;

   const nmv_context *nmvc = &pc->fc.nmvc;

   MACROBLOCK *x = &cpi->mb;

@@ -1062,9 +1056,8 @@

             // Only used for context just now and soon to be deprecated.

             vp9_find_near_mvs(xd, m, prev_m, &n1, &n2, &best_mv, ct,

                               rf, cpi->common.ref_frame_sign_bias);

-#if CONFIG_NEWBESTREFMV

             best_mv.as_int = mi->ref_mvs[rf][0].as_int;

-#endif

             vp9_mv_ref_probs(&cpi->common, mv_ref_p, ct);

@@ -1124,10 +1117,8 @@

                               mi->second_ref_frame,

                               cpi->common.ref_frame_sign_bias);

-#if CONFIG_NEWBESTREFMV

             best_second_mv.as_int =

               mi->ref_mvs[mi->second_ref_frame][0].as_int;

-#endif

           // does the feature use compound prediction or not

@@ -1315,7 +1306,6 @@

                               const MODE_INFO   *m,

                               int                mode_info_stride,

                               vp9_writer *const  bc) {

-  const int mis = mode_info_stride;

   int ym;

   int segment_id;

@@ -1331,6 +1321,7 @@

        (vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) != 0))) {

         int skip_coeff = m->mbmi.mb_skip_coeff;

 #if CONFIG_SUPERBLOCKS

+        const int mis = mode_info_stride;

         if (m->mbmi.encoded_as_sb) {

           skip_coeff &= m[1].mbmi.mb_skip_coeff;

           skip_coeff &= m[mis].mbmi.mb_skip_coeff;

--- a/vp9/encoder/block.h

+++ b/vp9/encoder/block.h

@@ -70,9 +70,7 @@

   PARTITION_INFO partition_info;

   int_mv best_ref_mv;

   int_mv second_best_ref_mv;

-#if CONFIG_NEWBESTREFMV || CONFIG_NEW_MVREF

   int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REFS];

-#endif

   int rate;

   int distortion;

   int64_t intra_error;

--- a/vp9/encoder/dct.c

+++ b/vp9/encoder/dct.c

@@ -901,6 +901,8 @@

 #endif

+#define TEST_INT_16x16_DCT 1

+#if !TEST_INT_16x16_DCT

 static const double C1 = 0.995184726672197;

 static const double C2 = 0.98078528040323;

 static const double C3 = 0.956940335732209;

@@ -1107,3 +1109,225 @@

   vp9_clear_system_state(); // Make it simd safe : __asm emms;

+#else

+static const int16_t C1 = 16305;

+static const int16_t C2 = 16069;

+static const int16_t C3 = 15679;

+static const int16_t C4 = 15137;

+static const int16_t C5 = 14449;

+static const int16_t C6 = 13623;

+static const int16_t C7 = 12665;

+static const int16_t C8 = 11585;

+static const int16_t C9 = 10394;

+static const int16_t C10 = 9102;

+static const int16_t C11 = 7723;

+static const int16_t C12 = 6270;

+static const int16_t C13 = 4756;

+static const int16_t C14 = 3196;

+static const int16_t C15 = 1606;

+#define RIGHT_SHIFT 14

+#define ROUNDING (1 << (RIGHT_SHIFT - 1))

+static void dct16x16_1d(int16_t input[16], int16_t output[16],  // one 16-point 1-D transform pass: reads input[0..15], writes output[0..15]

+                        int last_shift_bits) {  // extra right-shift bits applied once to every result in step 4

+    int16_t step[16];  // 16-bit scratch, alternated with output[] from stage to stage

+    int intermediate[16];  // int-width scratch for step 4; only indices 8, 9 and 11..15 are used

+    int temp1, temp2;  // int-width products before they are rounded back down

+    int final_shift = RIGHT_SHIFT;

+    int final_rounding = ROUNDING;

+    int output_shift = 0;

+    int output_rounding = 0;

+    final_shift += last_shift_bits;  // shift used where a constant multiply is being undone

+    if (final_shift > 0)

+    final_rounding = 1 << (final_shift - 1);  // body of the if above (indentation is misleading): half of 1 << final_shift

+    output_shift += last_shift_bits;  // shift used for results formed by plain add/subtract

+    if (output_shift > 0)

+      output_rounding = 1 << (output_shift - 1);

+    // step 1: sums (step[0..7]) and differences (step[8..15]) of the mirrored pairs input[k], input[15 - k]

+    step[ 0] = input[0] + input[15];

+    step[ 1] = input[1] + input[14];

+    step[ 2] = input[2] + input[13];

+    step[ 3] = input[3] + input[12];

+    step[ 4] = input[4] + input[11];

+    step[ 5] = input[5] + input[10];

+    step[ 6] = input[6] + input[ 9];

+    step[ 7] = input[7] + input[ 8];

+    step[ 8] = input[7] - input[ 8];

+    step[ 9] = input[6] - input[ 9];

+    step[10] = input[5] - input[10];

+    step[11] = input[4] - input[11];

+    step[12] = input[3] - input[12];

+    step[13] = input[2] - input[13];

+    step[14] = input[1] - input[14];

+    step[15] = input[0] - input[15];

+    // step 2: sums/differences of step[0..7] into output[0..7]; step[8..15] are combined pairwise with constant weights into output[8..15]

+    output[0] = step[0] + step[7];

+    output[1] = step[1] + step[6];

+    output[2] = step[2] + step[5];

+    output[3] = step[3] + step[4];

+    output[4] = step[3] - step[4];

+    output[5] = step[2] - step[5];

+    output[6] = step[1] - step[6];

+    output[7] = step[0] - step[7];

+    temp1 = step[ 8] * C7;

+    temp2 = step[15] * C9;

+    output[ 8] = (temp1 + temp2 + ROUNDING) >> RIGHT_SHIFT;  // round, then drop the RIGHT_SHIFT bits the constants carry

+    temp1 = step[ 9] * C11;

+    temp2 = step[14] * C5;

+    output[ 9] = (temp1 - temp2 + ROUNDING) >> RIGHT_SHIFT;

+    temp1 = step[10] * C3;

+    temp2 = step[13] * C13;

+    output[10] = (temp1 + temp2 + ROUNDING) >> RIGHT_SHIFT;

+    temp1 = step[11] * C15;

+    temp2 = step[12] * C1;

+    output[11] = (temp1 - temp2 + ROUNDING) >> RIGHT_SHIFT;

+    temp1 = step[11] * C1;

+    temp2 = step[12] * C15;

+    output[12] = (temp2 + temp1 + ROUNDING) >> RIGHT_SHIFT;

+    temp1 = step[10] * C13;

+    temp2 = step[13] * C3;

+    output[13] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT;

+    temp1 = step[ 9] * C5;

+    temp2 = step[14] * C11;

+    output[14] = (temp2 + temp1 + ROUNDING) >> RIGHT_SHIFT;

+    temp1 = step[ 8] * C9;

+    temp2 = step[15] * C7;

+    output[15] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT;

+    // step 3: sums/differences of output[0..3]; weighted pairs from output[4..7]; sums/differences of output[8..11] and output[12..15]

+    step[ 0] = output[0] + output[3];

+    step[ 1] = output[1] + output[2];

+    step[ 2] = output[1] - output[2];

+    step[ 3] = output[0] - output[3];

+    temp1 = output[4] * C14;

+    temp2 = output[7] * C2;

+    step[ 4] = (temp1 + temp2 + ROUNDING) >> RIGHT_SHIFT;

+    temp1 = output[5] * C10;

+    temp2 = output[6] * C6;

+    step[ 5] = (temp1 + temp2 + ROUNDING) >> RIGHT_SHIFT;

+    temp1 = output[5] * C6;

+    temp2 = output[6] * C10;

+    step[ 6] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT;

+    temp1 = output[4] * C2;

+    temp2 = output[7] * C14;

+    step[ 7] = (temp2 - temp1 + ROUNDING) >> RIGHT_SHIFT;

+    step[ 8] = output[ 8] + output[11];

+    step[ 9] = output[ 9] + output[10];

+    step[10] = output[ 9] - output[10];

+    step[11] = output[ 8] - output[11];

+    step[12] = output[12] + output[15];

+    step[13] = output[13] + output[14];

+    step[14] = output[13] - output[14];

+    step[15] = output[12] - output[15];

+    // step 4: write all 16 results; each one applies last_shift_bits exactly once, through either final_shift or output_shift

+    output[ 0] = (step[ 0] + step[ 1] + output_rounding) >> output_shift;

+    output[ 8] = (step[ 0] - step[ 1] + output_rounding) >> output_shift;

+    temp1 = step[2] * C12;

+    temp2 = step[3] * C4;

+    temp1 = (temp1 + temp2 + final_rounding) >> final_shift;

+    output[ 4] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;  // net scale of 2 * C8 / (1 << RIGHT_SHIFT) on temp1

+    temp1 = step[2] * C4;

+    temp2 = step[3] * C12;

+    temp1 = (temp2 - temp1 + final_rounding) >> final_shift;

+    output[12] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;

+    output[ 2] = (2 * ((step[4] + step[ 5]) * C8) + final_rounding)

+        >> final_shift;

+    output[14] = (2 * ((step[7] - step[ 6]) * C8) + final_rounding)

+        >> final_shift;

+    temp1 = step[4] - step[5];

+    temp2 = step[6] + step[7];

+    output[ 6] = (temp1 + temp2 + output_rounding) >> output_shift;

+    output[10] = (temp1 - temp2 + output_rounding) >> output_shift;

+    intermediate[8] = step[8] + step[14];

+    intermediate[9] = step[9] + step[15];

+    temp1 = intermediate[8] * C12;

+    temp2 = intermediate[9] * C4;

+    temp1 = (temp1 - temp2 + final_rounding) >> final_shift;

+    output[3] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;

+    temp1 = intermediate[8] * C4;

+    temp2 = intermediate[9] * C12;

+    temp1 = (temp2 + temp1 + final_rounding) >> final_shift;

+    output[13] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;

+    output[ 9] = (2 * ((step[10] + step[11]) * C8) + final_rounding)

+        >> final_shift;

+    intermediate[11] = step[10] - step[11];

+    intermediate[12] = step[12] + step[13];

+    intermediate[13] = step[12] - step[13];

+    intermediate[14] = step[ 8] - step[14];

+    intermediate[15] = step[ 9] - step[15];

+    output[15] = (intermediate[11] + intermediate[12] + output_rounding)

+        >> output_shift;

+    output[ 1] = -(intermediate[11] - intermediate[12] + output_rounding)  // NOTE(review): unary minus binds tighter than >>, so the rounding term is negated too and a negative value may be shifted - confirm intended

+        >> output_shift;

+    output[ 7] = (2 * (intermediate[13] * C8) + final_rounding) >> final_shift;

+    temp1 = intermediate[14] * C12;

+    temp2 = intermediate[15] * C4;

+    temp1 = (temp1 - temp2 + final_rounding) >> final_shift;

+    output[11] = (-2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;  // same scaling as the other temp1 * C8 results, with the sign flipped

+    temp1 = intermediate[14] * C4;

+    temp2 = intermediate[15] * C12;

+    temp1 = (temp2 + temp1 + final_rounding) >> final_shift;

+    output[ 5] = (2 * (temp1 * C8) + ROUNDING) >> RIGHT_SHIFT;

+}

+void vp9_short_fdct16x16_c(int16_t *input, int16_t *out, int pitch) {  // 2-D 16x16 transform: dct16x16_1d over columns, then over rows; writes 256 values to out

+    int shortpitch = pitch >> 1;  // row step in int16_t elements; presumably pitch is given in bytes - TODO confirm against callers

+    int i, j;

+    int16_t output[256];  // 16x16 buffer holding the column-pass result, 16 values per row

+    int16_t *outptr = &output[0];  // walks output[] one row at a time in the second pass

+    // First transform columns (last_shift_bits = 0: no extra shift on this pass)

+    for (i = 0; i < 16; i++) {

+        int16_t temp_in[16];

+        int16_t temp_out[16];

+        for (j = 0; j < 16; j++)

+            temp_in[j] = input[j * shortpitch + i];  // gather column i into a contiguous array

+        dct16x16_1d(temp_in, temp_out, 0);

+        for (j = 0; j < 16; j++)

+            output[j * 16 + i] = temp_out[j];  // scatter the result back into column i

+    }

+    // Then transform rows (last_shift_bits = 1: one extra bit of right shift)

+    for (i = 0; i < 16; ++i) {

+        dct16x16_1d(outptr, out, 1);  // each row is contiguous, so it is transformed straight into out

+        outptr += 16;

+        out += 16;

+    }

+}

+#undef RIGHT_SHIFT

+#undef ROUNDING

+#endif

--- a/vp9/encoder/encodeframe.c

+++ b/vp9/encoder/encodeframe.c

@@ -10,6 +10,7 @@

 #include "vpx_ports/config.h"

+#include "vp9/encoder/encodeframe.h"

 #include "encodemb.h"

 #include "encodemv.h"

 #include "vp9/common/common.h"

@@ -27,6 +28,7 @@

 #include "vp9/common/findnearmv.h"

 #include "vp9/common/reconintra.h"

 #include "vp9/common/seg_common.h"

+#include "vp9/encoder/tokenize.h"

 #include "vpx_rtcd.h"

 #include <stdio.h>

 #include <math.h>

@@ -34,13 +36,10 @@

 #include "vp9/common/subpixel.h"

 #include "vpx_ports/vpx_timer.h"

 #include "vp9/common/pred_common.h"

+#include "vp9/common/mvref_common.h"

 #define DBG_PRNT_SEGMAP 0

-#if CONFIG_NEWBESTREFMV

-#include "vp9/common/mvref_common.h"

-#endif

 #if CONFIG_RUNTIME_CPU_DETECT

 #define RTCD(x)     &cpi->common.rtcd.x

 #define IF_RTCD(x)  (x)

@@ -54,36 +53,20 @@

 int mb_row_debug, mb_col_debug;

 #endif

-extern void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex);

+static void encode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,

+                                    TOKENEXTRA **t, int recon_yoffset,

+                                    int recon_uvoffset, int output_enabled);

-extern void vp9_auto_select_speed(VP9_COMP *cpi);

+static void encode_inter_superblock(VP9_COMP *cpi, MACROBLOCK *x,

+                                    TOKENEXTRA **t, int recon_yoffset,

+                                    int recon_uvoffset, int mb_col, int mb_row);

-int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,

-                              int recon_yoffset, int recon_uvoffset,

-                              int *returnrate, int *returndistortion);

+static void encode_intra_macro_block(VP9_COMP *cpi, MACROBLOCK *x,

+                                     TOKENEXTRA **t, int output_enabled);

-extern void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,

-                                           int recon_yoffset,

-                                           int recon_uvoffset, int *r, int *d);

+static void encode_intra_super_block(VP9_COMP *cpi, MACROBLOCK *x,

+                                     TOKENEXTRA **t, int mb_col);

-void vp9_build_block_offsets(MACROBLOCK *x);

-void vp9_setup_block_ptrs(MACROBLOCK *x);

-void vp9_encode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,

-                                 int recon_yoffset, int recon_uvoffset,

-                                 int output_enabled);

-void vp9_encode_inter_superblock(VP9_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,

-                                 int recon_yoffset, int recon_uvoffset,

-                                 int mb_col, int mb_row);

-void vp9_encode_intra_macro_block(VP9_COMP *cpi, MACROBLOCK *x,

-                                  TOKENEXTRA **t, int output_enabled);

-void vp9_encode_intra_super_block(VP9_COMP *cpi, MACROBLOCK *x,

-                                  TOKENEXTRA **t, int mb_col);

 static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x);

 #ifdef MODE_STATS

@@ -634,7 +617,7 @@

       *totaldist += d;

       // Dummy encode, do not do the tokenization

-      vp9_encode_intra_macro_block(cpi, x, tp, 0);

+      encode_intra_macro_block(cpi, x, tp, 0);

       // Note the encoder may have changed the segment_id

       // Save the coding context

@@ -661,8 +644,8 @@

       *totaldist += d;

       // Dummy encode, do not do the tokenization

-      vp9_encode_inter_macroblock(cpi, x, tp,

-                                  recon_yoffset, recon_uvoffset, 0);

+      encode_inter_macroblock(cpi, x, tp,

+                              recon_yoffset, recon_uvoffset, 0);

       seg_id = mbmi->segment_id;

       if (cpi->mb.e_mbd.segmentation_enabled && seg_id == 0) {

@@ -992,10 +975,10 @@

     if (cm->frame_type == KEY_FRAME) {

 #if CONFIG_SUPERBLOCKS

       if (xd->mode_info_context->mbmi.encoded_as_sb)

-        vp9_encode_intra_super_block(cpi, x, tp, mb_col);

+        encode_intra_super_block(cpi, x, tp, mb_col);

       else

 #endif

-        vp9_encode_intra_macro_block(cpi, x, tp, 1);

+        encode_intra_macro_block(cpi, x, tp, 1);

         // Note the encoder may have changed the segment_id

 #ifdef MODE_STATS

@@ -1018,12 +1001,12 @@

 #if CONFIG_SUPERBLOCKS

       if (xd->mode_info_context->mbmi.encoded_as_sb)

-        vp9_encode_inter_superblock(cpi, x, tp, recon_yoffset, recon_uvoffset,

-                                    mb_col, mb_row);

+        encode_inter_superblock(cpi, x, tp, recon_yoffset, recon_uvoffset,

+                                mb_col, mb_row);

       else

 #endif

-        vp9_encode_inter_macroblock(cpi, x, tp,

-                                    recon_yoffset, recon_uvoffset, 1);

+        encode_inter_macroblock(cpi, x, tp,

+                                recon_yoffset, recon_uvoffset, 1);

         // Note the encoder may have changed the segment_id

 #ifdef MODE_STATS

@@ -1404,7 +1387,7 @@

         x->src.v_buffer += 16 * x->src.uv_stride - 8 * offset;

-      cpi->tok_count = tp - cpi->tok;

+      cpi->tok_count = (unsigned int)(tp - cpi->tok);

     vpx_usec_timer_mark(&emr_timer);

@@ -1592,7 +1575,7 @@

     encode_frame_internal(cpi);

     for (i = 0; i < NB_PREDICTION_TYPES; ++i) {

-      const int diff = cpi->rd_comp_pred_diff[i] / cpi->common.MBs;

+      const int diff = (int)(cpi->rd_comp_pred_diff[i] / cpi->common.MBs);

       cpi->rd_prediction_type_threshes[frame_type][i] += diff;

       cpi->rd_prediction_type_threshes[frame_type][i] >>= 1;

@@ -1602,7 +1585,7 @@

       int diff;

       if (i == TX_MODE_SELECT)

         pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv, 2048 * (TX_SIZE_MAX - 1), 0);

-      diff = pd / cpi->common.MBs;

+      diff = (int)(pd / cpi->common.MBs);

       cpi->rd_tx_select_threshes[frame_type][i] += diff;

       cpi->rd_tx_select_threshes[frame_type][i] /= 2;

@@ -1851,10 +1834,8 @@

-void vp9_encode_intra_super_block(VP9_COMP *cpi,

-                                  MACROBLOCK *x,

-                                  TOKENEXTRA **t,

-                                  int mb_col) {

+static void encode_intra_super_block(VP9_COMP *cpi, MACROBLOCK *x,

+                                     TOKENEXTRA **t, int mb_col) {

   const int output_enabled = 1;

   int n;

   MACROBLOCKD *xd = &x->e_mbd;

@@ -1931,10 +1912,8 @@

 #endif /* CONFIG_SUPERBLOCKS */

-void vp9_encode_intra_macro_block(VP9_COMP *cpi,

-                                  MACROBLOCK *x,

-                                  TOKENEXTRA **t,

-                                  int output_enabled) {

+static void encode_intra_macro_block(VP9_COMP *cpi, MACROBLOCK *x,

+                                     TOKENEXTRA **t, int output_enabled) {

   MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;

   if ((cpi->oxcf.tuning == VP8_TUNE_SSIM) && output_enabled) {

     adjust_act_zbin(cpi, x);

@@ -1977,18 +1956,13 @@

     } else {

       mbmi->txfm_size = TX_4X4;

-  }

-#if CONFIG_NEWBESTREFMV

-  else

+  } else {

     vp9_tokenize_mb(cpi, &x->e_mbd, t, 1);

-#endif

+  }

-extern void vp9_fix_contexts(MACROBLOCKD *xd);

-void vp9_encode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,

-                                 TOKENEXTRA **t, int recon_yoffset,

-                                 int recon_uvoffset, int output_enabled) {

+static void encode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,

+                                    TOKENEXTRA **t, int recon_yoffset,

+                                    int recon_uvoffset, int output_enabled) {

   VP9_COMMON *cm = &cpi->common;

   MACROBLOCKD *const xd = &x->e_mbd;

   MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;

@@ -2172,9 +2146,10 @@

 #if CONFIG_SUPERBLOCKS

-void vp9_encode_inter_superblock(VP9_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,

-                                 int recon_yoffset, int recon_uvoffset,

-                                 int mb_col, int mb_row) {

+static void encode_inter_superblock(VP9_COMP *cpi, MACROBLOCK *x,

+                                    TOKENEXTRA **t, int recon_yoffset,

+                                    int recon_uvoffset,

+                                    int mb_col, int mb_row) {

   const int output_enabled = 1;

   VP9_COMMON *cm = &cpi->common;

   MACROBLOCKD *xd = &x->e_mbd;

--- /dev/null

+++ b/vp9/encoder/encodeframe.h

@@ -1,0 +1,21 @@

+/*

+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#ifndef __INC_ENCODEFRAME_H

+#define __INC_ENCODEFRAME_H

+struct macroblock;  // forward declaration: the prototypes below only take a pointer to it

+extern void vp9_build_block_offsets(struct macroblock *x);  // shared prototype; this patch removes the local extern copy from firstpass.c

+extern void vp9_setup_block_ptrs(struct macroblock *x);  // shared prototype; this patch removes the local extern copy from firstpass.c

+#endif  // __INC_ENCODEFRAME_H

--- a/vp9/encoder/encodemv.c

+++ b/vp9/encoder/encodemv.c

@@ -168,7 +168,7 @@

   if (cur_b - mod_b - cost > 0) {

     return cur_b - mod_b - cost;

   } else {

-    return -vp9_cost_zero(upd_p);

+    return 0 - vp9_cost_zero(upd_p);

--- a/vp9/encoder/encodemv.h

+++ b/vp9/encoder/encodemv.h

@@ -18,11 +18,11 @@

 void vp9_encode_nmv(vp9_writer* const w, const MV* const mv,

                     const MV* const ref, const nmv_context* const mvctx);

 void vp9_encode_nmv_fp(vp9_writer* const w, const MV* const mv,

-                       const MV* const ref, const nmv_context *mvctx,

+                       const MV* const ref, const nmv_context* const mvctx,

                        int usehp);

 void vp9_build_nmv_cost_table(int *mvjoint,

                               int *mvcost[2],

-                              const nmv_context *mvctx,

+                              const nmv_context* const mvctx,

                               int usehp,

                               int mvc_flag_v,

                               int mvc_flag_h);

--- a/vp9/encoder/firstpass.c

+++ b/vp9/encoder/firstpass.c

@@ -18,6 +18,7 @@

 #include "mcomp.h"

 #include "firstpass.h"

 #include "vpx_scale/vpxscale.h"

+#include "vp9/encoder/encodeframe.h"

 #include "encodemb.h"

 #include "vp9/common/extend.h"

 #include "vp9/common/systemdependent.h"

@@ -24,6 +25,7 @@

 #include "vpx_mem/vpx_mem.h"

 #include "vp9/common/swapyv12buffer.h"

 #include <stdio.h>

+#include "vp9/encoder/quantize.h"

 #include "rdopt.h"

 #include "ratectrl.h"

 #include "vp9/common/quant_common.h"

@@ -38,17 +40,6 @@

 #define IF_RTCD(x) NULL

 #endif

-extern void vp9_build_block_offsets(MACROBLOCK *x);

-extern void vp9_setup_block_ptrs(MACROBLOCK *x);

-extern void vp9_frame_init_quantizer(VP9_COMP *cpi);

-extern void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb,

-                                   int_mv *mv);

-extern void vp9_alloc_compressor_data(VP9_COMP *cpi);

 #define IIFACTOR   12.5

 #define IIKFACTOR1 12.5

 #define IIKFACTOR2 15.0

@@ -705,9 +696,9 @@

     FIRSTPASS_STATS fps;

     fps.frame      = cm->current_video_frame;

-    fps.intra_error = intra_error >> 8;

-    fps.coded_error = coded_error >> 8;

-    fps.sr_coded_error = sr_coded_error >> 8;

+    fps.intra_error = (double)(intra_error >> 8);

+    fps.coded_error = (double)(coded_error >> 8);

+    fps.sr_coded_error = (double)(sr_coded_error >> 8);

     weight = simple_weight(cpi->Source);

@@ -747,8 +738,8 @@

     // TODO:  handle the case when duration is set to 0, or something less

     // than the full time between subsequent cpi->source_time_stamp s  .

-    fps.duration = cpi->source->ts_end

-                   - cpi->source->ts_start;

+    fps.duration = (double)(cpi->source->ts_end

+                            - cpi->source->ts_start);

     // don't want to do output stats with a stack variable!

     memcpy(cpi->twopass.this_frame_stats,

@@ -910,7 +901,7 @@

   double err_per_mb = section_err / num_mbs;

   double err_correction_factor;

   double speed_correction = 1.0;

-  int overhead_bits_per_mb;

+  double overhead_bits_per_mb;

   if (section_target_bandwitdh <= 0)

     return cpi->twopass.maxq_max_limit;          // Highest value allowed

@@ -985,7 +976,7 @@

       err_correction_factor = 5.0;

     bits_per_mb_at_this_q =

-      vp9_bits_per_mb(INTER_FRAME, Q) + overhead_bits_per_mb;

+      vp9_bits_per_mb(INTER_FRAME, Q) + (int)overhead_bits_per_mb;

     bits_per_mb_at_this_q = (int)(.5 + err_correction_factor *

                                   (double)bits_per_mb_at_this_q);

@@ -1012,7 +1003,7 @@

   // Give average a chance to settle though.

   // PGW TODO.. This code is broken for the extended Q range

   if ((cpi->ni_frames >

-       ((unsigned int)cpi->twopass.total_stats->count >> 8)) &&

+       ((int)cpi->twopass.total_stats->count >> 8)) &&

       (cpi->ni_frames > 150)) {

     adjust_maxq_qrange(cpi);

@@ -1038,7 +1029,7 @@

   double speed_correction = 1.0;

   double clip_iiratio;

   double clip_iifactor;

-  int overhead_bits_per_mb;

+  double overhead_bits_per_mb;

   target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20))

@@ -1091,7 +1082,7 @@

       err_correction_factor = 5.0;

     bits_per_mb_at_this_q =

-      vp9_bits_per_mb(INTER_FRAME, Q) + overhead_bits_per_mb;

+      vp9_bits_per_mb(INTER_FRAME, Q) + (int)overhead_bits_per_mb;

     bits_per_mb_at_this_q = (int)(.5 + err_correction_factor *

                                   (double)bits_per_mb_at_this_q);

@@ -1426,7 +1417,7 @@

                     calc_frame_boost(cpi, &this_frame, this_frame_mv_in_out));

-  *f_boost = boost_score;

+  *f_boost = (int)boost_score;

   // Reset for backward looking loop

   boost_score = 0.0;

@@ -1464,7 +1455,7 @@

                     calc_frame_boost(cpi, &this_frame, this_frame_mv_in_out));

-  *b_boost = boost_score;

+  *b_boost = (int)boost_score;

   arf_boost = (*f_boost + *b_boost);

   if (arf_boost < ((b_frames + f_frames) * 20))

@@ -1486,8 +1477,8 @@

   // Note: this_frame->frame has been updated in the loop

   // so it now points at the ARF frame.

   half_gf_int = cpi->baseline_gf_interval >> 1;

-  frames_after_arf = cpi->twopass.total_stats->count -

-                     this_frame->frame - 1;

+  frames_after_arf = (int)(cpi->twopass.total_stats->count -

+                           this_frame->frame - 1);

   switch (cpi->oxcf.arnr_type) {

     case 1: // Backward filter

@@ -1710,7 +1701,7 @@

       (cpi->twopass.kf_group_error_left > 0)) {

     cpi->twopass.gf_group_bits =

       (int)((double)cpi->twopass.kf_group_bits *

-            (gf_group_err / (double)cpi->twopass.kf_group_error_left));

+            (gf_group_err / cpi->twopass.kf_group_error_left));

   } else

     cpi->twopass.gf_group_bits = 0;

@@ -1776,7 +1767,7 @@

       alt_gf_grp_bits =

         (double)cpi->twopass.kf_group_bits  *

         (mod_frame_err * (double)cpi->baseline_gf_interval) /

-        DOUBLE_DIVIDE_CHECK((double)cpi->twopass.kf_group_error_left);

+        DOUBLE_DIVIDE_CHECK(cpi->twopass.kf_group_error_left);

       alt_gf_bits = (int)((double)boost * (alt_gf_grp_bits /

                                            (double)allocation_chunks));

@@ -1792,7 +1783,7 @@

       int alt_gf_bits =

         (int)((double)cpi->twopass.kf_group_bits *

               mod_frame_err /

-              DOUBLE_DIVIDE_CHECK((double)cpi->twopass.kf_group_error_left));

+              DOUBLE_DIVIDE_CHECK(cpi->twopass.kf_group_error_left));

       if (alt_gf_bits > gf_bits) {

         gf_bits = alt_gf_bits;

@@ -1815,7 +1806,7 @@

     // Adjust KF group bits and error remainin

-    cpi->twopass.kf_group_error_left -= gf_group_err;

+    cpi->twopass.kf_group_error_left -= (int64_t)gf_group_err;

     cpi->twopass.kf_group_bits -= cpi->twopass.gf_group_bits;

     if (cpi->twopass.kf_group_bits < 0)

@@ -1826,9 +1817,10 @@

     // of the group (except in Key frame case where this has already

     // happened)

     if (!cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME)

-      cpi->twopass.gf_group_error_left = gf_group_err - gf_first_frame_err;

+      cpi->twopass.gf_group_error_left = (int64_t)(gf_group_err

+                                                   - gf_first_frame_err);

     else

-      cpi->twopass.gf_group_error_left = gf_group_err;

+      cpi->twopass.gf_group_error_left = (int64_t)gf_group_err;

     cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits - cpi->min_frame_bandwidth;

@@ -1848,8 +1840,8 @@

         pct_extra = (boost - 100) / 50;

         pct_extra = (pct_extra > 20) ? 20 : pct_extra;

-        cpi->twopass.alt_extra_bits =

-          (cpi->twopass.gf_group_bits * pct_extra) / 100;

+        cpi->twopass.alt_extra_bits = (int)

+          ((cpi->twopass.gf_group_bits * pct_extra) / 100);

         cpi->twopass.gf_group_bits -= cpi->twopass.alt_extra_bits;

         cpi->twopass.alt_extra_bits /=

           ((cpi->baseline_gf_interval - 1) >> 1);

@@ -1872,9 +1864,9 @@

     avg_stats(&sectionstats);

-    cpi->twopass.section_intra_rating =

-      sectionstats.intra_error /

-      DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);

+    cpi->twopass.section_intra_rating = (int)

+      (sectionstats.intra_error /

+      DOUBLE_DIVIDE_CHECK(sectionstats.coded_error));

     reset_fpf_position(cpi, start_pos);

@@ -1907,10 +1899,11 @@

       target_frame_size = max_bits;

     if (target_frame_size > cpi->twopass.gf_group_bits)

-      target_frame_size = cpi->twopass.gf_group_bits;

+      target_frame_size = (int)cpi->twopass.gf_group_bits;

-  cpi->twopass.gf_group_error_left -= modified_err;                                               // Adjust error remaining

+  // Adjust error remaining

+  cpi->twopass.gf_group_error_left -= (int64_t)modified_err;

   cpi->twopass.gf_group_bits -= target_frame_size;                                                // Adjust bits remaining

   if (cpi->twopass.gf_group_bits < 0)

@@ -2019,25 +2012,26 @@

   // Keep a globally available copy of this and the next frame's iiratio.

-  cpi->twopass.this_iiratio = this_frame_intra_error /

-                              DOUBLE_DIVIDE_CHECK(this_frame_coded_error);

+  cpi->twopass.this_iiratio = (int)(this_frame_intra_error /

+                              DOUBLE_DIVIDE_CHECK(this_frame_coded_error));

     FIRSTPASS_STATS next_frame;

     if (lookup_next_frame_stats(cpi, &next_frame) != EOF) {

-      cpi->twopass.next_iiratio = next_frame.intra_error /

-                                  DOUBLE_DIVIDE_CHECK(next_frame.coded_error);

+      cpi->twopass.next_iiratio = (int)(next_frame.intra_error /

+                                  DOUBLE_DIVIDE_CHECK(next_frame.coded_error));

   // Set nominal per second bandwidth for this frame

-  cpi->target_bandwidth = cpi->per_frame_bandwidth * cpi->output_frame_rate;

+  cpi->target_bandwidth = (int)(cpi->per_frame_bandwidth

+                                * cpi->output_frame_rate);

   if (cpi->target_bandwidth < 0)

     cpi->target_bandwidth = 0;

   // Account for mv, mode and other overheads.

-  overhead_bits = estimate_modemvcost(

-                    cpi, cpi->twopass.total_left_stats);

+  overhead_bits = (int)estimate_modemvcost(

+                        cpi, cpi->twopass.total_left_stats);

   // Special case code for first frame.

   if (cpi->common.current_video_frame == 0) {

@@ -2427,9 +2421,9 @@

     avg_stats(&sectionstats);

-    cpi->twopass.section_intra_rating =

-      sectionstats.intra_error

-      / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);

+    cpi->twopass.section_intra_rating = (int)

+      (sectionstats.intra_error

+      / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error));

   // Reset the first pass file position

@@ -2437,7 +2431,7 @@

   // Work out how many bits to allocate for the key frame itself

   if (1) {

-    int kf_boost = boost_score;

+    int kf_boost = (int)boost_score;

     int allocation_chunks;

     int alt_kf_bits;

@@ -2519,10 +2513,14 @@

     cpi->twopass.kf_group_bits -= cpi->twopass.kf_bits;

-    cpi->twopass.kf_bits += cpi->min_frame_bandwidth;                                          // Add in the minimum frame allowance

+    // Add in the minimum frame allowance

+    cpi->twopass.kf_bits += cpi->min_frame_bandwidth;

-    cpi->per_frame_bandwidth = cpi->twopass.kf_bits;                                           // Peer frame bit target for this frame

-    cpi->target_bandwidth = cpi->twopass.kf_bits * cpi->output_frame_rate;                      // Convert to a per second bitrate

+    // Per frame bit target for this frame

+    cpi->per_frame_bandwidth = cpi->twopass.kf_bits;

+    // Convert to a per second bitrate

+    cpi->target_bandwidth = (int)(cpi->twopass.kf_bits *

+                                  cpi->output_frame_rate);

   // Note the total error score of the kf group minus the key frame itself

--- a/vp9/encoder/generic/csystemdependent.c

+++ b/vp9/encoder/generic/csystemdependent.c

@@ -27,10 +27,6 @@

 void vp9_cmachine_specific_config(VP9_COMP *cpi) {

 #if CONFIG_RUNTIME_CPU_DETECT

   cpi->rtcd.common                    = &cpi->common.rtcd;

-  cpi->rtcd.search.full_search             = vp9_full_search_sad;

-  cpi->rtcd.search.refining_search         = vp9_refining_search_sad;

-  cpi->rtcd.search.diamond_search          = vp9_diamond_search_sad;

   cpi->rtcd.temporal.apply                 = vp9_temporal_filter_apply_c;

 #endif

--- a/vp9/encoder/lookahead.c

+++ b/vp9/encoder/lookahead.c

@@ -43,7 +43,7 @@

 vp9_lookahead_destroy(struct lookahead_ctx *ctx) {

   if (ctx) {

     if (ctx->buf) {

-      int i;

+      unsigned int i;

       for (i = 0; i < ctx->max_sz; i++)

         vp8_yv12_de_alloc_frame_buffer(&ctx->buf[i].img);

@@ -59,7 +59,6 @@

                    unsigned int height,

                    unsigned int depth) {

   struct lookahead_ctx *ctx = NULL;

-  int i;

   /* Clamp the lookahead queue depth */

   if (depth < 1)

@@ -74,6 +73,7 @@

   /* Allocate the lookahead structures */

   ctx = calloc(1, sizeof(*ctx));

   if (ctx) {

+    unsigned int i;

     ctx->max_sz = depth;

     ctx->buf = calloc(depth, sizeof(*ctx->buf));

     if (!ctx->buf)

@@ -175,9 +175,9 @@

   struct lookahead_entry *buf = NULL;

   assert(index < ctx->max_sz);

-  if (index < ctx->sz) {

+  if (index < (int)ctx->sz) {

     index += ctx->read_idx;

-    if (index >= ctx->max_sz)

+    if (index >= (int)ctx->max_sz)

       index -= ctx->max_sz;

     buf = ctx->buf + index;

--- a/vp9/encoder/mbgraph.c

+++ b/vp9/encoder/mbgraph.c

@@ -139,7 +139,7 @@

   // If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well

   if (ref_mv->as_int) {

-    int tmp_err;

+    unsigned int tmp_err;

     int_mv zero_ref_mv, tmp_mv;

     zero_ref_mv.as_int = 0;

@@ -202,7 +202,7 @@

   MACROBLOCK   *const x  = &cpi->mb;

   MACROBLOCKD *const xd = &x->e_mbd;

   MB_PREDICTION_MODE best_mode = -1, mode;

-  int best_err = INT_MAX;

+  unsigned int best_err = INT_MAX;

   // calculate SATD for each intra prediction mode;

   // we're intentionally not doing 4x4, we just want a rough estimate

@@ -449,7 +449,7 @@

   // being a GF - so exit if we don't look ahead beyond that

   if (n_frames <= cpi->frames_till_gf_update_due)

     return;

-  if (n_frames > cpi->common.frames_till_alt_ref_frame)

+  if (n_frames > (int)cpi->common.frames_till_alt_ref_frame)

     n_frames = cpi->common.frames_till_alt_ref_frame;

   if (n_frames > MAX_LAG_BUFFERS)

     n_frames = MAX_LAG_BUFFERS;

--- a/vp9/encoder/mcomp.c

+++ b/vp9/encoder/mcomp.c

@@ -1255,11 +1255,11 @@

 #undef CHECK_POINT

 #undef CHECK_BETTER

-int vp9_diamond_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d,

-                           int_mv *ref_mv, int_mv *best_mv,

-                           int search_param, int sad_per_bit, int *num00,

-                           vp9_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS,

-                           int_mv *center_mv) {

+int vp9_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,

+                             int_mv *ref_mv, int_mv *best_mv,

+                             int search_param, int sad_per_bit, int *num00,

+                             vp9_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS,

+                             int_mv *center_mv) {

   int i, j, step;

   unsigned char *what = (*(b->base_src) + b->src);

@@ -1380,7 +1380,7 @@

   int tot_steps;

   int_mv this_mv;

-  int bestsad = INT_MAX;

+  unsigned int bestsad = INT_MAX;

   int best_site = 0;

   int last_site = 0;

@@ -1568,10 +1568,10 @@

   return bestsme;

-int vp9_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,

-                        int sad_per_bit, int distance,

-                        vp9_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS,

-                        int_mv *center_mv) {

+int vp9_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,

+                          int sad_per_bit, int distance,

+                          vp9_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS,

+                          int_mv *center_mv) {

   unsigned char *what = (*(b->base_src) + b->src);

   int what_stride = b->src_stride;

   unsigned char *in_what;

@@ -1674,7 +1674,7 @@

   unsigned char *bestaddress;

   int_mv *best_mv = &d->bmi.as_mv.first;

   int_mv this_mv;

-  int bestsad = INT_MAX;

+  unsigned int bestsad = INT_MAX;

   int r, c;

   unsigned char *check_here;

@@ -1802,7 +1802,7 @@

   unsigned char *bestaddress;

   int_mv *best_mv = &d->bmi.as_mv.first;

   int_mv this_mv;

-  int bestsad = INT_MAX;

+  unsigned int bestsad = INT_MAX;

   int r, c;

   unsigned char *check_here;

@@ -1942,11 +1942,10 @@

   else

     return INT_MAX;

-int vp9_refining_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,

-                            int error_per_bit, int search_range,

-                            vp9_variance_fn_ptr_t *fn_ptr, DEC_MVCOSTS,

-                            int_mv *center_mv) {

+int vp9_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,

+                              int_mv *ref_mv, int error_per_bit,

+                              int search_range, vp9_variance_fn_ptr_t *fn_ptr,

+                              DEC_MVCOSTS, int_mv *center_mv) {

   MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};

   int i, j;

   short this_row_offset, this_col_offset;

--- a/vp9/encoder/mcomp.h

+++ b/vp9/encoder/mcomp.h

@@ -68,87 +68,24 @@

 extern fractional_mv_step_fp vp9_find_best_sub_pixel_step;

 extern fractional_mv_step_fp vp9_find_best_half_pixel_step;

-#define prototype_full_search_sad(sym)\

-  int (sym)\

-  (\

-   MACROBLOCK *x, \

-   BLOCK *b, \

-   BLOCKD *d, \

-   int_mv *ref_mv, \

-   int sad_per_bit, \

-   int distance, \

-   vp9_variance_fn_ptr_t *fn_ptr, \

-   DEC_MVSADCOSTS, \

-   int_mv *center_mv \

-  )

+typedef int (*vp9_full_search_fn_t)(MACROBLOCK *x, BLOCK *b, BLOCKD *d,

+                                    int_mv *ref_mv, int sad_per_bit,

+                                    int distance, vp9_variance_fn_ptr_t *fn_ptr,

+                                    DEC_MVCOSTS, int_mv *center_mv);

-#define prototype_refining_search_sad(sym)\

-  int (sym)\

-  (\

-   MACROBLOCK *x, \

-   BLOCK *b, \

-   BLOCKD *d, \

-   int_mv *ref_mv, \

-   int sad_per_bit, \

-   int distance, \

-   vp9_variance_fn_ptr_t *fn_ptr, \

-   DEC_MVSADCOSTS, \

-   int_mv *center_mv \

-  )

+typedef int (*vp9_refining_search_fn_t)(MACROBLOCK *x, BLOCK *b, BLOCKD *d,

+                                        int_mv *ref_mv, int sad_per_bit,

+                                        int distance,

+                                        vp9_variance_fn_ptr_t *fn_ptr,

+                                        DEC_MVCOSTS, int_mv *center_mv);

-#define prototype_diamond_search_sad(sym)\

-  int (sym)\

-  (\

-   MACROBLOCK *x, \

-   BLOCK *b, \

-   BLOCKD *d, \

-   int_mv *ref_mv, \

-   int_mv *best_mv, \

-   int search_param, \

-   int sad_per_bit, \

-   int *num00, \

-   vp9_variance_fn_ptr_t *fn_ptr, \

-   DEC_MVSADCOSTS, \

-   int_mv *center_mv \

-  )

+typedef int (*vp9_diamond_search_fn_t)(MACROBLOCK *x, BLOCK *b, BLOCKD *d,

+                                       int_mv *ref_mv, int_mv *best_mv,

+                                       int search_param, int sad_per_bit,

+                                       int *num00,

+                                       vp9_variance_fn_ptr_t *fn_ptr,

+                                       DEC_MVCOSTS, int_mv *center_mv);

-#if ARCH_X86 || ARCH_X86_64

-#include "x86/mcomp_x86.h"

-#endif

-typedef prototype_full_search_sad(*vp9_full_search_fn_t);

-extern prototype_full_search_sad(vp9_full_search_sad);

-extern prototype_full_search_sad(vp9_full_search_sadx3);

-extern prototype_full_search_sad(vp9_full_search_sadx8);

-typedef prototype_refining_search_sad(*vp9_refining_search_fn_t);

-extern prototype_refining_search_sad(vp9_refining_search_sad);

-extern prototype_refining_search_sad(vp9_refining_search_sadx4);

-typedef prototype_diamond_search_sad(*vp9_diamond_search_fn_t);

-extern prototype_diamond_search_sad(vp9_diamond_search_sad);

-extern prototype_diamond_search_sad(vp9_diamond_search_sadx4);

-#ifndef vp9_search_full_search

-#define vp9_search_full_search vp9_full_search_sad

-#endif

-extern prototype_full_search_sad(vp9_search_full_search);

-#ifndef vp9_search_refining_search

-#define vp9_search_refining_search vp9_refining_search_sad

-#endif

-extern prototype_refining_search_sad(vp9_search_refining_search);

-#ifndef vp9_search_diamond_search

-#define vp9_search_diamond_search vp9_diamond_search_sad

-#endif

-extern prototype_diamond_search_sad(vp9_search_diamond_search);

-typedef struct {

-  prototype_full_search_sad(*full_search);

-  prototype_refining_search_sad(*refining_search);

-  prototype_diamond_search_sad(*diamond_search);

-} vp9_search_rtcd_vtable_t;

 #if CONFIG_RUNTIME_CPU_DETECT

 #define SEARCH_INVOKE(ctx,fn) (ctx)->fn

--- a/vp9/encoder/onyx_if.c

+++ b/vp9/encoder/onyx_if.c

@@ -36,12 +36,11 @@

 #include "vp9/common/pred_common.h"

 #include "vp9/encoder/rdopt.h"

 #include "bitstream.h"

+#include "vp9/encoder/picklpf.h"

 #include "ratectrl.h"

-#if CONFIG_NEWBESTREFMV

 #include "vp9/common/mvref_common.h"

-#endif

 #if ARCH_ARM

 #include "vpx_ports/arm.h"

 #endif

@@ -58,18 +57,8 @@

 #define RTCD(x) NULL

 #endif

-extern void vp9_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi);

-extern void vp9_set_alt_lf_level(VP9_COMP *cpi, int filt_val);

-extern void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi);

 extern void vp9_cmachine_specific_config(VP9_COMP *cpi);

-extern void vp9_deblock_frame(YV12_BUFFER_CONFIG *source,

-                              YV12_BUFFER_CONFIG *post,

-                              int filt_lvl, int low_var_thresh, int flag);

 extern void print_tree_update_probs();

 #if HAVE_ARMV7

@@ -80,10 +69,6 @@

                                               YV12_BUFFER_CONFIG *dst_ybc);

 #endif

-int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest);

-extern void vp9_temporal_filter_prepare_c(VP9_COMP *cpi, int distance);

 static void set_default_lf_deltas(VP9_COMP *cpi);

 #define DEFAULT_INTERP_FILTER EIGHTTAP  /* SWITCHABLE for better performance */

@@ -257,7 +242,7 @@

       skip_prob = 255;

     base_skip_false_prob[i][1] = skip_prob;

-    skip_prob = t * 0.75;

+    skip_prob = t * 3 / 4;

     if (skip_prob < 1)

       skip_prob = 1;

     else if (skip_prob > 255)

@@ -264,7 +249,7 @@

       skip_prob = 255;

     base_skip_false_prob[i][2] = skip_prob;

-    skip_prob = t * 1.25;

+    skip_prob = t * 5 / 4;

     if (skip_prob < 1)

       skip_prob = 1;

     else if (skip_prob > 255)

@@ -1413,7 +1398,7 @@

   int64_t llden = denom;

   int64_t llval = val;

-  return llval * llnum / llden;

+  return (int)(llval * llnum / llden);

@@ -1925,7 +1910,7 @@

     vp9_init_first_pass(cpi);

   } else if (cpi->pass == 2) {

     size_t packet_sz = sizeof(FIRSTPASS_STATS);

-    int packets = oxcf->two_pass_stats_in.sz / packet_sz;

+    int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);

     cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;

     cpi->twopass.stats_in = cpi->twopass.stats_in_start;

@@ -1989,9 +1974,9 @@

   cpi->fn_ptr[BLOCK_4X4].copymem    = vp9_copy32xn;

 #endif

-  cpi->full_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, full_search);

-  cpi->diamond_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, diamond_search);

-  cpi->refining_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, refining_search);

+  cpi->full_search_sad = vp9_full_search_sad;

+  cpi->diamond_search_sad = vp9_diamond_search_sad;

+  cpi->refining_search_sad = vp9_refining_search_sad;

   // make sure frame 1 is okay

   cpi->error_bins[0] = cpi->common.MBs;

@@ -2351,7 +2336,7 @@

   for (i = 0; i < 4; i++)

     pkt.data.psnr.psnr[i] = vp9_mse2psnr(pkt.data.psnr.samples[i], 255.0,

-                                         pkt.data.psnr.sse[i]);

+                                         (double)pkt.data.psnr.sse[i]);

   vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt);

@@ -2917,7 +2902,9 @@

   // pass function that sets the target bandwidth so must set it here

   if (cpi->common.refresh_alt_ref_frame) {

     cpi->per_frame_bandwidth = cpi->twopass.gf_bits;                           // Per frame bit target for the alt ref frame

-    cpi->target_bandwidth = cpi->twopass.gf_bits * cpi->output_frame_rate;      // per second target bitrate

+    // per second target bitrate

+    cpi->target_bandwidth = (int)(cpi->twopass.gf_bits *

+                                  cpi->output_frame_rate);

   // Default turn off buffer to buffer copying

@@ -4119,7 +4106,7 @@

                       - cpi->last_time_stamp_seen;

       // do a step update if the duration changes by 10%

       if (last_duration)

-        step = ((this_duration - last_duration) * 10 / last_duration);

+        step = (int)((this_duration - last_duration) * 10 / last_duration);

     if (this_duration) {

@@ -4132,7 +4119,8 @@

          * frame rate. If we haven't seen 1 second yet, then average

          * over the whole interval seen.

*/

-        interval = cpi->source->ts_end - cpi->first_time_stamp_ever;

+        interval = (double)(cpi->source->ts_end

+                            - cpi->first_time_stamp_ever);

         if (interval > 10000000.0)

           interval = 10000000;

@@ -4234,17 +4222,17 @@

         int y_samples = orig->y_height * orig->y_width;

         int uv_samples = orig->uv_height * orig->uv_width;

         int t_samples = y_samples + 2 * uv_samples;

-        int64_t sq_error;

+        double sq_error;

-        ye = calc_plane_error(orig->y_buffer, orig->y_stride,

+        ye = (double)calc_plane_error(orig->y_buffer, orig->y_stride,

                               recon->y_buffer, recon->y_stride, orig->y_width,

                               orig->y_height);

-        ue = calc_plane_error(orig->u_buffer, orig->uv_stride,

+        ue = (double)calc_plane_error(orig->u_buffer, orig->uv_stride,

                               recon->u_buffer, recon->uv_stride, orig->uv_width,

                               orig->uv_height);

-        ve = calc_plane_error(orig->v_buffer, orig->uv_stride,

+        ve = (double)calc_plane_error(orig->v_buffer, orig->uv_stride,

                               recon->v_buffer, recon->uv_stride, orig->uv_width,

                               orig->uv_height);

@@ -4265,15 +4253,15 @@

 #endif

           vp9_clear_system_state();

-          ye = calc_plane_error(orig->y_buffer, orig->y_stride,

+          ye = (double)calc_plane_error(orig->y_buffer, orig->y_stride,

                                 pp->y_buffer, pp->y_stride, orig->y_width,

                                 orig->y_height);

-          ue = calc_plane_error(orig->u_buffer, orig->uv_stride,

+          ue = (double)calc_plane_error(orig->u_buffer, orig->uv_stride,

                                 pp->u_buffer, pp->uv_stride, orig->uv_width,

                                 orig->uv_height);

-          ve = calc_plane_error(orig->v_buffer, orig->uv_stride,

+          ve = (double)calc_plane_error(orig->v_buffer, orig->uv_stride,

                                 pp->v_buffer, pp->uv_stride, orig->uv_width,

                                 orig->uv_height);

--- a/vp9/encoder/onyx_int.h

+++ b/vp9/encoder/onyx_int.h

@@ -349,7 +349,6 @@

 typedef struct VP9_ENCODER_RTCD {

   VP9_COMMON_RTCD            *common;

-  vp9_search_rtcd_vtable_t    search;

   vp9_temporal_rtcd_vtable_t  temporal;

 } VP9_ENCODER_RTCD;

@@ -665,7 +664,8 @@

     int maxq_min_limit;

     int static_scene_max_gf_interval;

     int kf_bits;

-    int gf_group_error_left;           // Remaining error from uncoded frames in a gf group. Two pass use only

+    // Remaining error from uncoded frames in a gf group. Two pass use only

+    int64_t gf_group_error_left;

     // Projected total bits available for a key frame group of frames

     int64_t kf_group_bits;

@@ -673,8 +673,10 @@

     // Error score of frames still to be coded in kf group

     int64_t kf_group_error_left;

-    int gf_group_bits;                // Projected Bits available for a group of frames including 1 GF or ARF

-    int gf_bits;                     // Bits for the golden frame or ARF - 2 pass only

+    // Projected Bits available for a group of frames including 1 GF or ARF

+    int64_t gf_group_bits;

+    // Bits for the golden frame or ARF - 2 pass only

+    int gf_bits;

     int alt_extra_bits;

     int sr_update_lag;

@@ -764,10 +766,12 @@

 void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x);

-void vp9_tokenize_mb(VP9_COMP *, MACROBLOCKD *, TOKENEXTRA **, int dry_run);

-void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run);

 void vp9_set_speed_features(VP9_COMP *cpi);

+extern int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source,

+                           YV12_BUFFER_CONFIG *dest);

+extern void vp9_alloc_compressor_data(VP9_COMP *cpi);

 #if CONFIG_DEBUG

 #define CHECK_MEM_ERROR(lval,expr) do {\

--- a/vp9/encoder/picklpf.c

+++ b/vp9/encoder/picklpf.c

@@ -11,6 +11,7 @@

 #include "vp9/common/onyxc_int.h"

 #include "onyx_int.h"

+#include "vp9/encoder/picklpf.h"

 #include "quantize.h"

 #include "vpx_mem/vpx_mem.h"

 #include "vpx_scale/vpxscale.h"

@@ -20,8 +21,6 @@

 #include "vpx_ports/arm.h"

 #endif

-extern int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source,

-                           YV12_BUFFER_CONFIG *dest);

 #if HAVE_ARMV7

 extern void vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);

 #endif

--- /dev/null

+++ b/vp9/encoder/picklpf.h

@@ -1,0 +1,26 @@

+/*

+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#ifndef __INC_PICKLPF_H

+#define __INC_PICKLPF_H

+struct yv12_buffer_config;

+struct VP9_COMP;

+extern void vp9_pick_filter_level_fast(struct yv12_buffer_config *sd,

+                                       struct VP9_COMP *cpi);

+extern void vp9_set_alt_lf_level(struct VP9_COMP *cpi, int filt_val);

+extern void vp9_pick_filter_level(struct yv12_buffer_config *sd,

+                                  struct VP9_COMP *cpi);

+#endif  // __INC_PICKLPF_H

--- a/vp9/encoder/ratectrl.c

+++ b/vp9/encoder/ratectrl.c

@@ -311,8 +311,8 @@

   target = cpi->per_frame_bandwidth;

   if (cpi->oxcf.rc_max_intra_bitrate_pct) {

-    unsigned int max_rate = cpi->per_frame_bandwidth

-                            * cpi->oxcf.rc_max_intra_bitrate_pct / 100;

+    int max_rate = cpi->per_frame_bandwidth

+                 * cpi->oxcf.rc_max_intra_bitrate_pct / 100;

     if (target > max_rate)

       target = max_rate;

--- a/vp9/encoder/rdopt.c

+++ b/vp9/encoder/rdopt.c

@@ -41,9 +41,7 @@

 #include "vp9/common/pred_common.h"

 #include "vp9/common/entropy.h"

 #include "vpx_rtcd.h"

-#if CONFIG_NEWBESTREFMV

 #include "vp9/common/mvref_common.h"

-#endif

 #if CONFIG_RUNTIME_CPU_DETECT

 #define IF_RTCD(x)  (x)

@@ -51,9 +49,6 @@

 #define IF_RTCD(x)  NULL

 #endif

-extern void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x);

-extern void vp9_update_zbin_extra(VP9_COMP *cpi, MACROBLOCK *x);

 #define MAXF(a,b)            (((a) > (b)) ? (a) : (b))

 #define INVALID_MV 0x80008000

@@ -391,59 +386,6 @@

-void vp9_auto_select_speed(VP9_COMP *cpi) {

-  int milliseconds_for_compress = (int)(1000000 / cpi->oxcf.frame_rate);

-  milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;

-  /*

-  // this is done during parameter valid check

-  if( cpi->oxcf.cpu_used > 16)

-      cpi->oxcf.cpu_used = 16;

-  if( cpi->oxcf.cpu_used < -16)

-      cpi->oxcf.cpu_used = -16;

-  */

-  if (cpi->avg_pick_mode_time < milliseconds_for_compress &&

-      (cpi->avg_encode_time - cpi->avg_pick_mode_time) <

-      milliseconds_for_compress) {

-    if (cpi->avg_pick_mode_time == 0) {

-      cpi->Speed = 4;

-    } else {

-      if (milliseconds_for_compress * 100 < cpi->avg_encode_time * 95) {

-        cpi->Speed          += 2;

-        cpi->avg_pick_mode_time = 0;

-        cpi->avg_encode_time = 0;

-        if (cpi->Speed > 16) {

-          cpi->Speed = 16;

-        }

-      }

-      if (milliseconds_for_compress * 100 >

-          cpi->avg_encode_time * auto_speed_thresh[cpi->Speed]) {

-        cpi->Speed          -= 1;

-        cpi->avg_pick_mode_time = 0;

-        cpi->avg_encode_time = 0;

-        // In real-time mode, cpi->speed is in [4, 16].

-        if (cpi->Speed < 4) {      // if ( cpi->Speed < 0 )

-          cpi->Speed = 4;        // cpi->Speed = 0;

-        }

-      }

-    }

-  } else {

-    cpi->Speed += 4;

-    if (cpi->Speed > 16)

-      cpi->Speed = 16;

-    cpi->avg_pick_mode_time = 0;

-    cpi->avg_encode_time = 0;

-  }

-}

 int vp9_block_error_c(short *coeff, short *dqcoeff, int block_size) {

   int i, error = 0;

@@ -613,7 +555,7 @@

       default_eob = 64;

       if (type == PLANE_TYPE_Y_WITH_DC) {

         BLOCKD *bb;

-        int ib = (b - xd->block);

+        int ib = (int)(b - xd->block);

         if (ib < 16) {

           ib = (ib & 8) + ((ib & 4) >> 1);

           bb = xd->block + ib;

@@ -707,9 +649,6 @@

   BLOCK *beptr;

   int d;

-  vp9_subtract_mby(mb->src_diff, *(mb->block[0].base_src), xd->predictor,

-                   mb->block[0].src_stride);

   // Fdct and building the 2nd order block

   for (beptr = mb->block; beptr < mb->block + 16; beptr += 2) {

     mb->vp9_short_fdct8x4(beptr->src_diff, beptr->coeff, 32);

@@ -778,9 +717,6 @@

   BLOCKD *const x_y2  = xd->block + 24;

   int d;

-  vp9_subtract_mby(mb->src_diff, *(mb->block[0].base_src), xd->predictor,

-                   mb->block[0].src_stride);

   vp9_transform_mby_8x8(mb);

   vp9_quantize_mby_8x8(mb);

@@ -827,9 +763,6 @@

   BLOCK  *be = &mb->block[0];

   TX_TYPE tx_type;

-  vp9_subtract_mby(mb->src_diff, *(mb->block[0].base_src), mb->e_mbd.predictor,

-                   mb->block[0].src_stride);

   tx_type = get_tx_type_16x16(xd, b);

   if (tx_type != DCT_DCT) {

     vp9_fht(be->src_diff, 32, be->coeff, tx_type, 16);

@@ -866,7 +799,9 @@

   int d16x16, r16x16, r16x16s, s16x16;

   int64_t rd16x16, rd16x16s;

-  // FIXME don't do sub x3

+  vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor,

+                   x->block[0].src_stride);

   if (skip_prob == 0)

     skip_prob = 1;

   s0 = vp9_cost_bit(skip_prob, 0);

@@ -1243,7 +1178,7 @@

   cost += vp9_cost_bit(128, allow_comp);

 #endif

   *Rate = cost;

-  *rate_y += tot_rate_y;

+  *rate_y = tot_rate_y;

   *Distortion = distortion;

   return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);

@@ -1579,7 +1514,7 @@

 #endif

   *Rate = cost;

-  *rate_y += tot_rate_y;

+  *rate_y = tot_rate_y;

   *Distortion = distortion;

   return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);

@@ -2767,7 +2702,7 @@

   if (mbmi->second_ref_frame)

     x->partition_info->bmi[15].second_mv.as_int = bsi.second_mvs[15].as_int;

-  return bsi.segment_rd;

+  return (int)(bsi.segment_rd);

 /* Order arr in increasing order, original position stored in idx */

@@ -3251,7 +3186,6 @@

   MACROBLOCKD *xd = &x->e_mbd;

   MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;

   vp9_find_near_mvs(xd, xd->mode_info_context,

                     xd->prev_mode_info_context,

                     &frame_nearest_mv[frame_type], &frame_near_mv[frame_type],

@@ -3262,7 +3196,6 @@

   u_buffer[frame_type] = yv12->u_buffer + recon_uvoffset;

   v_buffer[frame_type] = yv12->v_buffer + recon_uvoffset;

-#if CONFIG_NEWBESTREFMV

   vp9_find_mv_refs(xd, xd->mode_info_context,

                    xd->prev_mode_info_context,

                    frame_type,

@@ -3275,7 +3208,6 @@

                         &frame_best_ref_mv[frame_type],

                         &frame_nearest_mv[frame_type],

                         &frame_near_mv[frame_type]);

-#endif

 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,

@@ -3453,7 +3385,7 @@

 #endif

-    if (sse < threshold) {

+    if ((int)sse < threshold) {

       unsigned int q2dc = xd->block[24].dequant[0];

       /* If there is no codeable 2nd order dc

        or a very small uniform pixel change change */

@@ -3527,10 +3459,10 @@

   return this_rd;  // if 0, this will be re-calculated by caller

-void vp9_rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,

-                            int recon_yoffset, int recon_uvoffset,

-                            int *returnrate, int *returndistortion,

-                            int64_t *returnintra) {

+static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,

+                               int recon_yoffset, int recon_uvoffset,

+                               int *returnrate, int *returndistortion,

+                               int64_t *returnintra) {

   VP9_COMMON *cm = &cpi->common;

   MACROBLOCKD *xd = &x->e_mbd;

   union b_mode_info best_bmodes[16];

@@ -3936,7 +3868,7 @@

                                            second_ref, best_yrd, mdcounts,

                                            &rate, &rate_y, &distortion,

                                            &skippable,

-                                           this_rd_thresh, seg_mvs,

+                                           (int)this_rd_thresh, seg_mvs,

                                            txfm_cache);

       rate2 += rate;

       distortion2 += distortion;

@@ -4153,7 +4085,9 @@

         for (i = 0; i < NB_TXFM_MODES; i++) {

           int64_t adj_rd;

           if (this_mode != B_PRED) {

-            adj_rd = this_rd + txfm_cache[i] - txfm_cache[cm->txfm_mode];

+            const int64_t txfm_mode_diff =

+                txfm_cache[i] - txfm_cache[cm->txfm_mode];

+            adj_rd = this_rd + txfm_mode_diff;

           } else {

             adj_rd = this_rd;

@@ -4268,11 +4202,12 @@

 end:

-  store_coding_context(x, &x->mb_context[xd->mb_index], best_mode_index, &best_partition,

-                       &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame],

-                       &frame_best_ref_mv[xd->mode_info_context->mbmi.second_ref_frame],

-                       best_pred_diff[0], best_pred_diff[1], best_pred_diff[2],

-                       best_txfm_diff);

+  store_coding_context(x, &x->mb_context[xd->mb_index],

+    best_mode_index, &best_partition,

+    &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame],

+    &frame_best_ref_mv[xd->mode_info_context->mbmi.second_ref_frame],

+    (int)best_pred_diff[0], (int)best_pred_diff[1], (int)best_pred_diff[2],

+    best_txfm_diff);

 #if CONFIG_SUPERBLOCKS

@@ -4834,8 +4769,8 @@

     int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;

-    vp9_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,

-                           &distortion, &intra_error);

+    rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,

+                       &distortion, &intra_error);

     /* restore cpi->zbin_mode_boost_enabled */

     cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;

--- a/vp9/encoder/rdopt.h

+++ b/vp9/encoder/rdopt.h

@@ -17,10 +17,7 @@

 extern void vp9_initialize_rd_consts(VP9_COMP *cpi, int Qvalue);

-extern void vp9_rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,

-                                   int recon_yoffset, int recon_uvoffset,

-                                   int *returnrate, int *returndistortion,

-                                   int64_t *returnintra);

+extern void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex);

 extern void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,

                                    int *r, int *d);

@@ -27,6 +24,14 @@

 extern void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,

                                       int *r, int *d);

+extern void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,

+                                           int recon_yoffset,

+                                           int recon_uvoffset, int *r, int *d);

+extern int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,

+                                         int recon_yoffset, int recon_uvoffset,

+                                         int *returnrate, int *returndist);

 extern void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCKD *xd,

                         const MODE_INFO *here, int_mv *mvp,

--- a/vp9/encoder/temporal_filter.h

+++ b/vp9/encoder/temporal_filter.h

@@ -44,4 +44,8 @@

 #define TEMPORAL_INVOKE(ctx,fn) vp9_temporal_filter_##fn

 #endif

+struct VP9_COMP;

+extern void vp9_temporal_filter_prepare_c(struct VP9_COMP *cpi, int distance);

 #endif // __INC_TEMPORAL_FILTER_H

--- a/vp9/encoder/tokenize.c

+++ b/vp9/encoder/tokenize.c

@@ -48,9 +48,6 @@

                     [PREV_COEF_CONTEXTS][ENTROPY_NODES] [2];

 #endif  /* ENTROPY_STATS */

-void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run);

-void vp9_fix_contexts(MACROBLOCKD *xd);

 static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2];

 const TOKENVALUE *vp9_dct_value_tokens_ptr;

 static int dct_value_cost[DCT_MAX_VALUE * 2];

--- a/vp9/encoder/tokenize.h

+++ b/vp9/encoder/tokenize.h

@@ -29,13 +29,21 @@

   unsigned char   skip_eob_node;

 } TOKENEXTRA;

-int rd_cost_mby(MACROBLOCKD *);

 extern int vp9_mby_is_skippable_4x4(MACROBLOCKD *xd, int has_y2_block);

 extern int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd);

 extern int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd, int has_y2_block);

 extern int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd);

 extern int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd);

+struct VP9_COMP;

+extern void vp9_tokenize_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd,

+                            TOKENEXTRA **t, int dry_run);

+extern void vp9_stuff_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd,

+                         TOKENEXTRA **t, int dry_run);

+extern void vp9_fix_contexts(MACROBLOCKD *xd);

 #ifdef ENTROPY_STATS

 void init_context_counters();

--- a/vp9/encoder/variance_c.c

+++ b/vp9/encoder/variance_c.c

@@ -11,6 +11,7 @@

 #include "variance.h"

 #include "vp9/common/filter.h"

+#include "vp9/common/subpelvar.h"

 unsigned int vp9_get_mb_ss_c(const short *src_ptr) {

@@ -24,32 +25,7 @@

-static void variance(const unsigned char *src_ptr,

-                     int  source_stride,

-                     const unsigned char *ref_ptr,

-                     int  recon_stride,

-                     int  w,

-                     int  h,

-                     unsigned int *sse,

-                     int *sum) {

-  int i, j;

-  int diff;

-  *sum = 0;

-  *sse = 0;

-  for (i = 0; i < h; i++) {

-    for (j = 0; j < w; j++) {

-      diff = src_ptr[j] - ref_ptr[j];

-      *sum += diff;

-      *sse += diff * diff;

-    }

-    src_ptr += source_stride;

-    ref_ptr += recon_stride;

-  }

-}

 #if CONFIG_SUPERBLOCKS

 unsigned int vp9_variance32x32_c(const unsigned char *src_ptr,

                                  int  source_stride,

@@ -146,113 +122,6 @@

-/****************************************************************************

- *

- *  ROUTINE       : filter_block2d_bil_first_pass

- *

- *  INPUTS        : UINT8  *src_ptr          : Pointer to source block.

- *                  UINT32 src_pixels_per_line : Stride of input block.

- *                  UINT32 pixel_step        : Offset between filter input samples (see notes).

- *                  UINT32 output_height     : Input block height.

- *                  UINT32 output_width      : Input block width.

- *                  INT32  *vp9_filter          : Array of 2 bi-linear filter taps.

- *

- *  OUTPUTS       : INT32 *output_ptr        : Pointer to filtered block.

- *

- *  RETURNS       : void

- *

- *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in

- *                  either horizontal or vertical direction to produce the

- *                  filtered output block. Used to implement first-pass

- *                  of 2-D separable filter.

- *

- *  SPECIAL NOTES : Produces INT32 output to retain precision for next pass.

- *                  Two filter taps should sum to VP9_FILTER_WEIGHT.

- *                  pixel_step defines whether the filter is applied

- *                  horizontally (pixel_step=1) or vertically (pixel_step=stride).

- *                  It defines the offset required to move from one input

- *                  to the next.

- *

- ****************************************************************************/

-static void var_filter_block2d_bil_first_pass(const unsigned char *src_ptr,

-                                              unsigned short *output_ptr,

-                                              unsigned int src_pixels_per_line,

-                                              int pixel_step,

-                                              unsigned int output_height,

-                                              unsigned int output_width,

-                                              const short *vp9_filter) {

-  unsigned int i, j;

-  for (i = 0; i < output_height; i++) {

-    for (j = 0; j < output_width; j++) {

-      // Apply bilinear filter

-      output_ptr[j] = (((int)src_ptr[0]          * vp9_filter[0]) +

-                       ((int)src_ptr[pixel_step] * vp9_filter[1]) +

-                       (VP9_FILTER_WEIGHT / 2)) >> VP9_FILTER_SHIFT;

-      src_ptr++;

-    }

-    // Next row...

-    src_ptr    += src_pixels_per_line - output_width;

-    output_ptr += output_width;

-  }

-}

-/****************************************************************************

- *

- *  ROUTINE       : filter_block2d_bil_second_pass

- *

- *  INPUTS        : INT32  *src_ptr          : Pointer to source block.

- *                  UINT32 src_pixels_per_line : Stride of input block.

- *                  UINT32 pixel_step        : Offset between filter input samples (see notes).

- *                  UINT32 output_height     : Input block height.

- *                  UINT32 output_width      : Input block width.

- *                  INT32  *vp9_filter          : Array of 2 bi-linear filter taps.

- *

- *  OUTPUTS       : UINT16 *output_ptr       : Pointer to filtered block.

- *

- *  RETURNS       : void

- *

- *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in

- *                  either horizontal or vertical direction to produce the

- *                  filtered output block. Used to implement second-pass

- *                  of 2-D separable filter.

- *

- *  SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.

- *                  Two filter taps should sum to VP9_FILTER_WEIGHT.

- *                  pixel_step defines whether the filter is applied

- *                  horizontally (pixel_step=1) or vertically (pixel_step=stride).

- *                  It defines the offset required to move from one input

- *                  to the next.

- *

- ****************************************************************************/

-static void var_filter_block2d_bil_second_pass(const unsigned short *src_ptr,

-                                               unsigned char *output_ptr,

-                                               unsigned int src_pixels_per_line,

-                                               unsigned int pixel_step,

-                                               unsigned int output_height,

-                                               unsigned int output_width,

-                                               const short *vp9_filter) {

-  unsigned int  i, j;

-  int  Temp;

-  for (i = 0; i < output_height; i++) {

-    for (j = 0; j < output_width; j++) {

-      // Apply filter

-      Temp = ((int)src_ptr[0]         * vp9_filter[0]) +

-             ((int)src_ptr[pixel_step] * vp9_filter[1]) +

-             (VP9_FILTER_WEIGHT / 2);

-      output_ptr[j] = (unsigned int)(Temp >> VP9_FILTER_SHIFT);

-      src_ptr++;

-    }

-    // Next row...

-    src_ptr    += src_pixels_per_line - output_width;

-    output_ptr += output_width;

-  }

-}

 unsigned int vp9_sub_pixel_variance4x4_c(const unsigned char  *src_ptr,

                                          int  src_pixels_per_line,

                                          int  xoffset,

@@ -469,72 +338,3 @@

   return vp9_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);

-#if CONFIG_NEWBESTREFMV

-unsigned int vp9_variance2x16_c(const unsigned char *src_ptr,

-                                const int  source_stride,

-                                const unsigned char *ref_ptr,

-                                const int  recon_stride,

-                                unsigned int *sse) {

-  unsigned int var;

-  int avg;

-  variance(src_ptr, source_stride, ref_ptr, recon_stride, 2, 16, &var, &avg);

-  *sse = var;

-  return (var - ((avg * avg) >> 5));

-}

-unsigned int vp9_variance16x2_c(const unsigned char *src_ptr,

-                                const int  source_stride,

-                                const unsigned char *ref_ptr,

-                                const int  recon_stride,

-                                unsigned int *sse) {

-  unsigned int var;

-  int avg;

-  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 2, &var, &avg);

-  *sse = var;

-  return (var - ((avg * avg) >> 5));

-}

-unsigned int vp9_sub_pixel_variance16x2_c(const unsigned char  *src_ptr,

-                                          const int  src_pixels_per_line,

-                                          const int  xoffset,

-                                          const int  yoffset,

-                                          const unsigned char *dst_ptr,

-                                          const int dst_pixels_per_line,

-                                          unsigned int *sse) {

-  unsigned short FData3[16 * 3];  // Temp data bufffer used in filtering

-  unsigned char  temp2[20 * 16];

-  const short *HFilter, *VFilter;

-  HFilter = vp9_bilinear_filters[xoffset];

-  VFilter = vp9_bilinear_filters[yoffset];

-  var_filter_block2d_bil_first_pass(src_ptr, FData3,

-                                    src_pixels_per_line, 1, 3, 16, HFilter);

-  var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 2, 16, VFilter);

-  return vp9_variance16x2_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);

-}

-unsigned int vp9_sub_pixel_variance2x16_c(const unsigned char  *src_ptr,

-                                          const int  src_pixels_per_line,

-                                          const int  xoffset,

-                                          const int  yoffset,

-                                          const unsigned char *dst_ptr,

-                                          const int dst_pixels_per_line,

-                                          unsigned int *sse) {

-  unsigned short FData3[2 * 17];  // Temp data bufffer used in filtering

-  unsigned char  temp2[2 * 16];

-  const short *HFilter, *VFilter;

-  HFilter = vp9_bilinear_filters[xoffset];

-  VFilter = vp9_bilinear_filters[yoffset];

-  var_filter_block2d_bil_first_pass(src_ptr, FData3,

-                                    src_pixels_per_line, 1, 17, 2, HFilter);

-  var_filter_block2d_bil_second_pass(FData3, temp2, 2, 2, 16, 2, VFilter);

-  return vp9_variance2x16_c(temp2, 2, dst_ptr, dst_pixels_per_line, sse);

-}

-#endif

--- a/vp9/encoder/x86/x86_csystemdependent.c

+++ b/vp9/encoder/x86/x86_csystemdependent.c

@@ -95,20 +95,6 @@

 #endif

-#if HAVE_SSE3

-  if (flags & HAS_SSE3) {

-    cpi->rtcd.search.full_search             = vp9_full_search_sadx3;

-    cpi->rtcd.search.diamond_search          = vp9_diamond_search_sadx4;

-    cpi->rtcd.search.refining_search         = vp9_refining_search_sadx4;

-  }

-#endif

-#if HAVE_SSE4_1

-  if (flags & HAS_SSE4_1) {

-    cpi->rtcd.search.full_search             = vp9_full_search_sadx8;

-  }

-#endif

 #endif

--- a/vp9/vp9_common.mk

+++ b/vp9/vp9_common.mk

@@ -53,6 +53,7 @@

 VP9_COMMON_SRCS-yes += common/rtcd.c

 VP9_COMMON_SRCS-yes += common/rtcd_defs.sh

 VP9_COMMON_SRCS-yes += common/sadmxn.h

+VP9_COMMON_SRCS-yes += common/subpelvar.h

 VP9_COMMON_SRCS-yes += common/seg_common.h

 VP9_COMMON_SRCS-yes += common/seg_common.c

 VP9_COMMON_SRCS-yes += common/setupintrarecon.h

--- a/vp9/vp9_cx_iface.c

+++ b/vp9/vp9_cx_iface.c

@@ -179,7 +179,7 @@

   if (cfg->g_pass == VPX_RC_LAST_PASS) {

     size_t           packet_sz = sizeof(FIRSTPASS_STATS);

-    int              n_packets = cfg->rc_twopass_stats_in.sz / packet_sz;

+    int              n_packets = (int)(cfg->rc_twopass_stats_in.sz / packet_sz);

     FIRSTPASS_STATS *stats;

     if (!cfg->rc_twopass_stats_in.buf)

@@ -698,9 +698,9 @@

         pkt.data.frame.pts =

           (dst_time_stamp * ctx->cfg.g_timebase.den + round)

           / ctx->cfg.g_timebase.num / 10000000;

-        pkt.data.frame.duration =

-          (delta * ctx->cfg.g_timebase.den + round)

-          / ctx->cfg.g_timebase.num / 10000000;

+        pkt.data.frame.duration = (unsigned long)

+          ((delta * ctx->cfg.g_timebase.den + round)

+          / ctx->cfg.g_timebase.num / 10000000);

         pkt.data.frame.flags = lib_flags << 16;

         if (lib_flags & FRAMEFLAGS_KEY)

--- a/vpx_scale/vpx_scale.mk

+++ b/vpx_scale/vpx_scale.mk

@@ -5,7 +5,6 @@

 SCALE_SRCS-yes += generic/vpxscale.c

 SCALE_SRCS-yes += generic/yv12config.c

 SCALE_SRCS-yes += generic/yv12extend.c

-SCALE_SRCS-yes += generic/yv12extend_generic.h

 SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING) += generic/gen_scalers.c

 #neon

--

⑨