shithub: openh264

Download patch

ref: 97f795c4b596f863f255bde4f64c9d7c1c711d58
parent: a5839939424a9deae3554b5f2204a058e47c6df9
author: gxw <guxiwei-hf@loongson.cn>
date: Tue Oct 22 15:20:19 EDT 2019

Adjust the mmi/msa detection mode for mips platform.

Use mips-simd-check.sh before make to test whether the current compiler
supports mmi/msa. If it does, enable mmi/msa.
For runtime detection, use the model name in /proc/cpuinfo to test whether
the current cpu supports mmi/msa.
Now we can use the following make commands on the mips platform:
1. make (automatic detection mmi/msa)
2. make ENABLE_MMI=No (disable mmi)
3. make ENABLE_MSA=No (disable msa)
4. make ENABLE_MMI=No ENABLE_MSA=No (disable mmi and msa)

Change-Id: Ibd348ebc11912d7fca1b548c76838675d69b7c40
Signed-off-by: gxw <guxiwei-hf@loongson.cn>

Change-Id: I721119c55bdd4d472675e07add17bf8d0ecdbc63

--- a/build/arch.mk
+++ b/build/arch.mk
@@ -30,14 +30,26 @@
 endif
 endif
 
-#for loongson
+#for mips
 ifneq ($(filter mips mips64, $(ARCH)),)
 ifeq ($(USE_ASM), Yes)
+ENABLE_MMI=Yes
+ENABLE_MSA=Yes
 ASM_ARCH = mips
 ASMFLAGS += -I$(SRC_PATH)codec/common/mips/
-LOONGSON3A = $(shell g++ -dM -E - < /dev/null | grep '_MIPS_TUNE ' | cut -f 3 -d " ")
-ifeq ($(LOONGSON3A), "loongson3a")
-CFLAGS += -DHAVE_MMI
+#mmi: probe $(CC) once (:= stores the result, so later ifeq tests reuse it and agree with CFLAGS)
+ifeq ($(ENABLE_MMI), Yes)
+ENABLE_MMI := $(shell $(SRC_PATH)build/mips-simd-check.sh $(CC) mmi)
+ifeq ($(ENABLE_MMI), Yes)
+CFLAGS += -DHAVE_MMI -Wa,-mloongson-mmi,-mloongson-ext
+endif
+endif
+#msa: probe $(CC) once (:= stores the result, so later ifeq tests reuse it and agree with CFLAGS)
+ifeq ($(ENABLE_MSA), Yes)
+ENABLE_MSA := $(shell $(SRC_PATH)build/mips-simd-check.sh $(CC) msa)
+ifeq ($(ENABLE_MSA), Yes)
+CFLAGS += -DHAVE_MSA -mmsa
+endif
 endif
 endif
 endif
--- /dev/null
+++ b/build/mips-simd-check.sh
@@ -1,0 +1,32 @@
+#!/bin/bash
+#**********************************************************************************
+#    This script is used by build/arch.mk on mips to detect whether the compiler
+#    supports the simd instruction sets mmi and msa (maybe more in the future).
+#
+#   --usage:
+#             ./mips-simd-check.sh $(CC) mmi
+#         or  ./mips-simd-check.sh $(CC) msa
+#
+# date:  10/17/2019 Created
+#**********************************************************************************
+
+TMPC=$(mktemp tmp.XXXXXX.c)
+TMPO=$(mktemp tmp.XXXXXX.o)
+if [ "$2" == "mmi" ]
+then
+   echo "void main(void){ __asm__ volatile(\"punpcklhw \$f0, \$f0, \$f0\"); }" > "$TMPC"
+   $1 -Wa,-mloongson-mmi "$TMPC" -o "$TMPO" &> /dev/null
+   if test -s "$TMPO"
+   then
+      echo "Yes"
+   fi
+elif [ "$2" == "msa" ]
+then
+   echo "void main(void){ __asm__ volatile(\"addvi.b \$w0, \$w1, 1\"); }" > "$TMPC"
+   $1 -mmsa "$TMPC" -o "$TMPO" &> /dev/null
+   if test -s "$TMPO"
+   then
+      echo "Yes"
+   fi
+fi
+rm -f "$TMPC" "$TMPO"
--- a/build/mktargets.py
+++ b/build/mktargets.py
@@ -119,9 +119,9 @@
         armfiles.append(file)
 mipsfiles = []
 for file in cfiles:
-  c = file.split('/')
-  if 'mips' in c:
-    mipsfiles.append(file)
+    c = file.split('/')
+    if 'mips' in c:
+        mipsfiles.append(file)
 cfiles = [x for x in cfiles if x not in mipsfiles]
 
 
@@ -181,15 +181,34 @@
     f.write("OBJS += $(%s_OBJSARM64)\n\n"%(PREFIX))
 
 if len(mipsfiles) > 0:
-  f.write("%s_ASM_MIPS_SRCS=\\\n"%(PREFIX))
-  for c in mipsfiles:
-    f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c))
-  f.write("\n")
-  f.write("%s_OBJSMIPS += $(%s_ASM_MIPS_SRCS:.c=.$(OBJ))\n"%(PREFIX, PREFIX))
-  f.write("ifeq ($(ASM_ARCH), mips)\n")
-  f.write("%s_OBJS += $(%s_OBJSMIPS)\n"%(PREFIX,PREFIX))
-  f.write("endif\n")
-  f.write("OBJS += $(%s_OBJSMIPS)\n\n"%(PREFIX))
+    mmifiles = []
+    for file in mipsfiles:
+        if '_mmi' in file:
+            mmifiles.append(file)
+    f.write("%s_ASM_MIPS_MMI_SRCS=\\\n"%(PREFIX))
+    for c in mmifiles:
+        f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c))
+    f.write("\n")
+    f.write("%s_OBJSMIPS_MMI += $(%s_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))\n\n"%(PREFIX, PREFIX))
+    msafiles = []
+    for file in mipsfiles:
+        if '_msa' in file:
+            msafiles.append(file)
+    f.write("%s_ASM_MIPS_MSA_SRCS=\\\n"%(PREFIX))
+    for c in msafiles:
+        f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c))
+    f.write("\n")
+    f.write("%s_OBJSMIPS_MSA += $(%s_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))\n"%(PREFIX, PREFIX))
+    f.write("ifeq ($(ASM_ARCH), mips)\n")
+    f.write("ifeq ($(ENABLE_MMI), Yes)\n")
+    f.write("%s_OBJS += $(%s_OBJSMIPS_MMI)\n"%(PREFIX,PREFIX))
+    f.write("endif\n")
+    f.write("ifeq ($(ENABLE_MSA), Yes)\n")
+    f.write("%s_OBJS += $(%s_OBJSMIPS_MSA)\n"%(PREFIX,PREFIX))
+    f.write("endif\n")
+    f.write("endif\n")
+    f.write("OBJS += $(%s_OBJSMIPS_MMI)\n"%(PREFIX))
+    f.write("OBJS += $(%s_OBJSMIPS_MSA)\n\n"%(PREFIX))
 
 f.write("OBJS += $(%s_OBJS)\n\n"%(PREFIX))
 write_cpp_rule_pattern(f)
--- a/codec/common/inc/cpu_core.h
+++ b/codec/common/inc/cpu_core.h
@@ -86,6 +86,7 @@
 
 /* For loongson */
 #define WELS_CPU_MMI        0x00000001  /* mmi */
+#define WELS_CPU_MSA        0x00000002  /* msa */
 
 /*
  *  Interfaces for CPU core feature detection as below
--- a/codec/common/src/cpu.cpp
+++ b/codec/common/src/cpu.cpp
@@ -309,12 +309,45 @@
 
 #elif defined(mips)
 /* for loongson */
+/* Derive WELS_CPU_MMI / WELS_CPU_MSA flags from /proc/cpuinfo (0 when it cannot be read). */
+static uint32_t get_cpu_flags_from_cpuinfo(void)
+{
+    uint32_t flags = 0;
+
+# ifdef __linux__
+    FILE* fp = fopen("/proc/cpuinfo", "r");
+    if (!fp)
+        return flags;
+
+    /* Single pass: the two keys may come in either order and "model name" may be
+     * absent, so neither lookup may consume lines the other one still needs. */
+    char buf[200];
+    bool seenModel = false, seenAses = false;
+    while (!(seenModel && seenAses) && fgets(buf, sizeof(buf), fp)) {
+        if (!seenModel && !strncmp(buf, "model name", strlen("model name"))) {
+            seenModel = true;
+            if (strstr(buf, "Loongson-3A") || strstr(buf, "Loongson-3B") ||
+                strstr(buf, "Loongson-2K")) {
+                flags |= WELS_CPU_MMI;
+            }
+        } else if (!seenAses && !strncmp(buf, "ASEs implemented", strlen("ASEs implemented"))) {
+            seenAses = true;
+            if (strstr(buf, "loongson-mmi") || strstr(buf, "loongson-ext")) {
+                flags |= WELS_CPU_MMI;
+            }
+            if (strstr(buf, "msa")) {
+                flags |= WELS_CPU_MSA;
+            }
+        }
+    }
+    fclose(fp);
+# endif
+
+    return flags;
+}
+
 uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
-#if defined(HAVE_MMI)
-  return WELS_CPU_MMI;
-#else
-  return 0;
-#endif
+    return get_cpu_flags_from_cpuinfo();
 }
 
 #else /* Neither X86_ASM, HAVE_NEON, HAVE_NEON_AARCH64 nor mips */
@@ -324,5 +357,3 @@
 }
 
 #endif
-
-
--- a/codec/common/targets.mk
+++ b/codec/common/targets.mk
@@ -66,7 +66,7 @@
 endif
 OBJS += $(COMMON_OBJSARM64)
 
-COMMON_ASM_MIPS_SRCS=\
+COMMON_ASM_MIPS_MMI_SRCS=\
 	$(COMMON_SRCDIR)/mips/copy_mb_mmi.c\
 	$(COMMON_SRCDIR)/mips/deblock_mmi.c\
 	$(COMMON_SRCDIR)/mips/expand_picture_mmi.c\
@@ -73,11 +73,21 @@
 	$(COMMON_SRCDIR)/mips/intra_pred_com_mmi.c\
 	$(COMMON_SRCDIR)/mips/satd_sad_mmi.c\
 
-COMMON_OBJSMIPS += $(COMMON_ASM_MIPS_SRCS:.c=.$(OBJ))
+COMMON_OBJSMIPS_MMI += $(COMMON_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))
+
+COMMON_ASM_MIPS_MSA_SRCS=\
+
+COMMON_OBJSMIPS_MSA += $(COMMON_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))
 ifeq ($(ASM_ARCH), mips)
-COMMON_OBJS += $(COMMON_OBJSMIPS)
+ifeq ($(ENABLE_MMI), Yes)
+COMMON_OBJS += $(COMMON_OBJSMIPS_MMI)
 endif
-OBJS += $(COMMON_OBJSMIPS)
+ifeq ($(ENABLE_MSA), Yes)
+COMMON_OBJS += $(COMMON_OBJSMIPS_MSA)
+endif
+endif
+OBJS += $(COMMON_OBJSMIPS_MMI)
+OBJS += $(COMMON_OBJSMIPS_MSA)
 
 OBJS += $(COMMON_OBJS)
 
--- a/codec/decoder/targets.mk
+++ b/codec/decoder/targets.mk
@@ -57,14 +57,24 @@
 endif
 OBJS += $(DECODER_OBJSARM64)
 
-DECODER_ASM_MIPS_SRCS=\
+DECODER_ASM_MIPS_MMI_SRCS=\
 	$(DECODER_SRCDIR)/core/mips/dct_mmi.c\
 
-DECODER_OBJSMIPS += $(DECODER_ASM_MIPS_SRCS:.c=.$(OBJ))
+DECODER_OBJSMIPS_MMI += $(DECODER_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))
+
+DECODER_ASM_MIPS_MSA_SRCS=\
+
+DECODER_OBJSMIPS_MSA += $(DECODER_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))
 ifeq ($(ASM_ARCH), mips)
-DECODER_OBJS += $(DECODER_OBJSMIPS)
+ifeq ($(ENABLE_MMI), Yes)
+DECODER_OBJS += $(DECODER_OBJSMIPS_MMI)
 endif
-OBJS += $(DECODER_OBJSMIPS)
+ifeq ($(ENABLE_MSA), Yes)
+DECODER_OBJS += $(DECODER_OBJSMIPS_MSA)
+endif
+endif
+OBJS += $(DECODER_OBJSMIPS_MMI)
+OBJS += $(DECODER_OBJSMIPS_MSA)
 
 OBJS += $(DECODER_OBJS)
 
--- a/codec/encoder/targets.mk
+++ b/codec/encoder/targets.mk
@@ -82,16 +82,26 @@
 endif
 OBJS += $(ENCODER_OBJSARM64)
 
-ENCODER_ASM_MIPS_SRCS=\
+ENCODER_ASM_MIPS_MMI_SRCS=\
 	$(ENCODER_SRCDIR)/core/mips/dct_mmi.c\
 	$(ENCODER_SRCDIR)/core/mips/quant_mmi.c\
 	$(ENCODER_SRCDIR)/core/mips/score_mmi.c\
 
-ENCODER_OBJSMIPS += $(ENCODER_ASM_MIPS_SRCS:.c=.$(OBJ))
+ENCODER_OBJSMIPS_MMI += $(ENCODER_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))
+
+ENCODER_ASM_MIPS_MSA_SRCS=\
+
+ENCODER_OBJSMIPS_MSA += $(ENCODER_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))
 ifeq ($(ASM_ARCH), mips)
-ENCODER_OBJS += $(ENCODER_OBJSMIPS)
+ifeq ($(ENABLE_MMI), Yes)
+ENCODER_OBJS += $(ENCODER_OBJSMIPS_MMI)
 endif
-OBJS += $(ENCODER_OBJSMIPS)
+ifeq ($(ENABLE_MSA), Yes)
+ENCODER_OBJS += $(ENCODER_OBJSMIPS_MSA)
+endif
+endif
+OBJS += $(ENCODER_OBJSMIPS_MMI)
+OBJS += $(ENCODER_OBJSMIPS_MSA)
 
 OBJS += $(ENCODER_OBJS)
 
--- a/codec/processing/targets.mk
+++ b/codec/processing/targets.mk
@@ -58,14 +58,24 @@
 endif
 OBJS += $(PROCESSING_OBJSARM64)
 
-PROCESSING_ASM_MIPS_SRCS=\
+PROCESSING_ASM_MIPS_MMI_SRCS=\
 	$(PROCESSING_SRCDIR)/src/mips/vaa_mmi.c\
 
-PROCESSING_OBJSMIPS += $(PROCESSING_ASM_MIPS_SRCS:.c=.$(OBJ))
+PROCESSING_OBJSMIPS_MMI += $(PROCESSING_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))
+
+PROCESSING_ASM_MIPS_MSA_SRCS=\
+
+PROCESSING_OBJSMIPS_MSA += $(PROCESSING_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))
 ifeq ($(ASM_ARCH), mips)
-PROCESSING_OBJS += $(PROCESSING_OBJSMIPS)
+ifeq ($(ENABLE_MMI), Yes)
+PROCESSING_OBJS += $(PROCESSING_OBJSMIPS_MMI)
 endif
-OBJS += $(PROCESSING_OBJSMIPS)
+ifeq ($(ENABLE_MSA), Yes)
+PROCESSING_OBJS += $(PROCESSING_OBJSMIPS_MSA)
+endif
+endif
+OBJS += $(PROCESSING_OBJSMIPS_MMI)
+OBJS += $(PROCESSING_OBJSMIPS_MSA)
 
 OBJS += $(PROCESSING_OBJS)