ref: 3d8482486e345da38f70c021c5a9c905e360cf55
dir: /codec/common/inc/asmdefs_mmi.h/
/*!
 * \copy
 *     Copyright (c) 2013, Loongson Technology Co.,Ltd.
 *     All rights reserved.
 *
 *     Redistribution and use in source and binary forms, with or without
 *     modification, are permitted provided that the following conditions
 *     are met:
 *
 *        * Redistributions of source code must retain the above copyright
 *          notice, this list of conditions and the following disclaimer.
 *
 *        * Redistributions in binary form must reproduce the above copyright
 *          notice, this list of conditions and the following disclaimer in
 *          the documentation and/or other materials provided with the
 *          distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *     POSSIBILITY OF SUCH DAMAGE.
 *
 */

/**
 * Helper macros for Loongson MMI inline assembly.
 *
 * Every instruction macro below expands to a sequence of adjacent string
 * literals (one "insn operands \n\t" fragment per instruction) meant to be
 * pasted inside an __asm__ template. Macro arguments are stringified with
 * '#', so callers pass register names or %[operand] references as bare
 * tokens, not as strings.
 */
#ifndef ASMDEFS_MMI_H_
#define ASMDEFS_MMI_H_

/* mips_reg expands to int64_t / int32_t, so make the header self-contained. */
#include <stdint.h>

#define CACHE_LINE_SIZE 32

/*
 * Pointer-width dependent definitions: an integer type wide enough for a
 * general purpose register holding a pointer, plus the mnemonics to use for
 * pointer arithmetic, loads/stores and shifts on the selected ABI.
 */
#if defined(_ABI64) && _MIPS_SIM == _ABI64
# define mips_reg   int64_t
# define PTRSIZE    " 8 "
# define PTRLOG     " 3 "
# define PTR_ADDU   "daddu "
# define PTR_ADDIU  "daddiu "
# define PTR_ADDI   "daddi "
# define PTR_SUBU   "dsubu "
# define PTR_L      "ld "
# define PTR_S      "sd "
# define PTR_SRA    "dsra "
# define PTR_SRL    "dsrl "
# define PTR_SLL    "dsll "
#else
# define mips_reg   int32_t
# define PTRSIZE    " 4 "
# define PTRLOG     " 2 "
# define PTR_ADDU   "addu "
# define PTR_ADDIU  "addiu "
# define PTR_ADDI   "addi "
# define PTR_SUBU   "subu "
# define PTR_L      "lw "
# define PTR_S      "sw "
# define PTR_SRA    "sra "
# define PTR_SRL    "srl "
# define PTR_SLL    "sll "
#endif

/**
 * Byte interleave of the register pair (f0, f2) with the pair (f4, f6).
 * f8 and f10 are overwritten: f8 first receives a copy of f2, then the
 * low/high interleaves of that copy with f6 land in f8/f10, while the
 * low/high interleaves of f0 with f4 land in f0/f2.
 */
#define MMI_XSawp_BH(f0, f2, f4, f6, f8, f10) \
  "mov.d "#f8", "#f2" \n\t" \
  "punpckhbh "#f2", "#f0", "#f4" \n\t" \
  "punpcklbh "#f0", "#f0", "#f4" \n\t" \
  "punpckhbh "#f10", "#f8", "#f6" \n\t" \
  "punpcklbh "#f8", "#f8", "#f6" \n\t"

/** Same register shuffle as MMI_XSawp_BH, using the halfword unpack forms. */
#define MMI_XSawp_HW(f0, f2, f4, f6, f8, f10) \
  "mov.d "#f8", "#f2" \n\t" \
  "punpckhhw "#f2", "#f0", "#f4" \n\t" \
  "punpcklhw "#f0", "#f0", "#f4" \n\t" \
  "punpckhhw "#f10", "#f8", "#f6" \n\t" \
  "punpcklhw "#f8", "#f8", "#f6" \n\t"

/** Same register shuffle as MMI_XSawp_BH, using the word unpack forms. */
#define MMI_XSawp_WD(f0, f2, f4, f6, f8, f10) \
  "mov.d "#f8", "#f2" \n\t" \
  "punpckhwd "#f2", "#f0", "#f4" \n\t" \
  "punpcklwd "#f0", "#f0", "#f4" \n\t" \
  "punpckhwd "#f10", "#f8", "#f6" \n\t" \
  "punpcklwd "#f8", "#f8", "#f6" \n\t"

/**
 * Doubleword variant of the swap: implemented purely with register moves
 * (f8 <- f2, f2 <- f4, f10 <- f6); f0 is left untouched.
 */
#define MMI_XSawp_DQ(f0, f2, f4, f6, f8, f10) \
  "mov.d "#f8", "#f2" \n\t" \
  "mov.d "#f2", "#f4" \n\t" \
  "mov.d "#f10", "#f6" \n\t"

/**
 * Absolute value of the signed halfwords in (f4, f6), written to (f0, f2).
 * Computed as max(x, 0 - x); f8 and f10 are clobbered as temporaries.
 */
#define WELS_AbsH(f0, f2, f4, f6, f8, f10) \
  "xor "#f8", "#f8", "#f8" \n\t" \
  "psubh "#f10", "#f8", "#f6" \n\t" \
  "psubh "#f8", "#f8", "#f4" \n\t" \
  "pmaxsh "#f0", "#f4", "#f8" \n\t" \
  "pmaxsh "#f2", "#f6", "#f10" \n\t"

/**
 * Halfword butterfly on two register pairs:
 *   (f4, f6) <- (f4, f6) + (f0, f2)
 *   (f0, f2) <- (f0, f2) - original (f4, f6)
 * f8 and f10 are clobbered (they hold the original f4/f6).
 */
#define MMI_SumSub(f0, f2, f4, f6, f8, f10) \
  "mov.d "#f8", "#f4" \n\t" \
  "mov.d "#f10", "#f6" \n\t" \
  "paddh "#f4", "#f4", "#f0" \n\t" \
  "paddh "#f6", "#f6", "#f2" \n\t" \
  "psubh "#f0", "#f0", "#f8" \n\t" \
  "psubh "#f2", "#f2", "#f10" \n\t"

/**
 * Loads 8 unaligned bytes from each of r0 and r1, interleaves them with f8
 * into halfwords and leaves the halfword difference (r0 data - r1 data) in
 * (f0, f2). f4 and f6 are clobbered.
 * NOTE(review): f8 is presumably expected to be zero so the interleave acts
 * as a zero-extension - confirm against callers.
 */
#define MMI_LoadDiff8P(f0, f2, f4, f6, f8, r0, r1) \
  "gsldlc1 "#f0", 0x7("#r0") \n\t" \
  "gsldlc1 "#f4", 0x7("#r1") \n\t" \
  "gsldrc1 "#f0", 0x0("#r0") \n\t" \
  "gsldrc1 "#f4", 0x0("#r1") \n\t" \
  "punpckhbh "#f2", "#f0", "#f8" \n\t" \
  "punpcklbh "#f0", "#f0", "#f8" \n\t" \
  "punpckhbh "#f6", "#f4", "#f8" \n\t" \
  "punpcklbh "#f4", "#f4", "#f8" \n\t" \
  "psubh "#f0", "#f0", "#f4" \n\t" \
  "psubh "#f2", "#f2", "#f6" \n\t"

/**
 * Chains the HW, WD and DQ swap stages over four register pairs
 * (f0..f14), using (f16, f18) as the extra scratch pair.
 * The exact statement order matters: each stage reuses outputs of the
 * previous one as its scratch registers.
 */
#define MMI_TransTwo4x4H(f0, f2, f4, f6, f8, f10, f12, f14, f16, f18) \
  MMI_XSawp_HW(f0, f2, f4, f6, f16, f18) \
  MMI_XSawp_HW(f8, f10, f12, f14, f4, f6) \
  MMI_XSawp_WD(f0, f2, f8, f10, f12, f14) \
  MMI_XSawp_WD(f16, f18, f4, f6, f8, f10) \
  MMI_XSawp_DQ(f0, f2, f16, f18, f4, f6) \
  MMI_XSawp_DQ(f12, f14, f8, f10, f16, f18)

/**
 * Chains the BH, HW, WD and DQ swap stages over eight register pairs
 * (f0..f30). There is no spare FP pair, so general purpose registers r0 and
 * r1 are used (via dmfc1/dmtc1) to park one pair between stages; both are
 * clobbered.
 * The exact statement order matters: each stage reuses outputs of the
 * previous one as its scratch registers.
 */
#define MMI_TransTwo8x8B(f0, f2, f4, f6, f8, f10, f12, f14, f16, f18, f20, f22, f24, f26, f28, f30, r0, r1) \
  "dmfc1 "#r0", "#f28" \n\t" \
  "dmfc1 "#r1", "#f30" \n\t" \
  MMI_XSawp_BH(f0, f2, f4, f6, f28, f30) \
  MMI_XSawp_BH(f8, f10, f12, f14, f4, f6) \
  MMI_XSawp_BH(f16, f18, f20, f22, f12, f14) \
  "dmtc1 "#r0", "#f20" \n\t" \
  "dmtc1 "#r1", "#f22" \n\t" \
  "dmfc1 "#r0", "#f12" \n\t" \
  "dmfc1 "#r1", "#f14" \n\t" \
  MMI_XSawp_BH(f24, f26, f20, f22, f12, f14) \
  MMI_XSawp_HW(f0, f2, f8, f10, f20, f22) \
  MMI_XSawp_HW(f28, f30, f4, f6, f8, f10) \
  MMI_XSawp_HW(f16, f18, f24, f26, f4, f6) \
  "dmtc1 "#r0", "#f24" \n\t" \
  "dmtc1 "#r1", "#f26" \n\t" \
  "dmfc1 "#r0", "#f8" \n\t" \
  "dmfc1 "#r1", "#f10" \n\t" \
  MMI_XSawp_HW(f24, f26, f12, f14, f8, f10) \
  MMI_XSawp_WD(f0, f2, f16, f18, f12, f14) \
  MMI_XSawp_WD(f20, f22, f4, f6, f16, f18) \
  MMI_XSawp_WD(f28, f30, f24, f26, f4, f6) \
  "dmtc1 "#r0", "#f24" \n\t" \
  "dmtc1 "#r1", "#f26" \n\t" \
  "dmfc1 "#r0", "#f16" \n\t" \
  "dmfc1 "#r1", "#f18" \n\t" \
  MMI_XSawp_WD(f24, f26, f8, f10, f16, f18) \
  MMI_XSawp_DQ(f0, f2, f28, f30, f8, f10) \
  MMI_XSawp_DQ(f12, f14, f4, f6, f28, f30) \
  MMI_XSawp_DQ(f20, f22, f24, f26, f4, f6) \
  "dmtc1 "#r0", "#f24" \n\t" \
  "dmtc1 "#r1", "#f26" \n\t" \
  "dmfc1 "#r0", "#f0" \n\t" \
  "dmfc1 "#r1", "#f2" \n\t" \
  MMI_XSawp_DQ(f24, f26, f16, f18, f0, f2) \
  "dmtc1 "#r0", "#f16" \n\t" \
  "dmtc1 "#r1", "#f18" \n\t"

/**
 * Single-register halfword interleave of f0 with f2: the high half goes to
 * f4, the low half replaces f0.
 */
#define MMI_XSwap_HW_SINGLE(f0, f2, f4) \
  "punpckhhw "#f4", "#f0", "#f2" \n\t" \
  "punpcklhw "#f0", "#f0", "#f2" \n\t"

/**
 * Single-register word interleave of f0 with f2: the high half goes to f4,
 * the low half replaces f0.
 */
#define MMI_XSwap_WD_SINGLE(f0, f2, f4) \
  "punpckhwd "#f4", "#f0", "#f2" \n\t" \
  "punpcklwd "#f0", "#f0", "#f2" \n\t"

/**
 * Chains two halfword and two word single-register swaps over f0, f2, f4,
 * f6, with f8 as the extra register.
 */
#define MMI_Trans4x4H_SINGLE(f0, f2, f4, f6, f8) \
  MMI_XSwap_HW_SINGLE(f0, f2, f8) \
  MMI_XSwap_HW_SINGLE(f4, f6, f2) \
  MMI_XSwap_WD_SINGLE(f0, f4, f6) \
  MMI_XSwap_WD_SINGLE(f8, f2, f4)

/**
 * Single-register halfword butterfly:
 *   f2 <- f2 - f0
 *   f0 <- f0 + original f2
 * f4 is clobbered (it holds the original f2).
 */
#define MMI_SumSub_SINGLE(f0, f2, f4) \
  "mov.d "#f4", "#f2" \n\t" \
  "psubh "#f2", "#f2", "#f0" \n\t" \
  "paddh "#f0", "#f0", "#f4" \n\t"

/**
 * With s = shift count held in f6 and a, b the incoming f0, f2:
 *   f0 <- (a << s) + b
 *   f2 <- b << s
 *   f4 <- a - (b << s)
 */
#define MMI_SumSubMul2_SINGLE(f0, f2, f4, f6) \
  "mov.d "#f4", "#f0" \n\t" \
  "psllh "#f0", "#f0", "#f6" \n\t" \
  "paddh "#f0", "#f0", "#f2" \n\t" \
  "psllh "#f2", "#f2", "#f6" \n\t" \
  "psubh "#f4", "#f4", "#f2" \n\t"

/**
 * Moves general purpose register r0 into f0, shuffles its halfwords with f4
 * as the pshufh control operand and copies the result to f2.
 * f4 should be 0x0.
 */
#define MMI_Copy8Times(f0, f2, f4, r0) \
  "dmtc1 "#r0", "#f0" \n\t" \
  "pshufh "#f0", "#f0", "#f4" \n\t" \
  "mov.d "#f2", "#f0" \n\t"

/**
 * Like MMI_Copy8Times, but first interleaves the bytes of f0 with itself
 * before the halfword shuffle.
 * f4 should be 0x0.
 */
#define MMI_Copy16Times(f0, f2, f4, r0) \
  "dmtc1 "#r0", "#f0" \n\t" \
  "punpcklbh "#f0", "#f0", "#f0" \n\t" \
  "pshufh "#f0", "#f0", "#f4" \n\t" \
  "mov.d "#f2", "#f0" \n\t"

/**
 * With s = shift count held in f6 and a, b the incoming f0, f2:
 *   f4 <- (b >> s) + a
 *   f0 <- (a >> s) - b
 * (arithmetic shifts on halfwords).
 */
#define MMI_SumSubDiv2_SINGLE(f0, f2, f4, f6) \
  "psrah "#f4", "#f2", "#f6" \n\t" \
  "paddh "#f4", "#f4", "#f0" \n\t" \
  "psrah "#f0", "#f0", "#f6" \n\t" \
  "psubh "#f0", "#f0", "#f2" \n\t"

/**
 * Composition of the single-register butterflies above; f10 is the scratch
 * register for the SumSub steps and f12 holds the shift count for the
 * SumSubDiv2 step.
 */
#define MMI_IDCT_SINGLE(f0, f2, f4, f6, f8, f10, f12) \
  MMI_SumSub_SINGLE(f6, f8, f10) \
  MMI_SumSubDiv2_SINGLE(f4, f2, f0, f12) \
  MMI_SumSub_SINGLE(f0, f6, f10) \
  MMI_SumSub_SINGLE(f4, f8, f10)

/**
 * Computes ((f0 + f4) >> f8), adds (with signed saturation) the bytes loaded
 * from r1 after interleaving them with f6, packs the result to unsigned
 * bytes and stores 4 bytes to r0. f0 and f2 are clobbered.
 * Note that the load from r1 is an 8-byte unaligned load even though only
 * 4 bytes are stored.
 */
#define MMI_StoreDiff4P_SINGLE(f0, f2, f4, f6, r0, r1, f8) \
  "gsldlc1 "#f2", 0x7("#r1") \n\t" \
  "gsldrc1 "#f2", 0x0("#r1") \n\t" \
  "punpcklbh "#f2", "#f2", "#f6" \n\t" \
  "paddh "#f0", "#f0", "#f4" \n\t" \
  "psrah "#f0", "#f0", "#f8" \n\t" \
  "paddsh "#f0", "#f0", "#f2" \n\t" \
  "packushb "#f0", "#f0", "#f2" \n\t" \
  "gsswlc1 "#f0", 0x3("#r0") \n\t" \
  "gsswrc1 "#f0", 0x0("#r0") \n\t"

/**
 * Adds the halfwords of f0 and f2, then folds the sum horizontally with
 * word adds, leaving the result in f0. f2 is clobbered; f4 and f6 are
 * accepted but not referenced.
 * NOTE(review): f8 is presumably expected to be zero so the halfword
 * interleave widens to words - confirm against callers.
 */
#define SUMH_HORIZON(f0, f2, f4, f6, f8) \
  "paddh "#f0", "#f0", "#f2" \n\t" \
  "punpckhhw "#f2", "#f0", "#f8" \n\t" \
  "punpcklhw "#f0", "#f0", "#f8" \n\t" \
  "paddw "#f0", "#f0", "#f2" \n\t" \
  "punpckhwd "#f2", "#f0", "#f0" \n\t" \
  "paddw "#f0", "#f0", "#f2" \n\t"

/**
 * Performs eight unaligned 8-byte loads from r0, r0 + r1, r0 + 2*r1, ...
 * and interleaves them into (f0, f2). On exit r0 has been advanced by
 * 8 * r1; r2, f4, f8, f12 and f14 are clobbered; f6 and f10 are accepted
 * but not referenced.
 *
 * Address arithmetic uses PTR_ADDU so the instruction matches the pointer
 * width of the selected ABI (this is "daddu" on the 64-bit ABI).
 */
#define LOAD_COLUMN(f0, f2, f4, f6, f8, f10, f12, f14, r0, r1, r2) \
  PTR_ADDU #r2", "#r0", "#r1" \n\t" \
  "gsldlc1 "#f0", 0x7("#r0") \n\t" \
  "gsldlc1 "#f4", 0x7("#r2") \n\t" \
  "gsldrc1 "#f0", 0x0("#r0") \n\t" \
  "gsldrc1 "#f4", 0x0("#r2") \n\t" \
  "punpcklbh "#f0", "#f0", "#f4" \n\t" \
  PTR_ADDU #r0", "#r2", "#r1" \n\t" \
  PTR_ADDU #r2", "#r0", "#r1" \n\t" \
  "gsldlc1 "#f8", 0x7("#r0") \n\t" \
  "gsldlc1 "#f4", 0x7("#r2") \n\t" \
  "gsldrc1 "#f8", 0x0("#r0") \n\t" \
  "gsldrc1 "#f4", 0x0("#r2") \n\t" \
  "punpcklbh "#f8", "#f8", "#f4" \n\t" \
  "punpckhhw "#f2", "#f0", "#f8" \n\t" \
  "punpcklhw "#f0", "#f0", "#f8" \n\t" \
  PTR_ADDU #r0", "#r2", "#r1" \n\t" \
  PTR_ADDU #r2", "#r0", "#r1" \n\t" \
  "gsldlc1 "#f12", 0x7("#r0") \n\t" \
  "gsldlc1 "#f4", 0x7("#r2") \n\t" \
  "gsldrc1 "#f12", 0x0("#r0") \n\t" \
  "gsldrc1 "#f4", 0x0("#r2") \n\t" \
  "punpcklbh "#f12", "#f12", "#f4" \n\t" \
  PTR_ADDU #r0", "#r2", "#r1" \n\t" \
  PTR_ADDU #r2", "#r0", "#r1" \n\t" \
  "gsldlc1 "#f8", 0x7("#r0") \n\t" \
  "gsldlc1 "#f4", 0x7("#r2") \n\t" \
  "gsldrc1 "#f8", 0x0("#r0") \n\t" \
  "gsldrc1 "#f4", 0x0("#r2") \n\t" \
  "punpcklbh "#f8", "#f8", "#f4" \n\t" \
  "punpckhhw "#f14", "#f12", "#f8" \n\t" \
  "punpcklhw "#f12", "#f12", "#f8" \n\t" \
  PTR_ADDU #r0", "#r2", "#r1" \n\t" \
  "punpcklwd "#f0", "#f2", "#f14" \n\t" \
  "punpckhwd "#f2", "#f2", "#f14" \n\t"

/**
 * Four-row variant of LOAD_COLUMN: performs four unaligned 8-byte loads
 * from r0, r0 + r1, r0 + 2*r1, r0 + 3*r1 and interleaves them into f0.
 * On exit r0 has been advanced by 4 * r1; r2, f2 and f4 are clobbered;
 * f6 is accepted but not referenced.
 *
 * Address arithmetic uses PTR_ADDU so the instruction matches the pointer
 * width of the selected ABI (this is "daddu" on the 64-bit ABI).
 */
#define LOAD_COLUMN_C(f0, f2, f4, f6, r0, r1, r2) \
  PTR_ADDU #r2", "#r0", "#r1" \n\t" \
  "gsldlc1 "#f0", 0x7("#r0") \n\t" \
  "gsldlc1 "#f2", 0x7("#r2") \n\t" \
  "gsldrc1 "#f0", 0x0("#r0") \n\t" \
  "gsldrc1 "#f2", 0x0("#r2") \n\t" \
  "punpcklbh "#f0", "#f0", "#f2" \n\t" \
  PTR_ADDU #r0", "#r2", "#r1" \n\t" \
  PTR_ADDU #r2", "#r0", "#r1" \n\t" \
  "gsldlc1 "#f4", 0x7("#r0") \n\t" \
  "gsldlc1 "#f2", 0x7("#r2") \n\t" \
  "gsldrc1 "#f4", 0x0("#r0") \n\t" \
  "gsldrc1 "#f2", 0x0("#r2") \n\t" \
  "punpcklbh "#f4", "#f4", "#f2" \n\t" \
  "punpckhhw "#f0", "#f0", "#f4" \n\t" \
  PTR_ADDU #r0", "#r2", "#r1" \n\t"

/**
 * backup register
 *
 * Declares a 16-byte aligned local buffer named __back_temp (8 doubles) and
 * stores floating point registers into it: $f24-$f31 when _MIPS_SIM is
 * _ABI64, otherwise the even registers $f20-$f30 (presumably the FP
 * registers each ABI requires a callee to preserve - confirm against the
 * ABI documents).
 *
 * Because it introduces a declaration, use it at most once per scope and in
 * the same scope as the matching RECOVER_REG.
 */
#define BACKUP_REG \
  double __attribute__((aligned(16))) __back_temp[8]; \
  if (_MIPS_SIM == _ABI64) { \
    __asm__ volatile ( \
      "gssqc1 $f25, $f24, 0x00(%[temp]) \n\t" \
      "gssqc1 $f27, $f26, 0x10(%[temp]) \n\t" \
      "gssqc1 $f29, $f28, 0x20(%[temp]) \n\t" \
      "gssqc1 $f31, $f30, 0x30(%[temp]) \n\t" \
      : \
      : [temp]"r"(__back_temp) \
      : "memory" \
    ); \
  } else { \
    __asm__ volatile ( \
      "gssqc1 $f22, $f20, 0x00(%[temp]) \n\t" \
      "gssqc1 $f26, $f24, 0x10(%[temp]) \n\t" \
      "gssqc1 $f30, $f28, 0x20(%[temp]) \n\t" \
      : \
      : [temp]"r"(__back_temp) \
      : "memory" \
    ); \
  }

/**
 * recover register
 *
 * Reloads the floating point registers saved by BACKUP_REG from the
 * __back_temp buffer that BACKUP_REG declared in the enclosing scope.
 */
#define RECOVER_REG \
  if (_MIPS_SIM == _ABI64) { \
    __asm__ volatile ( \
      "gslqc1 $f25, $f24, 0x00(%[temp]) \n\t" \
      "gslqc1 $f27, $f26, 0x10(%[temp]) \n\t" \
      "gslqc1 $f29, $f28, 0x20(%[temp]) \n\t" \
      "gslqc1 $f31, $f30, 0x30(%[temp]) \n\t" \
      : \
      : [temp]"r"(__back_temp) \
      : "memory" \
    ); \
  } else { \
    __asm__ volatile ( \
      "gslqc1 $f22, $f20, 0x00(%[temp]) \n\t" \
      "gslqc1 $f26, $f24, 0x10(%[temp]) \n\t" \
      "gslqc1 $f30, $f28, 0x20(%[temp]) \n\t" \
      : \
      : [temp]"r"(__back_temp) \
      : "memory" \
    ); \
  }

# define OK    1
# define NOTOK 0

#endif /* ASMDEFS_MMI_H_ */