ref: b5f940169c2968b54e8d48cf880f5999c5932922
parent: cf2577d4bc284fb5c5b71377413a47c72a1362dc
author: Chris Moeller <kode54@gmail.com>
date: Tue Jun 11 23:45:06 EDT 2013
Re-enabled SSE support and rewrote SSE filter function
--- a/dumb/src/it/itrender.c
+++ b/dumb/src/it/itrender.c
@@ -747,19 +747,22 @@
dumb_record_click(cr, pos, startstep);
}
- data = _mm_cvtsi32_ss( _mm_setzero_ps(), prevsample );
- data = _mm_cvtsi32_ss( _mm_shuffle_ps( data, data, _MM_SHUFFLE(0, 0, 0, 0) ), currsample );
+ temp1 = _mm_setzero_ps();
+ data = _mm_cvtsi32_ss( temp1, currsample );
+ temp2 = _mm_cvtsi32_ss( temp1, prevsample );
impulse = _mm_loadu_ps( (const float *) &imp );
- temp1 = _mm_shuffle_ps( data, data, _MM_SHUFFLE(0, 1, 0, 0) );
+ data = _mm_shuffle_ps( data, temp2, _MM_SHUFFLE(1, 0, 0, 1) );
for (i = 0; i < datasize; i += step) {
- data = _mm_cvtsi32_ss( temp1, src [i] );
- temp1 = _mm_mul_ps( data, impulse );
- temp2 = _mm_movehl_ps( temp1, temp1 );
+ temp1 = _mm_cvtsi32_ss( data, src [i] );
+ temp1 = _mm_mul_ps( temp1, impulse );
+ temp2 = _mm_movehl_ps( temp2, temp1 );
temp1 = _mm_add_ps( temp1, temp2 );
- temp2 = _mm_shuffle_ps( temp1, temp1, _MM_SHUFFLE(0, 0, 0, 1) );
+ temp2 = temp1;
+ temp2 = _mm_shuffle_ps( temp2, temp1, _MM_SHUFFLE(0, 0, 0, 1) );
temp1 = _mm_add_ps( temp1, temp2 );
- temp1 = _mm_shuffle_ps( temp1, data, _MM_SHUFFLE(0, 1, 0, 0) );
+ temp1 = _mm_shuffle_ps( temp1, data, _MM_SHUFFLE(2, 1, 0, 0) );
+ data = temp1;
dst [i] += _mm_cvtss_si32( temp1 );
}
--- a/dumb/vc6/dumb/dumb.vcxproj
+++ b/dumb/vc6/dumb/dumb.vcxproj
@@ -55,7 +55,7 @@
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>_DEBUG;WIN32;_LIB;DUMB_DECLARE_DEPRECATED;DEBUGMODE=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>_USE_SSE;_DEBUG;WIN32;_LIB;DUMB_DECLARE_DEPRECATED;DEBUGMODE=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<WarningLevel>Level3</WarningLevel>
@@ -76,7 +76,7 @@
<Optimization>MaxSpeed</Optimization>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<AdditionalIncludeDirectories>../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>NDEBUG;WIN32;_LIB;DUMB_DECLARE_DEPRECATED;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>_USE_SSE;NDEBUG;WIN32;_LIB;DUMB_DECLARE_DEPRECATED;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>