Code: Select all
// inspired by https://www.youtube.com/watch?v=fdCAsZkjpSg
inline double process (double input) noexcept
{
buffer [offset] = input;
double y = 0.0;
int i;
int n = 0;
for (i = offset; i != -1; i--)
y += taps[n++] * buffer[i];
for (i = numTaps-1; i != offset; i--)
y += taps[n++] * buffer[i];
if (++offset >= numTaps) offset = 0;
return y;
}
I've been trying to implement this with SSE2 intrinsics, but I don't get any improvement.
Code: Select all
// SSE2 version of the multiply-accumulate step: load from taps and buffer,
// multiply the two vectors, and add the product into the accumulator y.
// y, a, b, mul: __m128d
// in the for loop:
// NOTE(review): a __m128d load covers two adjacent doubles, but n advances by
// only one element per iteration here, so successive loads would overlap —
// confirm the intended stride (presumably 2) against the enclosing loop.
a = _mm_loadu_pd (taps + n++);
// NOTE(review): the scalar code walks buffer downwards while taps go upwards;
// this load pairs taps[n], taps[n+1] with buffer[i], buffer[i+1] — verify that
// this is the intended pairing (it looks like the lanes would need reversing).
b = _mm_loadu_pd (buffer + i);
mul = _mm_mul_pd (a, b);
// y holds per-lane partial sums; presumably the lanes still have to be added
// together after the loop to obtain the scalar result — TODO confirm.
y = _mm_add_pd (y, mul);
I've also tried casting taps[] and buffer[] to __m128d* outside the for loop:
Code: Select all
// Alternative attempt: reinterpret the double arrays as arrays of __m128d and
// index them directly instead of issuing explicit loads.
// NOTE(review): dereferencing a __m128d* presumably requires the underlying
// arrays to be 16-byte aligned — confirm how taps and buffer are allocated.
__m128d* t = (__m128d*)taps; // and __m128* t = reinterpret_cast<__m128*>(taps)
__m128d* b = (__m128d*)buffer;
// and then in the for loop:
// NOTE(review): pointer arithmetic on __m128d* moves in whole vectors, so n and
// i count vectors here rather than doubles — verify the loop bounds account
// for that, otherwise the accesses would run past the end of the arrays.
mul = _mm_mul_pd (*(t+n++), *(b+i));
y = _mm_add_pd (y, mul);
Code: Select all
cArray[] {1,2,3,4,5};
*(m128Array + 0) == 1
*(m128Array + 1) == 3
*(m128Array + 2) == 5