Browse Source

feat: add SIMD variation for Vector4DotProduct

pull/4599/head
Alex Murkoff 5 days ago
parent
commit
fc3d254d63
1 changed file with 19 additions and 1 deletion
  1. +19
    -1
      src/raymath.h

+ 19
- 1
src/raymath.h View File

@ -1336,7 +1336,25 @@ RMAPI float Vector4LengthSqr(Vector4 v)
// Calculate the dot product of two 4-component vectors
// Returns v1.x*v2.x + v1.y*v2.y + v1.z*v2.z + v1.w*v2.w
// NOTE: When both __SSE2__ and RL_USE_SIMD are defined, the four products are computed with one
// packed multiply and reduced pairwise as (x + y) + (z + w); the scalar fallback sums left to
// right, so the two paths may differ in the last bits of the result
// NOTE(review): the SIMD path needs the SSE intrinsic headers in scope (<xmmintrin.h>, plus
// <pmmintrin.h> for _mm_hadd_ps) - confirm they are included where RL_USE_SIMD is enabled
RMAPI float Vector4DotProduct(Vector4 v1, Vector4 v2)
{
    float result = 0.0f;

#if defined(__SSE2__) && defined(RL_USE_SIMD)
    // _mm_set_ps() takes its arguments from the highest lane down, so x lands in lane 0
    __m128 vecA = _mm_set_ps(v1.w, v1.z, v1.y, v1.x);
    __m128 vecB = _mm_set_ps(v2.w, v2.z, v2.y, v2.x);
    __m128 mul = _mm_mul_ps(vecA, vecB);    // Lane-wise products: [x*x, y*y, z*z, w*w]
    #if defined(__SSE3__)
    // Two horizontal adds leave the complete sum in every lane
    __m128 sum = _mm_hadd_ps(mul, mul);
    sum = _mm_hadd_ps(sum, sum);
    #else   // Non __SSE3__
    // Add each lane to its neighbour (swap lanes 0<->1 and 2<->3), then add the
    // two 64-bit halves together (swap lanes 0,1 <-> 2,3)
    __m128 sum1 = _mm_add_ps(mul, _mm_shuffle_ps(mul, mul, _MM_SHUFFLE(2, 3, 0, 1)));
    __m128 sum = _mm_add_ps(sum1, _mm_shuffle_ps(sum1, sum1, _MM_SHUFFLE(1, 0, 3, 2)));
    #endif
    result = _mm_cvtss_f32(sum);            // Extract lane 0, which holds the full sum
#else   // Non SIMD
    result = (v1.x*v2.x + v1.y*v2.y + v1.z*v2.z + v1.w*v2.w);
#endif

    return result;
}

Loading…
Cancel
Save