Skip to content

Commit 925aa4e

Browse files
committed
Added SIMD version of L2 distance
1 parent 9ed39ce commit 925aa4e

2 files changed

Lines changed: 30 additions & 1 deletion

File tree

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,6 @@ jobs:
123123
- uses: ankane/setup-postgres-valgrind@v1
124124
with:
125125
postgres-version: 16
126-
- run: make
126+
- run: make OPTFLAGS=""
127127
- run: sudo --preserve-env=PG_CONFIG make install
128128
- run: make installcheck

src/halfvec.c

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -810,13 +810,42 @@ l2_distance_squared_internal(HalfVector * a, HalfVector * b)
810810
half *bx = b->x;
811811
float distance = 0.0;
812812

813+
#if defined(F16C_SUPPORT) && defined(__FMA__)
814+
int i;
815+
float s[8];
816+
int count = (a->dim / 8) * 8;
817+
__m256 dist = _mm256_setzero_ps();
818+
819+
for (i = 0; i < count; i += 8)
820+
{
821+
__m128i axi = _mm_loadu_si128((__m128i *) (ax + i));
822+
__m128i bxi = _mm_loadu_si128((__m128i *) (bx + i));
823+
__m256 axs = _mm256_cvtph_ps(axi);
824+
__m256 bxs = _mm256_cvtph_ps(bxi);
825+
__m256 diff = _mm256_sub_ps(axs, bxs);
826+
827+
dist = _mm256_fmadd_ps(diff, diff, dist);
828+
}
829+
830+
_mm256_store_ps(s, dist);
831+
832+
distance = s[0] + s[1] + s[2] + s[3] + s[4] + s[5] + s[6] + s[7];
833+
834+
for (; i < a->dim; i++)
835+
{
836+
float diff = HalfToFloat4(ax[i]) - HalfToFloat4(bx[i]);
837+
838+
distance += diff * diff;
839+
}
840+
#else
813841
/* Auto-vectorized */
814842
for (int i = 0; i < a->dim; i++)
815843
{
816844
float diff = HalfToFloat4(ax[i]) - HalfToFloat4(bx[i]);
817845

818846
distance += diff * diff;
819847
}
848+
#endif
820849

821850
return (double) distance;
822851
}

0 commit comments

Comments
 (0)