Skip to content

Commit 1ea2994

Browse files
committed
Remove softfloat if hardware fp16 available
1 parent bcd361c commit 1ea2994

4 files changed

Lines changed: 45 additions & 26 deletions

File tree

Source/astcenc_mathlib.h

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -490,11 +490,11 @@ static inline float2 normalize(float2 p) { return p * astc::rsqrt(dot(p, p)); }
490490
/* ============================================================================
491491
Softfloat library with fp32 and fp16 conversion functionality.
492492
============================================================================ */
493-
uint32_t clz32(uint32_t p);
494-
495-
/* narrowing float->float conversions */
496-
uint16_t float_to_sf16(float val);
497-
float sf16_to_float(uint16_t val);
493+
#if ASTCENC_F16C == 0
494+
/* narrowing float->float conversions */
495+
uint16_t float_to_sf16(float val);
496+
float sf16_to_float(uint16_t val);
497+
#endif
498498

499499
/*********************************
500500
Vector library

Source/astcenc_mathlib_softfloat.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
/**
1919
* @brief Soft-float library for IEEE-754.
2020
*/
21+
#if ASTCENC_F16C == 0
2122

2223
#include "astcenc_mathlib.h"
2324

@@ -61,7 +62,7 @@ typedef uint32_t sf32;
6162

6263
/*
6364
32-bit count-leading-zeros function: use the Assembly instruction whenever possible. */
64-
uint32_t clz32(uint32_t inp)
65+
static uint32_t clz32(uint32_t inp)
6566
{
6667
#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
6768
uint32_t bsr;
@@ -401,3 +402,5 @@ uint16_t float_to_sf16(float p)
401402
i.f = p;
402403
return sf32_to_sf16(i.u, SF_NEARESTEVEN);
403404
}
405+
406+
#endif

Source/astcenccli_image.cpp

Lines changed: 22 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -285,10 +285,15 @@ float* floatx4_array_from_astc_img(
285285

286286
for (unsigned int x = 0; x < dim_x; x++)
287287
{
288-
dst[4 * x ] = sf16_to_float(data16[(4 * dim_x * ymod) + (4 * x )]);
289-
dst[4 * x + 1] = sf16_to_float(data16[(4 * dim_x * ymod) + (4 * x + 1)]);
290-
dst[4 * x + 2] = sf16_to_float(data16[(4 * dim_x * ymod) + (4 * x + 2)]);
291-
dst[4 * x + 3] = sf16_to_float(data16[(4 * dim_x * ymod) + (4 * x + 3)]);
288+
vint4 colori(
289+
data16[(4 * dim_x * ymod) + (4 * x )],
290+
data16[(4 * dim_x * ymod) + (4 * x + 1)],
291+
data16[(4 * dim_x * ymod) + (4 * x + 2)],
292+
data16[(4 * dim_x * ymod) + (4 * x + 3)]
293+
);
294+
295+
vfloat4 color = float16_to_float(colori);
296+
store(color, dst + 4 * x);
292297
}
293298
}
294299
}
@@ -351,10 +356,19 @@ uint8_t* unorm8x4_array_from_astc_img(
351356

352357
for (unsigned int x = 0; x < dim_x; x++)
353358
{
354-
dst[4 * x ] = (uint8_t)astc::flt2int_rtn(astc::clamp1f(sf16_to_float(data16[(4 * dim_x * ymod) + (4 * x )])) * 255.0f);
355-
dst[4 * x + 1] = (uint8_t)astc::flt2int_rtn(astc::clamp1f(sf16_to_float(data16[(4 * dim_x * ymod) + (4 * x + 1)])) * 255.0f);
356-
dst[4 * x + 2] = (uint8_t)astc::flt2int_rtn(astc::clamp1f(sf16_to_float(data16[(4 * dim_x * ymod) + (4 * x + 2)])) * 255.0f);
357-
dst[4 * x + 3] = (uint8_t)astc::flt2int_rtn(astc::clamp1f(sf16_to_float(data16[(4 * dim_x * ymod) + (4 * x + 3)])) * 255.0f);
359+
vint4 colori(
360+
data16[(4 * dim_x * ymod) + (4 * x )],
361+
data16[(4 * dim_x * ymod) + (4 * x + 1)],
362+
data16[(4 * dim_x * ymod) + (4 * x + 2)],
363+
data16[(4 * dim_x * ymod) + (4 * x + 3)]
364+
);
365+
366+
vfloat4 color = float16_to_float(colori);
367+
color = clamp(0.0f, 1.0f, color) * 255.0f;
368+
369+
colori = float_to_int_rtn(color);
370+
pack_low_bytes(colori);
371+
store_nbytes(colori, dst + 4 * x);
358372
}
359373
}
360374
}

Source/astcenccli_toplevel.cpp

Lines changed: 14 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1128,24 +1128,26 @@ static vfloat4 image_get_pixel(
11281128
{
11291129
uint16_t* data = static_cast<uint16_t*>(img.data[z]);
11301130

1131-
float r = sf16_to_float(data[(4 * img.dim_x * y) + (4 * x )]);
1132-
float g = sf16_to_float(data[(4 * img.dim_x * y) + (4 * x + 1)]);
1133-
float b = sf16_to_float(data[(4 * img.dim_x * y) + (4 * x + 2)]);
1134-
float a = sf16_to_float(data[(4 * img.dim_x * y) + (4 * x + 3)]);
1135-
1136-
return vfloat4(r, g, b, a);
1131+
vint4 colori(
1132+
data[(4 * img.dim_x * y) + (4 * x )],
1133+
data[(4 * img.dim_x * y) + (4 * x + 1)],
1134+
data[(4 * img.dim_x * y) + (4 * x + 2)],
1135+
data[(4 * img.dim_x * y) + (4 * x + 3)]
1136+
);
1137+
1138+
return float16_to_float(colori);
11371139
}
11381140
else // if (img.data_type == ASTCENC_TYPE_F32)
11391141
{
11401142
assert(img.data_type == ASTCENC_TYPE_F32);
11411143
float* data = static_cast<float*>(img.data[z]);
11421144

1143-
float r = data[(4 * img.dim_x * y) + (4 * x )];
1144-
float g = data[(4 * img.dim_x * y) + (4 * x + 1)];
1145-
float b = data[(4 * img.dim_x * y) + (4 * x + 2)];
1146-
float a = data[(4 * img.dim_x * y) + (4 * x + 3)];
1147-
1148-
return vfloat4(r, g, b, a);
1145+
return vfloat4(
1146+
data[(4 * img.dim_x * y) + (4 * x )],
1147+
data[(4 * img.dim_x * y) + (4 * x + 1)],
1148+
data[(4 * img.dim_x * y) + (4 * x + 2)],
1149+
data[(4 * img.dim_x * y) + (4 * x + 3)]
1150+
);
11491151
}
11501152
}
11511153

0 commit comments

Comments
 (0)