Skip to content

Commit 002103b

Browse files
committed
Inline computation of derivatives
1 parent f46cd12 commit 002103b

3 files changed

Lines changed: 35 additions & 54 deletions

File tree

Source/astcenc_compress_symbolic.cpp

Lines changed: 35 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -991,13 +991,17 @@ static float prepare_error_weight_block(
991991
ctx.config.v_rgb_mean != 0.0f || ctx.config.v_rgb_stdev != 0.0f || \
992992
ctx.config.v_a_mean != 0.0f || ctx.config.v_a_stdev != 0.0f;
993993

994-
vfloat4 derv[MAX_TEXELS_PER_BLOCK];
995-
imageblock_initialize_deriv(blk, bsd->texel_count, derv);
996994
vfloat4 color_weights(ctx.config.cw_r_weight,
997995
ctx.config.cw_g_weight,
998996
ctx.config.cw_b_weight,
999997
ctx.config.cw_a_weight);
1000998

999+
// This works because HDR is imposed globally at compression time
1000+
int rgb_lns = blk->rgb_lns[0];
1001+
int a_lns = blk->alpha_lns[0];
1002+
vint4 use_lns(rgb_lns, rgb_lns, rgb_lns, a_lns);
1003+
vmask4 lns_mask = use_lns != vint4::zero();
1004+
10011005
for (int z = 0; z < bsd->zdim; z++)
10021006
{
10031007
for (int y = 0; y < bsd->ydim; y++)
@@ -1014,6 +1018,34 @@ static float prepare_error_weight_block(
10141018
}
10151019
else
10161020
{
1021+
vfloat4 derv(65535.0f);
1022+
1023+
// Compute derivative if we have any use of LNS
1024+
if (any(lns_mask))
1025+
{
1026+
// TODO: Can we avoid some of the multi-type translation?
1027+
vfloat4 data = blk->texel(idx);
1028+
vint4 datai = lns_to_sf16(float_to_int(data));
1029+
1030+
vfloat4 dataf = float16_to_float(datai);
1031+
dataf = max(dataf, 6e-5f);
1032+
1033+
vfloat4 data_lns1 = dataf * 1.05f;
1034+
data_lns1 = float_to_lns(data_lns1);
1035+
1036+
vfloat4 data_lns2 = dataf;
1037+
data_lns2 = float_to_lns(data_lns2);
1038+
1039+
vfloat4 divisor_lns = dataf * 0.05f;
1040+
1041+
// Clamp derivatives between 1/32 and 2^25
1042+
float lo = 1.0f / 32.0f;
1043+
float hi = 33554432.0f;
1044+
vfloat4 derv_lns = clamp(lo, hi, (data_lns1 - data_lns2) / divisor_lns);
1045+
derv = select(derv, derv_lns, lns_mask);
1046+
}
1047+
1048+
// Compute error weight
10171049
vfloat4 error_weight(ctx.config.v_rgb_base,
10181050
ctx.config.v_rgb_base,
10191051
ctx.config.v_rgb_base,
@@ -1099,7 +1131,7 @@ static float prepare_error_weight_block(
10991131
// which is equivalent to dividing by the derivative of the transfer
11001132
// function.
11011133

1102-
error_weight = error_weight / (derv[idx] * derv[idx] * 1e-10f);
1134+
error_weight = error_weight / (derv * derv * 1e-10f);
11031135
ewb->error_weights[idx] = error_weight;
11041136
}
11051137
idx++;

Source/astcenc_image.cpp

Lines changed: 0 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -24,52 +24,6 @@
2424

2525
#include "astcenc_internal.h"
2626

27-
void imageblock_initialize_deriv(
28-
const imageblock* blk,
29-
int pixelcount,
30-
vfloat4* dptr
31-
) {
32-
// TODO: For LDR on the current codec we can skip this if no LNS and just
33-
// early-out as we use the same LNS settings everywhere ...
34-
for (int i = 0; i < pixelcount; i++)
35-
{
36-
vfloat4 derv_unorm(65535.0f);
37-
vfloat4 derv_lns = vfloat4::zero();
38-
39-
// TODO: Pack these into bits and avoid the disjoint fetch
40-
int rgb_lns = blk->rgb_lns[i];
41-
int a_lns = blk->alpha_lns[i];
42-
43-
// Compute derivatives if we have any use of LNS
44-
if (rgb_lns || a_lns)
45-
{
46-
vfloat4 data = blk->texel(i);
47-
vint4 datai = lns_to_sf16(float_to_int(data));
48-
49-
vfloat4 dataf = float16_to_float(datai);
50-
dataf = max(dataf, 6e-5f);
51-
52-
vfloat4 data_lns1 = dataf * 1.05f;
53-
data_lns1 = float_to_lns(data_lns1);
54-
55-
vfloat4 data_lns2 = dataf;
56-
data_lns2 = float_to_lns(data_lns2);
57-
58-
vfloat4 divisor_lns = dataf * 0.05f;
59-
60-
// Clamp derivatives between 1/32 and 2^25
61-
float lo = 1.0f / 32.0f;
62-
float hi = 33554432.0f;
63-
derv_lns = clamp(lo, hi, (data_lns1 - data_lns2) / divisor_lns);
64-
}
65-
66-
vint4 use_lns(rgb_lns, rgb_lns, rgb_lns, a_lns);
67-
vmask4 lns_mask = use_lns != vint4::zero();
68-
*dptr = select(derv_unorm, derv_lns, lns_mask);
69-
dptr++;
70-
}
71-
}
72-
7327
// helper function to initialize the work-data from the orig-data
7428
static void imageblock_initialize_work_from_orig(
7529
imageblock* blk,

Source/astcenc_internal.h

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1344,11 +1344,6 @@ void expand_deblock_weights(
13441344
// functions pertaining to weight alignment
13451345
void prepare_angular_tables();
13461346

1347-
void imageblock_initialize_deriv(
1348-
const imageblock* blk,
1349-
int pixelcount,
1350-
vfloat4* dptr);
1351-
13521347
void compute_angular_endpoints_1plane(
13531348
bool only_always,
13541349
const block_size_descriptor* bsd,

0 commit comments

Comments
 (0)