Skip to content

Commit 7b1b3bc

Browse files
committed
Scalarize error after loop in realign_weights_decimated
1 parent 311f7b9 commit 7b1b3bc

2 files changed

Lines changed: 10 additions & 9 deletions

File tree

Source/astcenc_block_sizes.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -953,7 +953,7 @@ static void construct_block_size_descriptor_2d(
953953

954954
if (is_dual_plane)
955955
{
956-
dm.ref_2_planes= 1;
956+
dm.ref_2_planes = 1;
957957
}
958958
else
959959
{
@@ -1081,7 +1081,7 @@ static void construct_block_size_descriptor_3d(
10811081
bsd.decimation_modes[decimation_mode_count].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
10821082
bsd.decimation_modes[decimation_mode_count].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
10831083
bsd.decimation_modes[decimation_mode_count].ref_1_plane = maxprec_1plane == -1 ? 0 : 1;
1084-
bsd.decimation_modes[decimation_mode_count].ref_2_planes= maxprec_2planes == -1 ? 0 : 1;
1084+
bsd.decimation_modes[decimation_mode_count].ref_2_planes = maxprec_2planes == -1 ? 0 : 1;
10851085
decimation_mode_count++;
10861086
}
10871087
}

Source/astcenc_compress_symbolic.cpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -318,19 +318,20 @@ static bool realign_weights_decimated(
318318
vfloat4 color = color_base + color_offset * plane_weight;
319319

320320
vfloat4 orig_color = blk.texel(texel);
321-
vfloat4 error_weight = blk.channel_weight;
322321

323322
vfloat4 color_diff = color - orig_color;
324323
vfloat4 color_up_diff = color_diff + color_offset * plane_up_weight;
325324
vfloat4 color_down_diff = color_diff + color_offset * plane_down_weight;
326-
current_errorv += dot(color_diff * color_diff, error_weight);
327-
up_errorv += dot(color_up_diff * color_up_diff, error_weight);
328-
down_errorv += dot(color_down_diff * color_down_diff, error_weight);
325+
326+
current_errorv += color_diff * color_diff;
327+
up_errorv += color_up_diff * color_up_diff;
328+
down_errorv += color_down_diff * color_down_diff;
329329
}
330330

331-
float current_error = current_errorv.lane<0>();
332-
float up_error = up_errorv.lane<0>();
333-
float down_error = down_errorv.lane<0>();
331+
vfloat4 error_weight = blk.channel_weight;
332+
float current_error = hadd_s(current_errorv * error_weight);
333+
float up_error = hadd_s(up_errorv * error_weight);
334+
float down_error = hadd_s(down_errorv * error_weight);
334335

335336
// Check if the prev or next error is better, and if so use it
336337
if ((up_error < current_error) && (up_error < down_error))

0 commit comments

Comments
 (0)