Skip to content

Commit d5aff7f

Browse files
committed
Remove folded decimation_info weight arrays
Falling back to the non-folded arrays is marginally slower on an ideal memory system because of the additional indirect loads. However, removing the folded arrays significantly improves cache behavior, which offsets the loss. This change reduces the context creation time and memory footprint of the compressor. The benefit is most significant for larger block sizes, which have the most decimation_info structures to create and access.
1 parent f758d21 commit d5aff7f

4 files changed

Lines changed: 34 additions & 57 deletions

File tree

Docs/ChangeLog-4x.md

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,21 @@ release of the 4.x series.
66
All performance data on this page is measured on an Intel Core i5-9600K
77
clocked at 4.2 GHz, running `astcenc` using AVX2 and 6 threads.
88

9+
<!-- ---------------------------------------------------------------------- -->
10+
## 4.3.0
11+
12+
**Status:** In development
13+
14+
The 4.3.0 release is an optimization release. There are minor performance
15+
and image quality improvements in this release.
16+
17+
* **General:**
18+
* **Optimization:** Always skip blue-contraction for `QUANT_256` encodings.
19+
This gives a small image quality improvement for the 4x4 block size.
20+
* **Optimization:** Remove folded `decimation_info` lookup tables. This
21+
reduces compressor memory footprint and improves context creation time.
22+
Impact increases with the active block size.
23+
924
<!-- ---------------------------------------------------------------------- -->
1025
## 4.2.0
1126

@@ -175,4 +190,4 @@ Key for charts:
175190

176191
- - -
177192

178-
_Copyright © 2022, Arm Limited and contributors. All rights reserved._
193+
_Copyright © 2022-2023, Arm Limited and contributors. All rights reserved._

Source/astcenc_block_sizes.cpp

Lines changed: 9 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// ----------------------------------------------------------------------------
3-
// Copyright 2011-2022 Arm Limited
3+
// Copyright 2011-2023 Arm Limited
44
//
55
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
66
// use this file except in compliance with the License. You may obtain a copy
@@ -359,30 +359,17 @@ static void init_decimation_info_2d(
359359
di.weight_texel[j][i] = texel;
360360
di.weights_flt[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]);
361361

362-
// perform a layer of array unrolling. An aspect of this unrolling is that
363-
// one of the texel-weight indexes is an identity-mapped index; we will use this
364-
// fact to reorder the indexes so that the first one is the identity index.
365-
int swap_idx = -1;
362+
// Store the per-texel contribution of this weight for each texel it contributes to
363+
di.texel_weight_for_weight[i][j] = 0.0f;
366364
for (unsigned int k = 0; k < 4; k++)
367365
{
368366
uint8_t dttw = di.texel_weights_4t[k][texel];
369367
float dttwf = di.texel_weights_float_4t[k][texel];
370368
if (dttw == i && dttwf != 0.0f)
371369
{
372-
swap_idx = k;
370+
di.texel_weight_for_weight[i][j] = di.texel_weights_float_4t[k][texel];
371+
break;
373372
}
374-
di.texel_weights_texel[i][j][k] = dttw;
375-
di.texel_weights_float_texel[i][j][k] = dttwf;
376-
}
377-
378-
if (swap_idx != 0)
379-
{
380-
uint8_t vi = di.texel_weights_texel[i][j][0];
381-
float vf = di.texel_weights_float_texel[i][j][0];
382-
di.texel_weights_texel[i][j][0] = di.texel_weights_texel[i][j][swap_idx];
383-
di.texel_weights_float_texel[i][j][0] = di.texel_weights_float_texel[i][j][swap_idx];
384-
di.texel_weights_texel[i][j][swap_idx] = vi;
385-
di.texel_weights_float_texel[i][j][swap_idx] = vf;
386373
}
387374
}
388375

@@ -628,30 +615,17 @@ static void init_decimation_info_3d(
628615
di.weight_texel[j][i] = static_cast<uint8_t>(texel);
629616
di.weights_flt[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]);
630617

631-
// perform a layer of array unrolling. An aspect of this unrolling is that
632-
// one of the texel-weight indexes is an identity-mapped index; we will use this
633-
// fact to reorder the indexes so that the first one is the identity index.
634-
int swap_idx = -1;
618+
// Store the per-texel contribution of this weight for each texel it contributes to
619+
di.texel_weight_for_weight[i][j] = 0.0f;
635620
for (unsigned int k = 0; k < 4; k++)
636621
{
637622
uint8_t dttw = di.texel_weights_4t[k][texel];
638623
float dttwf = di.texel_weights_float_4t[k][texel];
639624
if (dttw == i && dttwf != 0.0f)
640625
{
641-
swap_idx = k;
626+
di.texel_weight_for_weight[i][j] = di.texel_weights_float_4t[k][texel];
627+
break;
642628
}
643-
di.texel_weights_texel[i][j][k] = dttw;
644-
di.texel_weights_float_texel[i][j][k] = dttwf;
645-
}
646-
647-
if (swap_idx != 0)
648-
{
649-
uint8_t vi = di.texel_weights_texel[i][j][0];
650-
float vf = di.texel_weights_float_texel[i][j][0];
651-
di.texel_weights_texel[i][j][0] = di.texel_weights_texel[i][j][swap_idx];
652-
di.texel_weights_float_texel[i][j][0] = di.texel_weights_float_texel[i][j][swap_idx];
653-
di.texel_weights_texel[i][j][swap_idx] = vi;
654-
di.texel_weights_float_texel[i][j][swap_idx] = vf;
655629
}
656630
}
657631

Source/astcenc_compress_symbolic.cpp

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// ----------------------------------------------------------------------------
3-
// Copyright 2011-2022 Arm Limited
3+
// Copyright 2011-2023 Arm Limited
44
//
55
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
66
// use this file except in compliance with the License. You may obtain a copy
@@ -279,15 +279,12 @@ static bool realign_weights_decimated(
279279
{
280280
unsigned int texel = di.weight_texel[te_idx][we_idx];
281281

282-
const uint8_t *texel_weights = di.texel_weights_texel[we_idx][te_idx];
283-
const float *texel_weights_float = di.texel_weights_float_texel[we_idx][te_idx];
282+
float tw_base = di.texel_weight_for_weight[we_idx][te_idx];
284283

285-
float tw_base = texel_weights_float[0];
286-
287-
float weight_base = (uqw_base * tw_base
288-
+ uq_weightsf[texel_weights[1]] * texel_weights_float[1])
289-
+ (uq_weightsf[texel_weights[2]] * texel_weights_float[2]
290-
+ uq_weightsf[texel_weights[3]] * texel_weights_float[3]);
284+
float weight_base = (uq_weightsf[di.texel_weights_4t[0][texel]] * di.texel_weights_float_4t[0][texel]
285+
+ uq_weightsf[di.texel_weights_4t[1][texel]] * di.texel_weights_float_4t[1][texel])
286+
+ (uq_weightsf[di.texel_weights_4t[2][texel]] * di.texel_weights_float_4t[2][texel]
287+
+ uq_weightsf[di.texel_weights_4t[3][texel]] * di.texel_weights_float_4t[3][texel]);
291288

292289
// Ideally this is integer rounded, but the IQ gain isn't worth the overhead
293290
// float weight = astc::flt_rd(weight_base + 0.5f);

Source/astcenc_internal.h

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// ----------------------------------------------------------------------------
3-
// Copyright 2011-2022 Arm Limited
3+
// Copyright 2011-2023 Arm Limited
44
//
55
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
66
// use this file except in compliance with the License. You may obtain a copy
@@ -381,17 +381,8 @@ struct decimation_info
381381
/** @brief The weight contribution, stored as a float, to each texel that uses a specific weight. */
382382
alignas(ASTCENC_VECALIGN) float weights_flt[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];
383383

384-
/**
385-
* @brief Folded structure for faster access:
386-
* texel_weights_texel[i][j][.] = texel_weights[.][weight_texel[i][j]]
387-
*/
388-
uint8_t texel_weights_texel[BLOCK_MAX_WEIGHTS][BLOCK_MAX_TEXELS][4];
389-
390-
/**
391-
* @brief Folded structure for faster access:
392-
* texel_weights_float_texel[i][j][.] = texel_weights_float[.][weight_texel[i][j]]
393-
*/
394-
float texel_weights_float_texel[BLOCK_MAX_WEIGHTS][BLOCK_MAX_TEXELS][4];
384+
/** @brief The weight contribution to the total texel weighting for each weight and texel. */
385+
float texel_weight_for_weight[BLOCK_MAX_WEIGHTS][BLOCK_MAX_TEXELS];
395386
};
396387

397388
/**

0 commit comments

Comments
 (0)