Skip to content

Commit 7c794b6

Browse files
committed
Optimize encode32/decode32
1 parent 0b5b308 commit 7c794b6

1 file changed

Lines changed: 52 additions & 46 deletions

File tree

src/backend/mode_gpu.js

Lines changed: 52 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -261,62 +261,72 @@
261261
opt.hardcodeConstants ? 'highp vec2 uTexSize = vec2('+texSize[0]+','+texSize[1]+');' : 'uniform highp vec2 uTexSize;',
262262
'varying highp vec2 vTexCoord;',
263263
'',
264+
'vec4 round(vec4 x) {',
265+
' return floor(x + 0.5);',
266+
'}',
267+
'',
268+
'highp float round(highp float x) {',
269+
' return floor(x + 0.5);',
270+
'}',
271+
'',
272+
'vec2 integerMod(vec2 x, float y) {',
273+
' vec2 res = floor(mod(x, y));',
274+
' return res * step(1.0 - floor(y), -res);',
275+
'}',
276+
'',
277+
'vec3 integerMod(vec3 x, float y) {',
278+
' vec3 res = floor(mod(x, y));',
279+
' return res * step(1.0 - floor(y), -res);',
280+
'}',
281+
'',
282+
'vec4 integerMod(vec4 x, vec4 y) {',
283+
' vec4 res = floor(mod(x, y));',
284+
' return res * step(1.0 - floor(y), -res);',
285+
'}',
286+
'',
264287
'highp float integerMod(highp float x, highp float y) {',
265288
' highp float res = floor(mod(x, y));',
266-
' if (res > floor(y) - 1.0) res = 0.0;',
267-
' return res;',
289+
' return res * (res > floor(y) - 1.0 ? 0.0 : 1.0);',
268290
'}',
269291
'',
270292
'highp int integerMod(highp int x, highp int y) {',
271293
' return int(integerMod(float(x), float(y)));',
272294
'}',
273295
'',
274-
//'// Here be dragons!',
275-
//'// DO NOT OPTIMIZE THIS CODE',
276-
//'// YOU WILL BREAK SOMETHING ON SOMEBODY\'S MACHINE',
277-
//'// LEAVE IT AS IT IS, LEST YOU WASTE YOUR OWN TIME',
296+
'// Here be dragons!',
297+
'// DO NOT OPTIMIZE THIS CODE',
298+
'// YOU WILL BREAK SOMETHING ON SOMEBODY\'S MACHINE',
299+
'// LEAVE IT AS IT IS, LEST YOU WASTE YOUR OWN TIME',
300+
'const vec2 MAGIC_VEC = vec2(1.0, -256.0);',
301+
'const vec4 SCALE_FACTOR = vec4(1.0, 256.0, 65536.0, 0.0);',
302+
'const vec4 SCALE_FACTOR_INV = vec4(1.0, 0.00390625, 0.0000152587890625, 0.0); // 1, 1/256, 1/65536',
278303
'highp float decode32(highp vec4 rgba) {',
279304
(endianness == 'LE' ? '' : ' rgba.rgba = rgba.abgr;'),
280305
' rgba *= 255.0;',
281-
' int r = int(rgba.r+0.5);',
282-
' int g = int(rgba.g+0.5);',
283-
' int b = int(rgba.b+0.5);',
284-
' int a = int(rgba.a+0.5);',
285-
' int sign = a > 127 ? -1 : 1;',
286-
' int exponent = 2 * (a > 127 ? a - 128 : a) + (b > 127 ? 1 : 0);',
287-
' float res;',
288-
' if (exponent == 0) {',
289-
' res = float(sign) * 0.0;',
290-
' } else {',
291-
' exponent -= 127;',
292-
' res = exp2(float(exponent));',
293-
' res += float(b > 127 ? b - 128 : b) * exp2(float(exponent-7));',
294-
' res += float(g) * exp2(float(exponent-15));',
295-
' res += float(r) * exp2(float(exponent-23));',
296-
' res *= float(sign);',
297-
' }',
306+
' vec2 gte128;',
307+
' gte128.x = rgba.b >= 128.0 ? 1.0 : 0.0;',
308+
' gte128.y = rgba.a >= 128.0 ? 1.0 : 0.0;',
309+
' float exponent = 2.0 * rgba.a - 127.0 + dot(gte128, MAGIC_VEC);',
310+
' float res = exp2(round(exponent));',
311+
' rgba.b = rgba.b - 128.0 * gte128.x;',
312+
' res = dot(rgba, SCALE_FACTOR) * exp2(round(exponent-23.0)) + res;',
313+
' res *= gte128.y * -2.0 + 1.0;',
298314
' return res;',
299315
'}',
300316
'',
301317
'highp vec4 encode32(highp float f) {',
302-
' if (f == 0.0) return vec4(0.0);',
303318
' highp float F = abs(f);',
304319
' highp float sign = f < 0.0 ? 1.0 : 0.0;',
305-
' highp float log2F = log2(F);',
306-
' highp float exponent = floor(log2F);',
320+
' highp float exponent = floor(log2(F));',
307321
' highp float mantissa = (exp2(-exponent) * F);',
308-
' exponent = floor(log2F) + floor(log2(mantissa));',
309-
' highp float mantissa_part1 = integerMod(F * exp2(23.0-exponent), 256.0);',
310-
' highp float mantissa_part2 = integerMod(F * exp2(15.0-exponent), 256.0);',
311-
' highp float mantissa_part3 = integerMod(F * exp2(7.0-exponent), 128.0);',
312-
' exponent += 127.0;',
313-
' vec4 rgba;',
314-
' rgba.a = 128.0 * sign + exponent/2.0;',
315-
' rgba.b = 128.0 * integerMod(exponent, 2.0) + mantissa_part3;',
316-
' rgba.g = mantissa_part2;',
317-
' rgba.r = mantissa_part1;',
322+
' // exponent += floor(log2(mantissa));',
323+
' vec4 rgba = vec4(F * exp2(23.0-exponent)) * SCALE_FACTOR_INV;',
324+
' rgba.rg = integerMod(rgba.rg, 256.0);',
325+
' rgba.b = integerMod(rgba.b, 128.0);',
326+
' rgba.a = exponent*0.5 + 63.5;',
327+
' rgba.ba += vec2(integerMod(exponent+127.0, 2.0), sign) * 128.0;',
328+
' rgba *= 0.003921569; // 1/255',
318329
(endianness == 'LE' ? '' : ' rgba.rgba = rgba.abgr;'),
319-
' rgba *= 0.003921569;',
320330
' return rgba;',
321331
'}',
322332
'// Dragons end here',
@@ -325,7 +335,6 @@
325335
'highp vec3 threadId;',
326336
'',
327337
'highp vec3 indexTo3D(highp float idx, highp vec3 texDim) {',
328-
' idx = floor(idx + 0.5);',
329338
' highp float z = floor(idx / (texDim.x * texDim.y));',
330339
' idx -= z * texDim.x * texDim.y;',
331340
' highp float y = floor(idx / texDim.x);',
@@ -335,18 +344,15 @@
335344
'',
336345
'highp float get(highp sampler2D tex, highp vec2 texSize, highp vec3 texDim, highp float z, highp float y, highp float x) {',
337346
' highp vec3 xyz = vec3(x, y, z);',
338-
' xyz = floor(xyz + vec3(0.5));',
347+
' xyz = floor(xyz + 0.5);',
339348
(opt.wraparound ? ' xyz = mod(xyz, texDim);' : ''),
340-
' highp float index = floor(xyz.x + texDim.x * (xyz.y + texDim.y * xyz.z) + 0.5);',
349+
' highp float index = round(xyz.x + texDim.x * (xyz.y + texDim.y * xyz.z));',
341350
(opt.floatTextures ? ' int channel = int(integerMod(index, 4.0));' : ''),
342351
(opt.floatTextures ? ' index = float(int(index)/4);' : ''),
343-
' highp float w = floor(texSize.x + 0.5);',
344-
' highp float s = integerMod(index, w);',
345-
' highp float t = float(int(index) / int(w));',
346-
' s += 0.5;',
347-
' t += 0.5;',
352+
' highp float w = round(texSize.x);',
353+
' vec2 st = vec2(integerMod(index, w), float(int(index) / int(w))) + 0.5;',
348354
(opt.floatTextures ? ' index = float(int(index)/4);' : ''),
349-
' highp vec4 texel = texture2D(tex, vec2(s / texSize.x, t / texSize.y));',
355+
' highp vec4 texel = texture2D(tex, st / texSize);',
350356
(opt.floatTextures ? ' if (channel == 0) return texel.r;' : ''),
351357
(opt.floatTextures ? ' if (channel == 1) return texel.g;' : ''),
352358
(opt.floatTextures ? ' if (channel == 2) return texel.b;' : ''),

0 commit comments

Comments
 (0)