Skip to content

Commit 4d54720

Browse files
committed
Optimise get
1 parent df0f368 commit 4d54720

1 file changed

Lines changed: 40 additions & 44 deletions

File tree

src/backend/mode_gpu.js

Lines changed: 40 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -254,19 +254,20 @@
254254
'precision highp int;',
255255
'',
256256
'#define LOOP_MAX '+ (opt.loopMaxIterations ? parseInt(opt.loopMaxIterations)+'.0' : '100.0'),
257+
'#define EPSILON 0.0000001',
257258
'',
258259
opt.hardcodeConstants ? 'vec3 uOutputDim = vec3('+threadDim[0]+','+threadDim[1]+', '+ threadDim[2]+');' : 'uniform vec3 uOutputDim;',
259260
opt.hardcodeConstants ? 'vec2 uTexSize = vec2('+texSize[0]+','+texSize[1]+');' : 'uniform vec2 uTexSize;',
260261
'varying vec2 vTexCoord;',
261262
'',
262263
'float integerMod(float x, float y) {',
263-
' float res = x - y * floor(x/y);',
264+
' float res = floor(x - y * floor(x/y));',
264265
' if (res > y - 0.5) res = 0.0;',
265266
' return res;',
266267
'}',
267268
'',
268269
'int integerMod(int x, int y) {',
269-
' return int(integerMod(float(x), float(y))+0.5);',
270+
' return int(integerMod(float(x), float(y)));',
270271
'}',
271272
'',
272273
'// Here be dragons!',
@@ -276,60 +277,57 @@
276277
'highp float decode32(highp vec4 rgba) {',
277278
(endianness == 'LE' ? '' : ' rgba.rgba = rgba.abgr;'),
278279
' rgba *= 255.0;',
279-
' int r = int(rgba.r+0.5);',
280-
' int g = int(rgba.g+0.5);',
281-
' int b = int(rgba.b+0.5);',
282-
' int a = int(rgba.a+0.5);',
283-
' int sign = a > 127 ? -1 : 1;',
284-
' int exponent = 2 * (a > 127 ? a - 128 : a) + (b > 127 ? 1 : 0);',
280+
' rgba = floor(rgba+0.5);',
281+
' float sign = rgba.a > 127.0 ? -1.0 : 1.0;',
282+
' float exponent = 2.0 * integerMod(rgba.a, 128.0) + (rgba.b > 127.0 ? 1.0 : 0.0);',
285283
' float res;',
286-
' if (exponent == 0) {',
287-
' res = float(sign) * 0.0;',
284+
' if (abs(exponent) < EPSILON) {',
285+
' res = sign * 0.0;',
288286
' } else {',
289-
' exponent -= 127;',
290-
' res = exp2(float(exponent));',
291-
' res += integerMod(float(b), 128.0) * exp2(float(exponent-7));',
292-
' res += float(g) * exp2(float(exponent-15));',
293-
' res += float(r) * exp2(float(exponent-23));',
294-
' res *= float(sign);',
287+
' exponent -= 127.0;',
288+
' res = exp2(exponent);',
289+
' res += integerMod(rgba.b, 128.0) * exp2(exponent-7.0);',
290+
' res += rgba.g * exp2(exponent-15.0);',
291+
' res += rgba.r * exp2(exponent-23.0);',
292+
' res *= sign;',
295293
' }',
296294
' return res;',
297295
'}',
298296
'',
299297
'highp vec4 encode32(highp float f) {',
300298
' if (f == 0.0) return vec4(0.0);',
301299
' highp float F = abs(f);',
302-
' int sign = f < 0.0 ? 1 : 0;',
300+
' float sign = f < 0.0 ? 1.0 : 0.0;',
303301
' float log2F = log2(F);',
304302
' highp float exponentF = floor(log2F); ',
305303
' highp float mantissaF = (exp2(-exponentF) * F);',
306304
' exponentF = floor(log2F + 127.0) + floor(log2(mantissaF));',
307-
' int exponent;',
305+
' float exponent;',
308306
' if (f > 1000.0) {',
309-
' exponent = log2F < 0.0 ? int(log2F)-1 : int(log2F);',
307+
' exponent = log2F < 0.0 ? floor(log2F)-1.0 : floor(log2F);',
310308
' } else {',
311-
' exponent = int(exponentF) - 127;',
309+
' exponent = exponentF - 127.0;',
312310
' }',
313-
' int mantissa_part1 = integerMod(int(F * exp2(float(23-exponent))), 256);',
314-
' int mantissa_part2 = integerMod(int(F * exp2(float(15-exponent))), 256);',
315-
' int mantissa_part3 = integerMod(int(F * exp2(float(7-exponent))), 128);',
316-
' float test = exp2(float(exponent));',
317-
' test += float(mantissa_part3) * exp2(float(exponent-7));',
318-
' test += float(mantissa_part2) * exp2(float(exponent-15));',
319-
' test += float(mantissa_part1) * exp2(float(exponent-23));',
320-
' float error = log2(test) - log2F;',
311+
' float mantissa_part1 = integerMod(F * exp2(23.0-exponent), 256.0);',
312+
' float mantissa_part2 = integerMod(F * exp2(15.0-exponent), 256.0);',
313+
' float mantissa_part3 = integerMod(F * exp2(7.0-exponent), 128.0);',
314+
' float test = exp2(exponent);',
315+
' test += mantissa_part3 * exp2(exponent-7.0);',
316+
' test += mantissa_part2 * exp2(exponent-15.0);',
317+
' test += mantissa_part1 * exp2(exponent-23.0);',
318+
' float error = floor(log2(test) - log2F);',
321319
' if (abs(error) > 0.0) {',
322-
' exponent -= int(error);',
323-
' mantissa_part1 = integerMod(int(F * exp2(float(23-exponent))), 256);',
324-
' mantissa_part2 = integerMod(int(F * exp2(float(15-exponent))), 256);',
325-
' mantissa_part3 = integerMod(int(F * exp2(float(7-exponent))), 128);',
320+
' exponent -= error;',
321+
' mantissa_part1 = integerMod(F * exp2(23.0-exponent), 256.0);',
322+
' mantissa_part2 = integerMod(F * exp2(15.0-exponent), 256.0);',
323+
' mantissa_part3 = integerMod(F * exp2(7.0-exponent), 128.0);',
326324
' }',
327-
' exponent += 127;',
328-
' int a = 128 * sign + (exponent)/2;',
329-
' int b = 128 * integerMod(exponent, 2) + mantissa_part3;',
330-
' int g = mantissa_part2;',
331-
' int r = mantissa_part1;',
332-
' vec4 rgba = vec4(float(r), float(g), float(b), float(a));',
325+
' exponent += 127.0;',
326+
' vec4 rgba;',
327+
' rgba.a = 128.0 * sign + floor(exponent/2.0);',
328+
' rgba.b = 128.0 * integerMod(exponent, 2.0) + mantissa_part3;',
329+
' rgba.g = mantissa_part2;',
330+
' rgba.r = mantissa_part1;',
333331
(endianness == 'LE' ? '' : ' rgba.rgba = rgba.abgr;'),
334332
' rgba /= 255.0;',
335333
' return rgba;',
@@ -351,12 +349,10 @@
351349
'float get(sampler2D tex, vec2 texSize, vec3 texDim, float z, float y, float x) {',
352350
' vec3 xyz = vec3(floor(x + 0.5), floor(y + 0.5), floor(z + 0.5));',
353351
(opt.wraparound ? ' xyz = mod(xyz, texDim);' : ''),
354-
' int index = int((xyz.z * texDim.x * texDim.y) + (xyz.y * texDim.x) + xyz.x + 0.5);',
355-
' int w = int(texSize.x + 0.5);',
356-
' int sI = integerMod(index, w);',
357-
' int tI = index / w;',
358-
' float s = float(sI);',
359-
' float t = float(tI);',
352+
' float index = floor((xyz.z * texDim.x * texDim.y) + (xyz.y * texDim.x) + xyz.x + 0.5);',
353+
' float w = floor(texSize.x + 0.5);',
354+
' float s = integerMod(index, w);',
355+
' float t = float(int(index) / int(w));',
360356
' s += 0.5;',
361357
' t += 0.5;',
362358
' return decode32(texture2D(tex, vec2(s / texSize.x, t / texSize.y)));',

0 commit comments

Comments (0)