1// SPDX-License-Identifier: MIT
2//
3// Copyright 2024 Advanced Micro Devices, Inc.
4
5#include "spl_fixpt31_32.h"
6
7static const struct spl_fixed31_32 spl_fixpt_two_pi = { 26986075409LL };
8static const struct spl_fixed31_32 spl_fixpt_ln2 = { 2977044471LL };
9static const struct spl_fixed31_32 spl_fixpt_ln2_div_2 = { 1488522236LL };
10
11static inline unsigned long long abs_i64(
12 long long arg)
13{
14 if (arg > 0)
15 return (unsigned long long)arg;
16 else
17 return (unsigned long long)(-arg);
18}
19
20/*
21 * @brief
22 * result = dividend / divisor
23 * *remainder = dividend % divisor
24 */
25static inline unsigned long long spl_complete_integer_division_u64(
26 unsigned long long dividend,
27 unsigned long long divisor,
28 unsigned long long *remainder)
29{
30 unsigned long long result;
31
32 result = spl_div64_u64_rem(dividend, divisor, remainder);
33
34 return result;
35}
36
37
38#define FRACTIONAL_PART_MASK \
39 ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1)
40
41#define GET_INTEGER_PART(x) \
42 ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART)
43
44#define GET_FRACTIONAL_PART(x) \
45 (FRACTIONAL_PART_MASK & (x))
46
47struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator)
48{
49 struct spl_fixed31_32 res;
50
51 bool arg1_negative = numerator < 0;
52 bool arg2_negative = denominator < 0;
53
54 unsigned long long arg1_value = arg1_negative ? -numerator : numerator;
55 unsigned long long arg2_value = arg2_negative ? -denominator : denominator;
56
57 unsigned long long remainder;
58
59 /* determine integer part */
60
61 unsigned long long res_value = spl_complete_integer_division_u64(
62 dividend: arg1_value, divisor: arg2_value, remainder: &remainder);
63
64 SPL_ASSERT(res_value <= (unsigned long long)LONG_MAX);
65
66 /* determine fractional part */
67 {
68 unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART;
69
70 do {
71 remainder <<= 1;
72
73 res_value <<= 1;
74
75 if (remainder >= arg2_value) {
76 res_value |= 1;
77 remainder -= arg2_value;
78 }
79 } while (--i != 0);
80 }
81
82 /* round up LSB */
83 {
84 unsigned long long summand = (remainder << 1) >= arg2_value;
85
86 SPL_ASSERT(res_value <= (unsigned long long)LLONG_MAX - summand);
87
88 res_value += summand;
89 }
90
91 res.value = (long long)res_value;
92
93 if (arg1_negative ^ arg2_negative)
94 res.value = -res.value;
95
96 return res;
97}
98
99struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2)
100{
101 struct spl_fixed31_32 res;
102
103 bool arg1_negative = arg1.value < 0;
104 bool arg2_negative = arg2.value < 0;
105
106 unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value;
107 unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value;
108
109 unsigned long long arg1_int = GET_INTEGER_PART(arg1_value);
110 unsigned long long arg2_int = GET_INTEGER_PART(arg2_value);
111
112 unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value);
113 unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value);
114
115 unsigned long long tmp;
116
117 res.value = arg1_int * arg2_int;
118
119 SPL_ASSERT(res.value <= (long long)LONG_MAX);
120
121 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
122
123 tmp = arg1_int * arg2_fra;
124
125 SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
126
127 res.value += tmp;
128
129 tmp = arg2_int * arg1_fra;
130
131 SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
132
133 res.value += tmp;
134
135 tmp = arg1_fra * arg2_fra;
136
137 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
138 (tmp >= (unsigned long long)spl_fixpt_half.value);
139
140 SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
141
142 res.value += tmp;
143
144 if (arg1_negative ^ arg2_negative)
145 res.value = -res.value;
146
147 return res;
148}
149
150struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg)
151{
152 struct spl_fixed31_32 res;
153
154 unsigned long long arg_value = abs_i64(arg: arg.value);
155
156 unsigned long long arg_int = GET_INTEGER_PART(arg_value);
157
158 unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value);
159
160 unsigned long long tmp;
161
162 res.value = arg_int * arg_int;
163
164 SPL_ASSERT(res.value <= (long long)LONG_MAX);
165
166 res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
167
168 tmp = arg_int * arg_fra;
169
170 SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
171
172 res.value += tmp;
173
174 SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
175
176 res.value += tmp;
177
178 tmp = arg_fra * arg_fra;
179
180 tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
181 (tmp >= (unsigned long long)spl_fixpt_half.value);
182
183 SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
184
185 res.value += tmp;
186
187 return res;
188}
189
190struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg)
191{
192 /*
193 * @note
194 * Good idea to use Newton's method
195 */
196
197 return spl_fixpt_from_fraction(
198 numerator: spl_fixpt_one.value,
199 denominator: arg.value);
200}
201
202struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg)
203{
204 struct spl_fixed31_32 square;
205
206 struct spl_fixed31_32 res = spl_fixpt_one;
207
208 int n = 27;
209
210 struct spl_fixed31_32 arg_norm = arg;
211
212 if (spl_fixpt_le(
213 arg1: spl_fixpt_two_pi,
214 arg2: spl_fixpt_abs(arg))) {
215 arg_norm = spl_fixpt_sub(
216 arg1: arg_norm,
217 arg2: spl_fixpt_mul_int(
218 arg1: spl_fixpt_two_pi,
219 arg2: (int)spl_div64_s64(
220 dividend: arg_norm.value,
221 divisor: spl_fixpt_two_pi.value)));
222 }
223
224 square = spl_fixpt_sqr(arg: arg_norm);
225
226 do {
227 res = spl_fixpt_sub(
228 arg1: spl_fixpt_one,
229 arg2: spl_fixpt_div_int(
230 arg1: spl_fixpt_mul(
231 arg1: square,
232 arg2: res),
233 arg2: n * (n - 1)));
234
235 n -= 2;
236 } while (n > 2);
237
238 if (arg.value != arg_norm.value)
239 res = spl_fixpt_div(
240 arg1: spl_fixpt_mul(arg1: res, arg2: arg_norm),
241 arg2: arg);
242
243 return res;
244}
245
246struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg)
247{
248 return spl_fixpt_mul(
249 arg1: arg,
250 arg2: spl_fixpt_sinc(arg));
251}
252
253struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg)
254{
255 /* TODO implement argument normalization */
256
257 const struct spl_fixed31_32 square = spl_fixpt_sqr(arg);
258
259 struct spl_fixed31_32 res = spl_fixpt_one;
260
261 int n = 26;
262
263 do {
264 res = spl_fixpt_sub(
265 arg1: spl_fixpt_one,
266 arg2: spl_fixpt_div_int(
267 arg1: spl_fixpt_mul(
268 arg1: square,
269 arg2: res),
270 arg2: n * (n - 1)));
271
272 n -= 2;
273 } while (n != 0);
274
275 return res;
276}
277
278/*
279 * @brief
280 * result = exp(arg),
281 * where abs(arg) < 1
282 *
283 * Calculated as Taylor series.
284 */
285static struct spl_fixed31_32 spl_fixed31_32_exp_from_taylor_series(struct spl_fixed31_32 arg)
286{
287 unsigned int n = 9;
288
289 struct spl_fixed31_32 res = spl_fixpt_from_fraction(
290 numerator: n + 2,
291 denominator: n + 1);
292 /* TODO find correct res */
293
294 SPL_ASSERT(spl_fixpt_lt(arg, spl_fixpt_one));
295
296 do
297 res = spl_fixpt_add(
298 arg1: spl_fixpt_one,
299 arg2: spl_fixpt_div_int(
300 arg1: spl_fixpt_mul(
301 arg1: arg,
302 arg2: res),
303 arg2: n));
304 while (--n != 1);
305
306 return spl_fixpt_add(
307 arg1: spl_fixpt_one,
308 arg2: spl_fixpt_mul(
309 arg1: arg,
310 arg2: res));
311}
312
313struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg)
314{
315 /*
316 * @brief
317 * Main equation is:
318 * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r),
319 * where m = round(x / ln(2)), r = x - m * ln(2)
320 */
321
322 if (spl_fixpt_le(
323 arg1: spl_fixpt_ln2_div_2,
324 arg2: spl_fixpt_abs(arg))) {
325 int m = spl_fixpt_round(
326 arg: spl_fixpt_div(
327 arg1: arg,
328 arg2: spl_fixpt_ln2));
329
330 struct spl_fixed31_32 r = spl_fixpt_sub(
331 arg1: arg,
332 arg2: spl_fixpt_mul_int(
333 arg1: spl_fixpt_ln2,
334 arg2: m));
335
336 SPL_ASSERT(m != 0);
337
338 SPL_ASSERT(spl_fixpt_lt(
339 spl_fixpt_abs(r),
340 spl_fixpt_one));
341
342 if (m > 0)
343 return spl_fixpt_shl(
344 arg: spl_fixed31_32_exp_from_taylor_series(arg: r),
345 shift: (unsigned int)m);
346 else
347 return spl_fixpt_div_int(
348 arg1: spl_fixed31_32_exp_from_taylor_series(arg: r),
349 arg2: 1LL << -m);
350 } else if (arg.value != 0)
351 return spl_fixed31_32_exp_from_taylor_series(arg);
352 else
353 return spl_fixpt_one;
354}
355
356struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg)
357{
358 struct spl_fixed31_32 res = spl_fixpt_neg(arg: spl_fixpt_one);
359 /* TODO improve 1st estimation */
360
361 struct spl_fixed31_32 error;
362
363 SPL_ASSERT(arg.value > 0);
364 /* TODO if arg is negative, return NaN */
365 /* TODO if arg is zero, return -INF */
366
367 do {
368 struct spl_fixed31_32 res1 = spl_fixpt_add(
369 arg1: spl_fixpt_sub(
370 arg1: res,
371 arg2: spl_fixpt_one),
372 arg2: spl_fixpt_div(
373 arg1: arg,
374 arg2: spl_fixpt_exp(arg: res)));
375
376 error = spl_fixpt_sub(
377 arg1: res,
378 arg2: res1);
379
380 res = res1;
381 /* TODO determine max_allowed_error based on quality of exp() */
382 } while (abs_i64(arg: error.value) > 100ULL);
383
384 return res;
385}
386
387
388/* this function is a generic helper to translate fixed point value to
389 * specified integer format that will consist of integer_bits integer part and
390 * fractional_bits fractional part. For example it is used in
391 * spl_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional
392 * part in 32 bits. It is used in hw programming (scaler)
393 */
394
395static inline unsigned int spl_ux_dy(
396 long long value,
397 unsigned int integer_bits,
398 unsigned int fractional_bits)
399{
400 /* 1. create mask of integer part */
401 unsigned int result = (1 << integer_bits) - 1;
402 /* 2. mask out fractional part */
403 unsigned int fractional_part = FRACTIONAL_PART_MASK & value;
404 /* 3. shrink fixed point integer part to be of integer_bits width*/
405 result &= GET_INTEGER_PART(value);
406 /* 4. make space for fractional part to be filled in after integer */
407 result <<= fractional_bits;
408 /* 5. shrink fixed point fractional part to of fractional_bits width*/
409 fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits;
410 /* 6. merge the result */
411 return result | fractional_part;
412}
413
414static inline unsigned int spl_clamp_ux_dy(
415 long long value,
416 unsigned int integer_bits,
417 unsigned int fractional_bits,
418 unsigned int min_clamp)
419{
420 unsigned int truncated_val = spl_ux_dy(value, integer_bits, fractional_bits);
421
422 if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART)))
423 return (1 << (integer_bits + fractional_bits)) - 1;
424 else if (truncated_val > min_clamp)
425 return truncated_val;
426 else
427 return min_clamp;
428}
429
430unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg)
431{
432 return spl_ux_dy(value: arg.value, integer_bits: 4, fractional_bits: 19);
433}
434
435unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg)
436{
437 return spl_ux_dy(value: arg.value, integer_bits: 3, fractional_bits: 19);
438}
439
440unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg)
441{
442 return spl_ux_dy(value: arg.value, integer_bits: 2, fractional_bits: 19);
443}
444
445unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg)
446{
447 return spl_ux_dy(value: arg.value, integer_bits: 0, fractional_bits: 19);
448}
449
450unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg)
451{
452 return spl_clamp_ux_dy(value: arg.value, integer_bits: 0, fractional_bits: 14, min_clamp: 1);
453}
454
455unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg)
456{
457 return spl_clamp_ux_dy(value: arg.value, integer_bits: 0, fractional_bits: 10, min_clamp: 1);
458}
459
460int spl_fixpt_s4d19(struct spl_fixed31_32 arg)
461{
462 if (arg.value < 0)
463 return -(int)spl_ux_dy(value: spl_fixpt_abs(arg).value, integer_bits: 4, fractional_bits: 19);
464 else
465 return spl_ux_dy(value: arg.value, integer_bits: 4, fractional_bits: 19);
466}
467
468struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value,
469 unsigned int integer_bits,
470 unsigned int fractional_bits)
471{
472 struct spl_fixed31_32 fixpt_value = spl_fixpt_zero;
473 struct spl_fixed31_32 fixpt_int_value = spl_fixpt_zero;
474 long long frac_mask = ((long long)1 << (long long)integer_bits) - 1;
475
476 fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
477 frac_mask = frac_mask << fractional_bits;
478 fixpt_int_value.value = value & frac_mask;
479 fixpt_int_value.value <<= (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
480 fixpt_value.value |= fixpt_int_value.value;
481 return fixpt_value;
482}
483
484struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value,
485 unsigned int frac_value,
486 unsigned int integer_bits,
487 unsigned int fractional_bits)
488{
489 struct spl_fixed31_32 fixpt_value = spl_fixpt_from_int(arg: int_value);
490
491 fixpt_value.value |= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
492 return fixpt_value;
493}
494

source code of linux/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c