spl_fixpt31_32.c source code [linux/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c]

1	// SPDX-License-Identifier: MIT
2	//
3	// Copyright 2024 Advanced Micro Devices, Inc.
4
5	#include "spl_fixpt31_32.h"
6
7	static const struct spl_fixed31_32 spl_fixpt_two_pi = { `26986075409LL` };
8	static const struct spl_fixed31_32 spl_fixpt_ln2 = { `2977044471LL` };
9	static const struct spl_fixed31_32 spl_fixpt_ln2_div_2 = { `1488522236LL` };
10
11	static inline unsigned long long abs_i64(
12	long long arg)
13	{
14	if (arg > `0`)
15	return (unsigned long long)arg;
16	else
17	return (unsigned long long)(-arg);
18	}
19
20	/*
21	* @brief
22	* result = dividend / divisor
23	* *remainder = dividend % divisor
24	*/
25	static inline unsigned long long spl_complete_integer_division_u64(
26	unsigned long long dividend,
27	unsigned long long divisor,
28	unsigned long long *remainder)
29	{
30	unsigned long long result;
31
32	result = spl_div64_u64_rem(dividend, divisor, remainder);
33
34	return result;
35	}
36
37
38	#define FRACTIONAL_PART_MASK \
39	((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1)
40
41	#define GET_INTEGER_PART(x) \
42	((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART)
43
44	#define GET_FRACTIONAL_PART(x) \
45	(FRACTIONAL_PART_MASK & (x))
46
47	struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator)
48	{
49	struct spl_fixed31_32 res;
50
51	bool arg1_negative = numerator < `0`;
52	bool arg2_negative = denominator < `0`;
53
54	unsigned long long arg1_value = arg1_negative ? -numerator : numerator;
55	unsigned long long arg2_value = arg2_negative ? -denominator : denominator;
56
57	unsigned long long remainder;
58
59	/ determine integer part /
60
61	unsigned long long res_value = spl_complete_integer_division_u64(
62	dividend: arg1_value, divisor: arg2_value, remainder: &remainder);
63
64	SPL_ASSERT(res_value <= (unsigned long long)LONG_MAX);
65
66	/ determine fractional part /
67	{
68	unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART;
69
70	do {
71	remainder <<= `1`;
72
73	res_value <<= `1`;
74
75	if (remainder >= arg2_value) {
76	res_value \|= `1`;
77	remainder -= arg2_value;
78	}
79	} while (--i != `0`);
80	}
81
82	/ round up LSB /
83	{
84	unsigned long long summand = (remainder << `1`) >= arg2_value;
85
86	SPL_ASSERT(res_value <= (unsigned long long)LLONG_MAX - summand);
87
88	res_value += summand;
89	}
90
91	res.value = (long long)res_value;
92
93	if (arg1_negative ^ arg2_negative)
94	res.value = -res.value;
95
96	return res;
97	}
98
99	struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2)
100	{
101	struct spl_fixed31_32 res;
102
103	bool arg1_negative = arg1.value < `0`;
104	bool arg2_negative = arg2.value < `0`;
105
106	unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value;
107	unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value;
108
109	unsigned long long arg1_int = GET_INTEGER_PART(arg1_value);
110	unsigned long long arg2_int = GET_INTEGER_PART(arg2_value);
111
112	unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value);
113	unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value);
114
115	unsigned long long tmp;
116
117	res.value = arg1_int * arg2_int;
118
119	SPL_ASSERT(res.value <= (long long)LONG_MAX);
120
121	res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
122
123	tmp = arg1_int * arg2_fra;
124
125	SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
126
127	res.value += tmp;
128
129	tmp = arg2_int * arg1_fra;
130
131	SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
132
133	res.value += tmp;
134
135	tmp = arg1_fra * arg2_fra;
136
137	tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
138	(tmp >= (unsigned long long)spl_fixpt_half.value);
139
140	SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
141
142	res.value += tmp;
143
144	if (arg1_negative ^ arg2_negative)
145	res.value = -res.value;
146
147	return res;
148	}
149
150	struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg)
151	{
152	struct spl_fixed31_32 res;
153
154	unsigned long long arg_value = abs_i64(arg: arg.value);
155
156	unsigned long long arg_int = GET_INTEGER_PART(arg_value);
157
158	unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value);
159
160	unsigned long long tmp;
161
162	res.value = arg_int * arg_int;
163
164	SPL_ASSERT(res.value <= (long long)LONG_MAX);
165
166	res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
167
168	tmp = arg_int * arg_fra;
169
170	SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
171
172	res.value += tmp;
173
174	SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
175
176	res.value += tmp;
177
178	tmp = arg_fra * arg_fra;
179
180	tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
181	(tmp >= (unsigned long long)spl_fixpt_half.value);
182
183	SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
184
185	res.value += tmp;
186
187	return res;
188	}
189
190	struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg)
191	{
192	/*
193	* @note
194	* Good idea to use Newton's method
195	*/
196
197	return spl_fixpt_from_fraction(
198	numerator: spl_fixpt_one.value,
199	denominator: arg.value);
200	}
201
202	struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg)
203	{
204	struct spl_fixed31_32 square;
205
206	struct spl_fixed31_32 res = spl_fixpt_one;
207
208	int n = `27`;
209
210	struct spl_fixed31_32 arg_norm = arg;
211
212	if (spl_fixpt_le(
213	arg1: spl_fixpt_two_pi,
214	arg2: spl_fixpt_abs(arg))) {
215	arg_norm = spl_fixpt_sub(
216	arg1: arg_norm,
217	arg2: spl_fixpt_mul_int(
218	arg1: spl_fixpt_two_pi,
219	arg2: (int)spl_div64_s64(
220	dividend: arg_norm.value,
221	divisor: spl_fixpt_two_pi.value)));
222	}
223
224	square = spl_fixpt_sqr(arg: arg_norm);
225
226	do {
227	res = spl_fixpt_sub(
228	arg1: spl_fixpt_one,
229	arg2: spl_fixpt_div_int(
230	arg1: spl_fixpt_mul(
231	arg1: square,
232	arg2: res),
233	arg2: n * (n - `1`)));
234
235	n -= `2`;
236	} while (n > `2`);
237
238	if (arg.value != arg_norm.value)
239	res = spl_fixpt_div(
240	arg1: spl_fixpt_mul(arg1: res, arg2: arg_norm),
241	arg2: arg);
242
243	return res;
244	}
245
246	struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg)
247	{
248	return spl_fixpt_mul(
249	arg1: arg,
250	arg2: spl_fixpt_sinc(arg));
251	}
252
253	struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg)
254	{
255	/ TODO implement argument normalization /
256
257	const struct spl_fixed31_32 square = spl_fixpt_sqr(arg);
258
259	struct spl_fixed31_32 res = spl_fixpt_one;
260
261	int n = `26`;
262
263	do {
264	res = spl_fixpt_sub(
265	arg1: spl_fixpt_one,
266	arg2: spl_fixpt_div_int(
267	arg1: spl_fixpt_mul(
268	arg1: square,
269	arg2: res),
270	arg2: n * (n - `1`)));
271
272	n -= `2`;
273	} while (n != `0`);
274
275	return res;
276	}
277
278	/*
279	* @brief
280	* result = exp(arg),
281	* where abs(arg) < 1
282	*
283	* Calculated as Taylor series.
284	*/
285	static struct spl_fixed31_32 spl_fixed31_32_exp_from_taylor_series(struct spl_fixed31_32 arg)
286	{
287	unsigned int n = `9`;
288
289	struct spl_fixed31_32 res = spl_fixpt_from_fraction(
290	numerator: n + `2`,
291	denominator: n + `1`);
292	/ TODO find correct res /
293
294	SPL_ASSERT(spl_fixpt_lt(arg, spl_fixpt_one));
295
296	do
297	res = spl_fixpt_add(
298	arg1: spl_fixpt_one,
299	arg2: spl_fixpt_div_int(
300	arg1: spl_fixpt_mul(
301	arg1: arg,
302	arg2: res),
303	arg2: n));
304	while (--n != `1`);
305
306	return spl_fixpt_add(
307	arg1: spl_fixpt_one,
308	arg2: spl_fixpt_mul(
309	arg1: arg,
310	arg2: res));
311	}
312
313	struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg)
314	{
315	/*
316	* @brief
317	* Main equation is:
318	* exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r),
319	* where m = round(x / ln(2)), r = x - m * ln(2)
320	*/
321
322	if (spl_fixpt_le(
323	arg1: spl_fixpt_ln2_div_2,
324	arg2: spl_fixpt_abs(arg))) {
325	int m = spl_fixpt_round(
326	arg: spl_fixpt_div(
327	arg1: arg,
328	arg2: spl_fixpt_ln2));
329
330	struct spl_fixed31_32 r = spl_fixpt_sub(
331	arg1: arg,
332	arg2: spl_fixpt_mul_int(
333	arg1: spl_fixpt_ln2,
334	arg2: m));
335
336	SPL_ASSERT(m != `0`);
337
338	SPL_ASSERT(spl_fixpt_lt(
339	spl_fixpt_abs(r),
340	spl_fixpt_one));
341
342	if (m > `0`)
343	return spl_fixpt_shl(
344	arg: spl_fixed31_32_exp_from_taylor_series(arg: r),
345	shift: (unsigned int)m);
346	else
347	return spl_fixpt_div_int(
348	arg1: spl_fixed31_32_exp_from_taylor_series(arg: r),
349	arg2: `1LL` << -m);
350	} else if (arg.value != `0`)
351	return spl_fixed31_32_exp_from_taylor_series(arg);
352	else
353	return spl_fixpt_one;
354	}
355
356	struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg)
357	{
358	struct spl_fixed31_32 res = spl_fixpt_neg(arg: spl_fixpt_one);
359	/ TODO improve 1st estimation /
360
361	struct spl_fixed31_32 error;
362
363	SPL_ASSERT(arg.value > `0`);
364	/ TODO if arg is negative, return NaN /
365	/ TODO if arg is zero, return -INF /
366
367	do {
368	struct spl_fixed31_32 res1 = spl_fixpt_add(
369	arg1: spl_fixpt_sub(
370	arg1: res,
371	arg2: spl_fixpt_one),
372	arg2: spl_fixpt_div(
373	arg1: arg,
374	arg2: spl_fixpt_exp(arg: res)));
375
376	error = spl_fixpt_sub(
377	arg1: res,
378	arg2: res1);
379
380	res = res1;
381	/ TODO determine max_allowed_error based on quality of exp() /
382	} while (abs_i64(arg: error.value) > `100ULL`);
383
384	return res;
385	}
386
387
388	/ this function is a generic helper to translate fixed point value to*
389	* specified integer format that will consist of integer_bits integer part and
390	* fractional_bits fractional part. For example it is used in
391	* spl_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional
392	* part in 32 bits. It is used in hw programming (scaler)
393	*/
394
395	static inline unsigned int spl_ux_dy(
396	long long value,
397	unsigned int integer_bits,
398	unsigned int fractional_bits)
399	{
400	/ 1. create mask of integer part /
401	unsigned int result = (`1` << integer_bits) - `1`;
402	/ 2. mask out fractional part /
403	unsigned int fractional_part = FRACTIONAL_PART_MASK & value;
404	/ 3. shrink fixed point integer part to be of integer_bits width/
405	result &= GET_INTEGER_PART(value);
406	/ 4. make space for fractional part to be filled in after integer /
407	result <<= fractional_bits;
408	/ 5. shrink fixed point fractional part to of fractional_bits width/
409	fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits;
410	/ 6. merge the result /
411	return result \| fractional_part;
412	}
413
414	static inline unsigned int spl_clamp_ux_dy(
415	long long value,
416	unsigned int integer_bits,
417	unsigned int fractional_bits,
418	unsigned int min_clamp)
419	{
420	unsigned int truncated_val = spl_ux_dy(value, integer_bits, fractional_bits);
421
422	if (value >= (`1LL` << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART)))
423	return (`1` << (integer_bits + fractional_bits)) - `1`;
424	else if (truncated_val > min_clamp)
425	return truncated_val;
426	else
427	return min_clamp;
428	}
429
430	unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg)
431	{
432	return spl_ux_dy(value: arg.value, integer_bits: `4`, fractional_bits: `19`);
433	}
434
435	unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg)
436	{
437	return spl_ux_dy(value: arg.value, integer_bits: `3`, fractional_bits: `19`);
438	}
439
440	unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg)
441	{
442	return spl_ux_dy(value: arg.value, integer_bits: `2`, fractional_bits: `19`);
443	}
444
445	unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg)
446	{
447	return spl_ux_dy(value: arg.value, integer_bits: `0`, fractional_bits: `19`);
448	}
449
450	unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg)
451	{
452	return spl_clamp_ux_dy(value: arg.value, integer_bits: `0`, fractional_bits: `14`, min_clamp: `1`);
453	}
454
455	unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg)
456	{
457	return spl_clamp_ux_dy(value: arg.value, integer_bits: `0`, fractional_bits: `10`, min_clamp: `1`);
458	}
459
460	int spl_fixpt_s4d19(struct spl_fixed31_32 arg)
461	{
462	if (arg.value < `0`)
463	return -(int)spl_ux_dy(value: spl_fixpt_abs(arg).value, integer_bits: `4`, fractional_bits: `19`);
464	else
465	return spl_ux_dy(value: arg.value, integer_bits: `4`, fractional_bits: `19`);
466	}
467
468	struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value,
469	unsigned int integer_bits,
470	unsigned int fractional_bits)
471	{
472	struct spl_fixed31_32 fixpt_value = spl_fixpt_zero;
473	struct spl_fixed31_32 fixpt_int_value = spl_fixpt_zero;
474	long long frac_mask = ((long long)`1` << (long long)integer_bits) - `1`;
475
476	fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
477	frac_mask = frac_mask << fractional_bits;
478	fixpt_int_value.value = value & frac_mask;
479	fixpt_int_value.value <<= (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
480	fixpt_value.value \|= fixpt_int_value.value;
481	return fixpt_value;
482	}
483
484	struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value,
485	unsigned int frac_value,
486	unsigned int integer_bits,
487	unsigned int fractional_bits)
488	{
489	struct spl_fixed31_32 fixpt_value = spl_fixpt_from_int(arg: int_value);
490
491	fixpt_value.value \|= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
492	return fixpt_value;
493	}
494

source code of linux/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c