1/*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "skcms.h"
9#include "skcms_internal.h"
10#include <assert.h>
11#include <float.h>
12#include <limits.h>
13#include <stdlib.h>
14#include <string.h>
15
16#if defined(__ARM_NEON)
17 #include <arm_neon.h>
18#elif defined(__SSE__)
19 #include <immintrin.h>
20
21 #if defined(__clang__)
22 // That #include <immintrin.h> is usually enough, but Clang's headers
23 // "helpfully" skip including the whole kitchen sink when _MSC_VER is
24 // defined, because lots of programs on Windows would include that and
25 // it'd be a lot slower. But we want all those headers included so we
26 // can use their features after runtime checks later.
27 #include <smmintrin.h>
28 #include <avxintrin.h>
29 #include <avx2intrin.h>
30 #include <avx512fintrin.h>
31 #include <avx512dqintrin.h>
32 #endif
33#endif
34
35static bool runtime_cpu_detection = true;
36void skcms_DisableRuntimeCPUDetection() {
37 runtime_cpu_detection = false;
38}
39
40// sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others.
41// We have better testing on 64-bit machines, so force 32-bit machines to behave like 64-bit.
42//
43// Please do not use sizeof() directly, and size_t only when required.
44// (We have no way of enforcing these requests...)
45#define SAFE_SIZEOF(x) ((uint64_t)sizeof(x))
46
47// Same sort of thing for _Layout structs with a variable sized array at the end (named "variable").
48#define SAFE_FIXED_SIZE(type) ((uint64_t)offsetof(type, variable))
49
50static const union {
51 uint32_t bits;
52 float f;
53} inf_ = { .bits: 0x7f800000 };
54#define INFINITY_ inf_.f
55
56#if defined(__clang__) || defined(__GNUC__)
57 #define small_memcpy __builtin_memcpy
58#else
59 #define small_memcpy memcpy
60#endif
61
62static float log2f_(float x) {
63 // The first approximation of log2(x) is its exponent 'e', minus 127.
64 int32_t bits;
65 small_memcpy(&bits, &x, sizeof(bits));
66
67 float e = (float)bits * (1.0f / (1<<23));
68
69 // If we use the mantissa too we can refine the error signficantly.
70 int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
71 float m;
72 small_memcpy(&m, &m_bits, sizeof(m));
73
74 return (e - 124.225514990f
75 - 1.498030302f*m
76 - 1.725879990f/(0.3520887068f + m));
77}
78static float logf_(float x) {
79 const float ln2 = 0.69314718f;
80 return ln2*log2f_(x);
81}
82
83static float exp2f_(float x) {
84 if (x > 128.0f) {
85 return INFINITY_;
86 }
87 float fract = x - floorf_(x);
88
89 float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
90 - 1.490129070f*fract
91 + 27.728023300f/(4.84252568f - fract));
92
93 // Before we cast fbits to int32_t, check for out of range values to pacify UBSAN.
94 // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite.
95 // Negative values are effectively underflow - we'll end up returning a (different) negative
96 // value, which makes no sense. So clamp to zero.
97 if (fbits >= (float)INT_MAX) {
98 return INFINITY_;
99 } else if (fbits < 0) {
100 return 0;
101 }
102
103 int32_t bits = (int32_t)fbits;
104 small_memcpy(&x, &bits, sizeof(x));
105 return x;
106}
107
108// Not static, as it's used by some test tools.
109float powf_(float x, float y) {
110 assert (x >= 0);
111 return (x == 0) || (x == 1) ? x
112 : exp2f_(x: log2f_(x) * y);
113}
114
115static float expf_(float x) {
116 const float log2_e = 1.4426950408889634074f;
117 return exp2f_(x: log2_e * x);
118}
119
120static float fmaxf_(float x, float y) { return x > y ? x : y; }
121static float fminf_(float x, float y) { return x < y ? x : y; }
122
123static bool isfinitef_(float x) { return 0 == x*0; }
124
125static float minus_1_ulp(float x) {
126 int32_t bits;
127 memcpy(dest: &bits, src: &x, n: sizeof(bits));
128 bits = bits - 1;
129 memcpy(dest: &x, src: &bits, n: sizeof(bits));
130 return x;
131}
132
133// Most transfer functions we work with are sRGBish.
134// For exotic HDR transfer functions, we encode them using a tf.g that makes no sense,
135// and repurpose the other fields to hold the parameters of the HDR functions.
136struct TF_PQish { float A,B,C,D,E,F; };
137struct TF_HLGish { float R,G,a,b,c,K_minus_1; };
138// We didn't originally support a scale factor K for HLG, and instead just stored 0 in
139// the unused `f` field of skcms_TransferFunction for HLGish and HLGInvish transfer functions.
140// By storing f=K-1, those old unusued f=0 values now mean K=1, a noop scale factor.
141
142static float TFKind_marker(skcms_TFType kind) {
143 // We'd use different NaNs, but those aren't guaranteed to be preserved by WASM.
144 return -(float)kind;
145}
146
147static skcms_TFType classify(const skcms_TransferFunction& tf, TF_PQish* pq = nullptr
148 , TF_HLGish* hlg = nullptr) {
149 if (tf.g < 0 && static_cast<float>(static_cast<int>(tf.g)) == tf.g) {
150 // TODO: soundness checks for PQ/HLG like we do for sRGBish?
151 switch ((int)tf.g) {
152 case -skcms_TFType_PQish:
153 if (pq) {
154 memcpy(dest: pq , src: &tf.a, n: sizeof(*pq ));
155 }
156 return skcms_TFType_PQish;
157 case -skcms_TFType_HLGish:
158 if (hlg) {
159 memcpy(dest: hlg, src: &tf.a, n: sizeof(*hlg));
160 }
161 return skcms_TFType_HLGish;
162 case -skcms_TFType_HLGinvish:
163 if (hlg) {
164 memcpy(dest: hlg, src: &tf.a, n: sizeof(*hlg));
165 }
166 return skcms_TFType_HLGinvish;
167 }
168 return skcms_TFType_Invalid;
169 }
170
171 // Basic soundness checks for sRGBish transfer functions.
172 if (isfinitef_(x: tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g)
173 // a,c,d,g should be non-negative to make any sense.
174 && tf.a >= 0
175 && tf.c >= 0
176 && tf.d >= 0
177 && tf.g >= 0
178 // Raising a negative value to a fractional tf->g produces complex numbers.
179 && tf.a * tf.d + tf.b >= 0) {
180 return skcms_TFType_sRGBish;
181 }
182
183 return skcms_TFType_Invalid;
184}
185
186skcms_TFType skcms_TransferFunction_getType(const skcms_TransferFunction* tf) {
187 return classify(tf: *tf);
188}
189bool skcms_TransferFunction_isSRGBish(const skcms_TransferFunction* tf) {
190 return classify(tf: *tf) == skcms_TFType_sRGBish;
191}
192bool skcms_TransferFunction_isPQish(const skcms_TransferFunction* tf) {
193 return classify(tf: *tf) == skcms_TFType_PQish;
194}
195bool skcms_TransferFunction_isHLGish(const skcms_TransferFunction* tf) {
196 return classify(tf: *tf) == skcms_TFType_HLGish;
197}
198
199bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf,
200 float A, float B, float C,
201 float D, float E, float F) {
202 *tf = { .g: TFKind_marker(kind: skcms_TFType_PQish), .a: A,.b: B,.c: C,.d: D,.e: E,.f: F };
203 assert(skcms_TransferFunction_isPQish(tf));
204 return true;
205}
206
207bool skcms_TransferFunction_makeScaledHLGish(skcms_TransferFunction* tf,
208 float K, float R, float G,
209 float a, float b, float c) {
210 *tf = { .g: TFKind_marker(kind: skcms_TFType_HLGish), .a: R,.b: G, .c: a,.d: b,.e: c, .f: K-1.0f };
211 assert(skcms_TransferFunction_isHLGish(tf));
212 return true;
213}
214
215float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
216 float sign = x < 0 ? -1.0f : 1.0f;
217 x *= sign;
218
219 TF_PQish pq;
220 TF_HLGish hlg;
221 switch (classify(tf: *tf, pq: &pq, hlg: &hlg)) {
222 case skcms_TFType_Invalid: break;
223
224 case skcms_TFType_HLGish: {
225 const float K = hlg.K_minus_1 + 1.0f;
226 return K * sign * (x*hlg.R <= 1 ? powf_(x: x*hlg.R, y: hlg.G)
227 : expf_(x: (x-hlg.c)*hlg.a) + hlg.b);
228 }
229
230 // skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast.
231 case skcms_TFType_HLGinvish: {
232 const float K = hlg.K_minus_1 + 1.0f;
233 x /= K;
234 return sign * (x <= 1 ? hlg.R * powf_(x, y: hlg.G)
235 : hlg.a * logf_(x: x - hlg.b) + hlg.c);
236 }
237
238 case skcms_TFType_sRGBish:
239 return sign * (x < tf->d ? tf->c * x + tf->f
240 : powf_(x: tf->a * x + tf->b, y: tf->g) + tf->e);
241
242 case skcms_TFType_PQish: return sign * powf_(x: fmaxf_(x: pq.A + pq.B * powf_(x, y: pq.C), y: 0)
243 / (pq.D + pq.E * powf_(x, y: pq.C)),
244 y: pq.F);
245 }
246 return 0;
247}
248
249
250static float eval_curve(const skcms_Curve* curve, float x) {
251 if (curve->table_entries == 0) {
252 return skcms_TransferFunction_eval(tf: &curve->parametric, x);
253 }
254
255 float ix = fmaxf_(x: 0, y: fminf_(x, y: 1)) * static_cast<float>(curve->table_entries - 1);
256 int lo = (int) ix ,
257 hi = (int)(float)minus_1_ulp(x: ix + 1.0f);
258 float t = ix - (float)lo;
259
260 float l, h;
261 if (curve->table_8) {
262 l = curve->table_8[lo] * (1/255.0f);
263 h = curve->table_8[hi] * (1/255.0f);
264 } else {
265 uint16_t be_l, be_h;
266 memcpy(dest: &be_l, src: curve->table_16 + 2*lo, n: 2);
267 memcpy(dest: &be_h, src: curve->table_16 + 2*hi, n: 2);
268 uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
269 uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
270 l = le_l * (1/65535.0f);
271 h = le_h * (1/65535.0f);
272 }
273 return l + (h-l)*t;
274}
275
276float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
277 uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
278 const float dx = 1.0f / static_cast<float>(N - 1);
279 float err = 0;
280 for (uint32_t i = 0; i < N; i++) {
281 float x = static_cast<float>(i) * dx,
282 y = eval_curve(curve, x);
283 err = fmaxf_(x: err, y: fabsf_(x: x - skcms_TransferFunction_eval(tf: inv_tf, x: y)));
284 }
285 return err;
286}
287
288bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
289 return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f);
290}
291
292// Additional ICC signature values that are only used internally
293enum {
294 // File signature
295 skcms_Signature_acsp = 0x61637370,
296
297 // Tag signatures
298 skcms_Signature_rTRC = 0x72545243,
299 skcms_Signature_gTRC = 0x67545243,
300 skcms_Signature_bTRC = 0x62545243,
301 skcms_Signature_kTRC = 0x6B545243,
302
303 skcms_Signature_rXYZ = 0x7258595A,
304 skcms_Signature_gXYZ = 0x6758595A,
305 skcms_Signature_bXYZ = 0x6258595A,
306
307 skcms_Signature_A2B0 = 0x41324230,
308 skcms_Signature_B2A0 = 0x42324130,
309
310 skcms_Signature_CHAD = 0x63686164,
311 skcms_Signature_WTPT = 0x77747074,
312
313 skcms_Signature_CICP = 0x63696370,
314
315 // Type signatures
316 skcms_Signature_curv = 0x63757276,
317 skcms_Signature_mft1 = 0x6D667431,
318 skcms_Signature_mft2 = 0x6D667432,
319 skcms_Signature_mAB = 0x6D414220,
320 skcms_Signature_mBA = 0x6D424120,
321 skcms_Signature_para = 0x70617261,
322 skcms_Signature_sf32 = 0x73663332,
323 // XYZ is also a PCS signature, so it's defined in skcms.h
324 // skcms_Signature_XYZ = 0x58595A20,
325};
326
327static uint16_t read_big_u16(const uint8_t* ptr) {
328 uint16_t be;
329 memcpy(dest: &be, src: ptr, n: sizeof(be));
330#if defined(_MSC_VER)
331 return _byteswap_ushort(be);
332#else
333 return __builtin_bswap16(be);
334#endif
335}
336
337static uint32_t read_big_u32(const uint8_t* ptr) {
338 uint32_t be;
339 memcpy(dest: &be, src: ptr, n: sizeof(be));
340#if defined(_MSC_VER)
341 return _byteswap_ulong(be);
342#else
343 return __builtin_bswap32(be);
344#endif
345}
346
347static int32_t read_big_i32(const uint8_t* ptr) {
348 return (int32_t)read_big_u32(ptr);
349}
350
351static float read_big_fixed(const uint8_t* ptr) {
352 return static_cast<float>(read_big_i32(ptr)) * (1.0f / 65536.0f);
353}
354
355// Maps to an in-memory profile so that fields line up to the locations specified
356// in ICC.1:2010, section 7.2
357typedef struct {
358 uint8_t size [ 4];
359 uint8_t cmm_type [ 4];
360 uint8_t version [ 4];
361 uint8_t profile_class [ 4];
362 uint8_t data_color_space [ 4];
363 uint8_t pcs [ 4];
364 uint8_t creation_date_time [12];
365 uint8_t signature [ 4];
366 uint8_t platform [ 4];
367 uint8_t flags [ 4];
368 uint8_t device_manufacturer [ 4];
369 uint8_t device_model [ 4];
370 uint8_t device_attributes [ 8];
371 uint8_t rendering_intent [ 4];
372 uint8_t illuminant_X [ 4];
373 uint8_t illuminant_Y [ 4];
374 uint8_t illuminant_Z [ 4];
375 uint8_t creator [ 4];
376 uint8_t profile_id [16];
377 uint8_t reserved [28];
378 uint8_t tag_count [ 4]; // Technically not part of header, but required
379} header_Layout;
380
381typedef struct {
382 uint8_t signature [4];
383 uint8_t offset [4];
384 uint8_t size [4];
385} tag_Layout;
386
387static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
388 return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
389}
390
391// s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
392// use of the type is for the CHAD tag that stores exactly nine values.
393typedef struct {
394 uint8_t type [ 4];
395 uint8_t reserved [ 4];
396 uint8_t values [36];
397} sf32_Layout;
398
399bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
400 skcms_ICCTag tag;
401 if (!skcms_GetTagBySignature(profile, sig: skcms_Signature_CHAD, &tag)) {
402 return false;
403 }
404
405 if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
406 return false;
407 }
408
409 const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
410 const uint8_t* values = sf32Tag->values;
411 for (int r = 0; r < 3; ++r)
412 for (int c = 0; c < 3; ++c, values += 4) {
413 m->vals[r][c] = read_big_fixed(ptr: values);
414 }
415 return true;
416}
417
418// XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
419// the type are for tags/data that store exactly one triple.
420typedef struct {
421 uint8_t type [4];
422 uint8_t reserved [4];
423 uint8_t X [4];
424 uint8_t Y [4];
425 uint8_t Z [4];
426} XYZ_Layout;
427
428static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
429 if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
430 return false;
431 }
432
433 const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
434
435 *x = read_big_fixed(ptr: xyzTag->X);
436 *y = read_big_fixed(ptr: xyzTag->Y);
437 *z = read_big_fixed(ptr: xyzTag->Z);
438 return true;
439}
440
441bool skcms_GetWTPT(const skcms_ICCProfile* profile, float xyz[3]) {
442 skcms_ICCTag tag;
443 return skcms_GetTagBySignature(profile, sig: skcms_Signature_WTPT, &tag) &&
444 read_tag_xyz(tag: &tag, x: &xyz[0], y: &xyz[1], z: &xyz[2]);
445}
446
447static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
448 const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
449 return read_tag_xyz(tag: rXYZ, x: &toXYZ->vals[0][0], y: &toXYZ->vals[1][0], z: &toXYZ->vals[2][0]) &&
450 read_tag_xyz(tag: gXYZ, x: &toXYZ->vals[0][1], y: &toXYZ->vals[1][1], z: &toXYZ->vals[2][1]) &&
451 read_tag_xyz(tag: bXYZ, x: &toXYZ->vals[0][2], y: &toXYZ->vals[1][2], z: &toXYZ->vals[2][2]);
452}
453
454typedef struct {
455 uint8_t type [4];
456 uint8_t reserved_a [4];
457 uint8_t function_type [2];
458 uint8_t reserved_b [2];
459 uint8_t variable [1/*variable*/]; // 1, 3, 4, 5, or 7 s15.16, depending on function_type
460} para_Layout;
461
462static bool read_curve_para(const uint8_t* buf, uint32_t size,
463 skcms_Curve* curve, uint32_t* curve_size) {
464 if (size < SAFE_FIXED_SIZE(para_Layout)) {
465 return false;
466 }
467
468 const para_Layout* paraTag = (const para_Layout*)buf;
469
470 enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
471 uint16_t function_type = read_big_u16(ptr: paraTag->function_type);
472 if (function_type > kGABCDEF) {
473 return false;
474 }
475
476 static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
477 if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
478 return false;
479 }
480
481 if (curve_size) {
482 *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
483 }
484
485 curve->table_entries = 0;
486 curve->parametric.a = 1.0f;
487 curve->parametric.b = 0.0f;
488 curve->parametric.c = 0.0f;
489 curve->parametric.d = 0.0f;
490 curve->parametric.e = 0.0f;
491 curve->parametric.f = 0.0f;
492 curve->parametric.g = read_big_fixed(ptr: paraTag->variable);
493
494 switch (function_type) {
495 case kGAB:
496 curve->parametric.a = read_big_fixed(ptr: paraTag->variable + 4);
497 curve->parametric.b = read_big_fixed(ptr: paraTag->variable + 8);
498 if (curve->parametric.a == 0) {
499 return false;
500 }
501 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
502 break;
503 case kGABC:
504 curve->parametric.a = read_big_fixed(ptr: paraTag->variable + 4);
505 curve->parametric.b = read_big_fixed(ptr: paraTag->variable + 8);
506 curve->parametric.e = read_big_fixed(ptr: paraTag->variable + 12);
507 if (curve->parametric.a == 0) {
508 return false;
509 }
510 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
511 curve->parametric.f = curve->parametric.e;
512 break;
513 case kGABCD:
514 curve->parametric.a = read_big_fixed(ptr: paraTag->variable + 4);
515 curve->parametric.b = read_big_fixed(ptr: paraTag->variable + 8);
516 curve->parametric.c = read_big_fixed(ptr: paraTag->variable + 12);
517 curve->parametric.d = read_big_fixed(ptr: paraTag->variable + 16);
518 break;
519 case kGABCDEF:
520 curve->parametric.a = read_big_fixed(ptr: paraTag->variable + 4);
521 curve->parametric.b = read_big_fixed(ptr: paraTag->variable + 8);
522 curve->parametric.c = read_big_fixed(ptr: paraTag->variable + 12);
523 curve->parametric.d = read_big_fixed(ptr: paraTag->variable + 16);
524 curve->parametric.e = read_big_fixed(ptr: paraTag->variable + 20);
525 curve->parametric.f = read_big_fixed(ptr: paraTag->variable + 24);
526 break;
527 }
528 return skcms_TransferFunction_isSRGBish(tf: &curve->parametric);
529}
530
531typedef struct {
532 uint8_t type [4];
533 uint8_t reserved [4];
534 uint8_t value_count [4];
535 uint8_t variable [1/*variable*/]; // value_count, 8.8 if 1, uint16 (n*65535) if > 1
536} curv_Layout;
537
538static bool read_curve_curv(const uint8_t* buf, uint32_t size,
539 skcms_Curve* curve, uint32_t* curve_size) {
540 if (size < SAFE_FIXED_SIZE(curv_Layout)) {
541 return false;
542 }
543
544 const curv_Layout* curvTag = (const curv_Layout*)buf;
545
546 uint32_t value_count = read_big_u32(ptr: curvTag->value_count);
547 if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
548 return false;
549 }
550
551 if (curve_size) {
552 *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
553 }
554
555 if (value_count < 2) {
556 curve->table_entries = 0;
557 curve->parametric.a = 1.0f;
558 curve->parametric.b = 0.0f;
559 curve->parametric.c = 0.0f;
560 curve->parametric.d = 0.0f;
561 curve->parametric.e = 0.0f;
562 curve->parametric.f = 0.0f;
563 if (value_count == 0) {
564 // Empty tables are a shorthand for an identity curve
565 curve->parametric.g = 1.0f;
566 } else {
567 // Single entry tables are a shorthand for simple gamma
568 curve->parametric.g = read_big_u16(ptr: curvTag->variable) * (1.0f / 256.0f);
569 }
570 } else {
571 curve->table_8 = nullptr;
572 curve->table_16 = curvTag->variable;
573 curve->table_entries = value_count;
574 }
575
576 return true;
577}
578
579// Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
580// If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
581static bool read_curve(const uint8_t* buf, uint32_t size,
582 skcms_Curve* curve, uint32_t* curve_size) {
583 if (!buf || size < 4 || !curve) {
584 return false;
585 }
586
587 uint32_t type = read_big_u32(ptr: buf);
588 if (type == skcms_Signature_para) {
589 return read_curve_para(buf, size, curve, curve_size);
590 } else if (type == skcms_Signature_curv) {
591 return read_curve_curv(buf, size, curve, curve_size);
592 }
593
594 return false;
595}
596
597// mft1 and mft2 share a large chunk of data
598typedef struct {
599 uint8_t type [ 4];
600 uint8_t reserved_a [ 4];
601 uint8_t input_channels [ 1];
602 uint8_t output_channels [ 1];
603 uint8_t grid_points [ 1];
604 uint8_t reserved_b [ 1];
605 uint8_t matrix [36];
606} mft_CommonLayout;
607
608typedef struct {
609 mft_CommonLayout common [1];
610
611 uint8_t variable [1/*variable*/];
612} mft1_Layout;
613
614typedef struct {
615 mft_CommonLayout common [1];
616
617 uint8_t input_table_entries [2];
618 uint8_t output_table_entries [2];
619 uint8_t variable [1/*variable*/];
620} mft2_Layout;
621
622static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
623 // MFT matrices are applied before the first set of curves, but must be identity unless the
624 // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
625 // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
626 // field/flag.
627 a2b->matrix_channels = 0;
628 a2b-> input_channels = mftTag-> input_channels[0];
629 a2b->output_channels = mftTag->output_channels[0];
630
631 // We require exactly three (ie XYZ/Lab/RGB) output channels
632 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
633 return false;
634 }
635 // We require at least one, and no more than four (ie CMYK) input channels
636 if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
637 return false;
638 }
639
640 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
641 a2b->grid_points[i] = mftTag->grid_points[0];
642 }
643 // The grid only makes sense with at least two points along each axis
644 if (a2b->grid_points[0] < 2) {
645 return false;
646 }
647 return true;
648}
649
650// All as the A2B version above, except where noted.
651static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_B2A* b2a) {
652 // Same as A2B.
653 b2a->matrix_channels = 0;
654 b2a-> input_channels = mftTag-> input_channels[0];
655 b2a->output_channels = mftTag->output_channels[0];
656
657
658 // For B2A, exactly 3 input channels (XYZ) and 3 (RGB) or 4 (CMYK) output channels.
659 if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
660 return false;
661 }
662 if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
663 return false;
664 }
665
666 // Same as A2B.
667 for (uint32_t i = 0; i < b2a->input_channels; ++i) {
668 b2a->grid_points[i] = mftTag->grid_points[0];
669 }
670 if (b2a->grid_points[0] < 2) {
671 return false;
672 }
673 return true;
674}
675
676template <typename A2B_or_B2A>
677static bool init_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
678 uint32_t input_table_entries, uint32_t output_table_entries,
679 A2B_or_B2A* out) {
680 // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
681 uint32_t byte_len_per_input_table = input_table_entries * byte_width;
682 uint32_t byte_len_per_output_table = output_table_entries * byte_width;
683
684 // [input|output]_channels are <= 4, so still no overflow
685 uint32_t byte_len_all_input_tables = out->input_channels * byte_len_per_input_table;
686 uint32_t byte_len_all_output_tables = out->output_channels * byte_len_per_output_table;
687
688 uint64_t grid_size = out->output_channels * byte_width;
689 for (uint32_t axis = 0; axis < out->input_channels; ++axis) {
690 grid_size *= out->grid_points[axis];
691 }
692
693 if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
694 return false;
695 }
696
697 for (uint32_t i = 0; i < out->input_channels; ++i) {
698 out->input_curves[i].table_entries = input_table_entries;
699 if (byte_width == 1) {
700 out->input_curves[i].table_8 = table_base + i * byte_len_per_input_table;
701 out->input_curves[i].table_16 = nullptr;
702 } else {
703 out->input_curves[i].table_8 = nullptr;
704 out->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
705 }
706 }
707
708 if (byte_width == 1) {
709 out->grid_8 = table_base + byte_len_all_input_tables;
710 out->grid_16 = nullptr;
711 } else {
712 out->grid_8 = nullptr;
713 out->grid_16 = table_base + byte_len_all_input_tables;
714 }
715
716 const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
717 for (uint32_t i = 0; i < out->output_channels; ++i) {
718 out->output_curves[i].table_entries = output_table_entries;
719 if (byte_width == 1) {
720 out->output_curves[i].table_8 = output_table_base + i * byte_len_per_output_table;
721 out->output_curves[i].table_16 = nullptr;
722 } else {
723 out->output_curves[i].table_8 = nullptr;
724 out->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
725 }
726 }
727
728 return true;
729}
730
731template <typename A2B_or_B2A>
732static bool read_tag_mft1(const skcms_ICCTag* tag, A2B_or_B2A* out) {
733 if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
734 return false;
735 }
736
737 const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
738 if (!read_mft_common(mftTag->common, out)) {
739 return false;
740 }
741
742 uint32_t input_table_entries = 256;
743 uint32_t output_table_entries = 256;
744
745 return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
746 input_table_entries, output_table_entries, out);
747}
748
749template <typename A2B_or_B2A>
750static bool read_tag_mft2(const skcms_ICCTag* tag, A2B_or_B2A* out) {
751 if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
752 return false;
753 }
754
755 const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
756 if (!read_mft_common(mftTag->common, out)) {
757 return false;
758 }
759
760 uint32_t input_table_entries = read_big_u16(ptr: mftTag->input_table_entries);
761 uint32_t output_table_entries = read_big_u16(ptr: mftTag->output_table_entries);
762
763 // ICC spec mandates that 2 <= table_entries <= 4096
764 if (input_table_entries < 2 || input_table_entries > 4096 ||
765 output_table_entries < 2 || output_table_entries > 4096) {
766 return false;
767 }
768
769 return init_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
770 input_table_entries, output_table_entries, out);
771}
772
773static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
774 uint32_t num_curves, skcms_Curve* curves) {
775 for (uint32_t i = 0; i < num_curves; ++i) {
776 if (curve_offset > size) {
777 return false;
778 }
779
780 uint32_t curve_bytes;
781 if (!read_curve(buf: buf + curve_offset, size: size - curve_offset, curve: &curves[i], curve_size: &curve_bytes)) {
782 return false;
783 }
784
785 if (curve_bytes > UINT32_MAX - 3) {
786 return false;
787 }
788 curve_bytes = (curve_bytes + 3) & ~3U;
789
790 uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
791 curve_offset = (uint32_t)new_offset_64;
792 if (new_offset_64 != curve_offset) {
793 return false;
794 }
795 }
796
797 return true;
798}
799
800// mAB and mBA tags use the same encoding, including color lookup tables.
801typedef struct {
802 uint8_t type [ 4];
803 uint8_t reserved_a [ 4];
804 uint8_t input_channels [ 1];
805 uint8_t output_channels [ 1];
806 uint8_t reserved_b [ 2];
807 uint8_t b_curve_offset [ 4];
808 uint8_t matrix_offset [ 4];
809 uint8_t m_curve_offset [ 4];
810 uint8_t clut_offset [ 4];
811 uint8_t a_curve_offset [ 4];
812} mAB_or_mBA_Layout;
813
814typedef struct {
815 uint8_t grid_points [16];
816 uint8_t grid_byte_width [ 1];
817 uint8_t reserved [ 3];
818 uint8_t variable [1/*variable*/];
819} CLUT_Layout;
820
821static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
822 if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
823 return false;
824 }
825
826 const mAB_or_mBA_Layout* mABTag = (const mAB_or_mBA_Layout*)tag->buf;
827
828 a2b->input_channels = mABTag->input_channels[0];
829 a2b->output_channels = mABTag->output_channels[0];
830
831 // We require exactly three (ie XYZ/Lab/RGB) output channels
832 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
833 return false;
834 }
835 // We require no more than four (ie CMYK) input channels
836 if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
837 return false;
838 }
839
840 uint32_t b_curve_offset = read_big_u32(ptr: mABTag->b_curve_offset);
841 uint32_t matrix_offset = read_big_u32(ptr: mABTag->matrix_offset);
842 uint32_t m_curve_offset = read_big_u32(ptr: mABTag->m_curve_offset);
843 uint32_t clut_offset = read_big_u32(ptr: mABTag->clut_offset);
844 uint32_t a_curve_offset = read_big_u32(ptr: mABTag->a_curve_offset);
845
846 // "B" curves must be present
847 if (0 == b_curve_offset) {
848 return false;
849 }
850
851 if (!read_curves(buf: tag->buf, size: tag->size, curve_offset: b_curve_offset, num_curves: a2b->output_channels,
852 curves: a2b->output_curves)) {
853 return false;
854 }
855
856 // "M" curves and Matrix must be used together
857 if (0 != m_curve_offset) {
858 if (0 == matrix_offset) {
859 return false;
860 }
861 a2b->matrix_channels = a2b->output_channels;
862 if (!read_curves(buf: tag->buf, size: tag->size, curve_offset: m_curve_offset, num_curves: a2b->matrix_channels,
863 curves: a2b->matrix_curves)) {
864 return false;
865 }
866
867 // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
868 if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
869 return false;
870 }
871 float encoding_factor = pcs_is_xyz ? (65535 / 32768.0f) : 1.0f;
872 const uint8_t* mtx_buf = tag->buf + matrix_offset;
873 a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(ptr: mtx_buf + 0);
874 a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(ptr: mtx_buf + 4);
875 a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(ptr: mtx_buf + 8);
876 a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(ptr: mtx_buf + 12);
877 a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(ptr: mtx_buf + 16);
878 a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(ptr: mtx_buf + 20);
879 a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(ptr: mtx_buf + 24);
880 a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(ptr: mtx_buf + 28);
881 a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(ptr: mtx_buf + 32);
882 a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(ptr: mtx_buf + 36);
883 a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(ptr: mtx_buf + 40);
884 a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(ptr: mtx_buf + 44);
885 } else {
886 if (0 != matrix_offset) {
887 return false;
888 }
889 a2b->matrix_channels = 0;
890 }
891
892 // "A" curves and CLUT must be used together
893 if (0 != a_curve_offset) {
894 if (0 == clut_offset) {
895 return false;
896 }
897 if (!read_curves(buf: tag->buf, size: tag->size, curve_offset: a_curve_offset, num_curves: a2b->input_channels,
898 curves: a2b->input_curves)) {
899 return false;
900 }
901
902 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
903 return false;
904 }
905 const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
906
907 if (clut->grid_byte_width[0] == 1) {
908 a2b->grid_8 = clut->variable;
909 a2b->grid_16 = nullptr;
910 } else if (clut->grid_byte_width[0] == 2) {
911 a2b->grid_8 = nullptr;
912 a2b->grid_16 = clut->variable;
913 } else {
914 return false;
915 }
916
917 uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0]; // the payload
918 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
919 a2b->grid_points[i] = clut->grid_points[i];
920 // The grid only makes sense with at least two points along each axis
921 if (a2b->grid_points[i] < 2) {
922 return false;
923 }
924 grid_size *= a2b->grid_points[i];
925 }
926 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
927 return false;
928 }
929 } else {
930 if (0 != clut_offset) {
931 return false;
932 }
933
934 // If there is no CLUT, the number of input and output channels must match
935 if (a2b->input_channels != a2b->output_channels) {
936 return false;
937 }
938
939 // Zero out the number of input channels to signal that we're skipping this stage
940 a2b->input_channels = 0;
941 }
942
943 return true;
944}
945
946// Exactly the same as read_tag_mab(), except where there are comments.
947// TODO: refactor the two to eliminate common code?
948static bool read_tag_mba(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
949 if (tag->size < SAFE_SIZEOF(mAB_or_mBA_Layout)) {
950 return false;
951 }
952
953 const mAB_or_mBA_Layout* mBATag = (const mAB_or_mBA_Layout*)tag->buf;
954
955 b2a->input_channels = mBATag->input_channels[0];
956 b2a->output_channels = mBATag->output_channels[0];
957
958 // Require exactly 3 inputs (XYZ) and 3 (RGB) or 4 (CMYK) outputs.
959 if (b2a->input_channels != ARRAY_COUNT(b2a->input_curves)) {
960 return false;
961 }
962 if (b2a->output_channels < 3 || b2a->output_channels > ARRAY_COUNT(b2a->output_curves)) {
963 return false;
964 }
965
966 uint32_t b_curve_offset = read_big_u32(ptr: mBATag->b_curve_offset);
967 uint32_t matrix_offset = read_big_u32(ptr: mBATag->matrix_offset);
968 uint32_t m_curve_offset = read_big_u32(ptr: mBATag->m_curve_offset);
969 uint32_t clut_offset = read_big_u32(ptr: mBATag->clut_offset);
970 uint32_t a_curve_offset = read_big_u32(ptr: mBATag->a_curve_offset);
971
972 if (0 == b_curve_offset) {
973 return false;
974 }
975
976 // "B" curves are our inputs, not outputs.
977 if (!read_curves(buf: tag->buf, size: tag->size, curve_offset: b_curve_offset, num_curves: b2a->input_channels,
978 curves: b2a->input_curves)) {
979 return false;
980 }
981
982 if (0 != m_curve_offset) {
983 if (0 == matrix_offset) {
984 return false;
985 }
986 // Matrix channels is tied to input_channels (3), not output_channels.
987 b2a->matrix_channels = b2a->input_channels;
988
989 if (!read_curves(buf: tag->buf, size: tag->size, curve_offset: m_curve_offset, num_curves: b2a->matrix_channels,
990 curves: b2a->matrix_curves)) {
991 return false;
992 }
993
994 if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
995 return false;
996 }
997 float encoding_factor = pcs_is_xyz ? (32768 / 65535.0f) : 1.0f; // TODO: understand
998 const uint8_t* mtx_buf = tag->buf + matrix_offset;
999 b2a->matrix.vals[0][0] = encoding_factor * read_big_fixed(ptr: mtx_buf + 0);
1000 b2a->matrix.vals[0][1] = encoding_factor * read_big_fixed(ptr: mtx_buf + 4);
1001 b2a->matrix.vals[0][2] = encoding_factor * read_big_fixed(ptr: mtx_buf + 8);
1002 b2a->matrix.vals[1][0] = encoding_factor * read_big_fixed(ptr: mtx_buf + 12);
1003 b2a->matrix.vals[1][1] = encoding_factor * read_big_fixed(ptr: mtx_buf + 16);
1004 b2a->matrix.vals[1][2] = encoding_factor * read_big_fixed(ptr: mtx_buf + 20);
1005 b2a->matrix.vals[2][0] = encoding_factor * read_big_fixed(ptr: mtx_buf + 24);
1006 b2a->matrix.vals[2][1] = encoding_factor * read_big_fixed(ptr: mtx_buf + 28);
1007 b2a->matrix.vals[2][2] = encoding_factor * read_big_fixed(ptr: mtx_buf + 32);
1008 b2a->matrix.vals[0][3] = encoding_factor * read_big_fixed(ptr: mtx_buf + 36);
1009 b2a->matrix.vals[1][3] = encoding_factor * read_big_fixed(ptr: mtx_buf + 40);
1010 b2a->matrix.vals[2][3] = encoding_factor * read_big_fixed(ptr: mtx_buf + 44);
1011 } else {
1012 if (0 != matrix_offset) {
1013 return false;
1014 }
1015 b2a->matrix_channels = 0;
1016 }
1017
1018 if (0 != a_curve_offset) {
1019 if (0 == clut_offset) {
1020 return false;
1021 }
1022
1023 // "A" curves are our output, not input.
1024 if (!read_curves(buf: tag->buf, size: tag->size, curve_offset: a_curve_offset, num_curves: b2a->output_channels,
1025 curves: b2a->output_curves)) {
1026 return false;
1027 }
1028
1029 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout)) {
1030 return false;
1031 }
1032 const CLUT_Layout* clut = (const CLUT_Layout*)(tag->buf + clut_offset);
1033
1034 if (clut->grid_byte_width[0] == 1) {
1035 b2a->grid_8 = clut->variable;
1036 b2a->grid_16 = nullptr;
1037 } else if (clut->grid_byte_width[0] == 2) {
1038 b2a->grid_8 = nullptr;
1039 b2a->grid_16 = clut->variable;
1040 } else {
1041 return false;
1042 }
1043
1044 uint64_t grid_size = b2a->output_channels * clut->grid_byte_width[0];
1045 for (uint32_t i = 0; i < b2a->input_channels; ++i) {
1046 b2a->grid_points[i] = clut->grid_points[i];
1047 if (b2a->grid_points[i] < 2) {
1048 return false;
1049 }
1050 grid_size *= b2a->grid_points[i];
1051 }
1052 if (tag->size < clut_offset + SAFE_FIXED_SIZE(CLUT_Layout) + grid_size) {
1053 return false;
1054 }
1055 } else {
1056 if (0 != clut_offset) {
1057 return false;
1058 }
1059
1060 if (b2a->input_channels != b2a->output_channels) {
1061 return false;
1062 }
1063
1064 // Zero out *output* channels to skip this stage.
1065 b2a->output_channels = 0;
1066 }
1067 return true;
1068}
1069
1070// If you pass f, we'll fit a possibly-non-zero value for *f.
1071// If you pass nullptr, we'll assume you want *f to be treated as zero.
1072static int fit_linear(const skcms_Curve* curve, int N, float tol,
1073 float* c, float* d, float* f = nullptr) {
1074 assert(N > 1);
1075 // We iteratively fit the first points to the TF's linear piece.
1076 // We want the cx + f line to pass through the first and last points we fit exactly.
1077 //
1078 // As we walk along the points we find the minimum and maximum slope of the line before the
1079 // error would exceed our tolerance. We stop when the range [slope_min, slope_max] becomes
1080 // emtpy, when we definitely can't add any more points.
1081 //
1082 // Some points' error intervals may intersect the running interval but not lie fully
1083 // within it. So we keep track of the last point we saw that is a valid end point candidate,
1084 // and once the search is done, back up to build the line through *that* point.
1085 const float dx = 1.0f / static_cast<float>(N - 1);
1086
1087 int lin_points = 1;
1088
1089 float f_zero = 0.0f;
1090 if (f) {
1091 *f = eval_curve(curve, x: 0);
1092 } else {
1093 f = &f_zero;
1094 }
1095
1096
1097 float slope_min = -INFINITY_;
1098 float slope_max = +INFINITY_;
1099 for (int i = 1; i < N; ++i) {
1100 float x = static_cast<float>(i) * dx;
1101 float y = eval_curve(curve, x);
1102
1103 float slope_max_i = (y + tol - *f) / x,
1104 slope_min_i = (y - tol - *f) / x;
1105 if (slope_max_i < slope_min || slope_max < slope_min_i) {
1106 // Slope intervals would no longer overlap.
1107 break;
1108 }
1109 slope_max = fminf_(x: slope_max, y: slope_max_i);
1110 slope_min = fmaxf_(x: slope_min, y: slope_min_i);
1111
1112 float cur_slope = (y - *f) / x;
1113 if (slope_min <= cur_slope && cur_slope <= slope_max) {
1114 lin_points = i + 1;
1115 *c = cur_slope;
1116 }
1117 }
1118
1119 // Set D to the last point that met our tolerance.
1120 *d = static_cast<float>(lin_points - 1) * dx;
1121 return lin_points;
1122}
1123
1124// If this skcms_Curve holds an identity table, rewrite it as an identity skcms_TransferFunction.
1125static void canonicalize_identity(skcms_Curve* curve) {
1126 if (curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
1127 int N = (int)curve->table_entries;
1128
1129 float c = 0.0f, d = 0.0f, f = 0.0f;
1130 if (N == fit_linear(curve, N, tol: 1.0f/static_cast<float>(2*N), c: &c,d: &d,f: &f)
1131 && c == 1.0f
1132 && f == 0.0f) {
1133 curve->table_entries = 0;
1134 curve->table_8 = nullptr;
1135 curve->table_16 = nullptr;
1136 curve->parametric = skcms_TransferFunction{.g: 1,.a: 1,.b: 0,.c: 0,.d: 0,.e: 0,.f: 0};
1137 }
1138 }
1139}
1140
1141static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
1142 bool ok = false;
1143 if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, out: a2b); }
1144 if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, out: a2b); }
1145 if (tag->type == skcms_Signature_mAB ) { ok = read_tag_mab(tag, a2b, pcs_is_xyz); }
1146 if (!ok) {
1147 return false;
1148 }
1149
1150 if (a2b->input_channels > 0) { canonicalize_identity(curve: a2b->input_curves + 0); }
1151 if (a2b->input_channels > 1) { canonicalize_identity(curve: a2b->input_curves + 1); }
1152 if (a2b->input_channels > 2) { canonicalize_identity(curve: a2b->input_curves + 2); }
1153 if (a2b->input_channels > 3) { canonicalize_identity(curve: a2b->input_curves + 3); }
1154
1155 if (a2b->matrix_channels > 0) { canonicalize_identity(curve: a2b->matrix_curves + 0); }
1156 if (a2b->matrix_channels > 1) { canonicalize_identity(curve: a2b->matrix_curves + 1); }
1157 if (a2b->matrix_channels > 2) { canonicalize_identity(curve: a2b->matrix_curves + 2); }
1158
1159 if (a2b->output_channels > 0) { canonicalize_identity(curve: a2b->output_curves + 0); }
1160 if (a2b->output_channels > 1) { canonicalize_identity(curve: a2b->output_curves + 1); }
1161 if (a2b->output_channels > 2) { canonicalize_identity(curve: a2b->output_curves + 2); }
1162
1163 return true;
1164}
1165
1166static bool read_b2a(const skcms_ICCTag* tag, skcms_B2A* b2a, bool pcs_is_xyz) {
1167 bool ok = false;
1168 if (tag->type == skcms_Signature_mft1) { ok = read_tag_mft1(tag, out: b2a); }
1169 if (tag->type == skcms_Signature_mft2) { ok = read_tag_mft2(tag, out: b2a); }
1170 if (tag->type == skcms_Signature_mBA ) { ok = read_tag_mba(tag, b2a, pcs_is_xyz); }
1171 if (!ok) {
1172 return false;
1173 }
1174
1175 if (b2a->input_channels > 0) { canonicalize_identity(curve: b2a->input_curves + 0); }
1176 if (b2a->input_channels > 1) { canonicalize_identity(curve: b2a->input_curves + 1); }
1177 if (b2a->input_channels > 2) { canonicalize_identity(curve: b2a->input_curves + 2); }
1178
1179 if (b2a->matrix_channels > 0) { canonicalize_identity(curve: b2a->matrix_curves + 0); }
1180 if (b2a->matrix_channels > 1) { canonicalize_identity(curve: b2a->matrix_curves + 1); }
1181 if (b2a->matrix_channels > 2) { canonicalize_identity(curve: b2a->matrix_curves + 2); }
1182
1183 if (b2a->output_channels > 0) { canonicalize_identity(curve: b2a->output_curves + 0); }
1184 if (b2a->output_channels > 1) { canonicalize_identity(curve: b2a->output_curves + 1); }
1185 if (b2a->output_channels > 2) { canonicalize_identity(curve: b2a->output_curves + 2); }
1186 if (b2a->output_channels > 3) { canonicalize_identity(curve: b2a->output_curves + 3); }
1187
1188 return true;
1189}
1190
1191typedef struct {
1192 uint8_t type [4];
1193 uint8_t reserved [4];
1194 uint8_t color_primaries [1];
1195 uint8_t transfer_characteristics [1];
1196 uint8_t matrix_coefficients [1];
1197 uint8_t video_full_range_flag [1];
1198} CICP_Layout;
1199
1200static bool read_cicp(const skcms_ICCTag* tag, skcms_CICP* cicp) {
1201 if (tag->type != skcms_Signature_CICP || tag->size < SAFE_SIZEOF(CICP_Layout)) {
1202 return false;
1203 }
1204
1205 const CICP_Layout* cicpTag = (const CICP_Layout*)tag->buf;
1206
1207 cicp->color_primaries = cicpTag->color_primaries[0];
1208 cicp->transfer_characteristics = cicpTag->transfer_characteristics[0];
1209 cicp->matrix_coefficients = cicpTag->matrix_coefficients[0];
1210 cicp->video_full_range_flag = cicpTag->video_full_range_flag[0];
1211 return true;
1212}
1213
1214void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
1215 if (!profile || !profile->buffer || !tag) { return; }
1216 if (idx > profile->tag_count) { return; }
1217 const tag_Layout* tags = get_tag_table(profile);
1218 tag->signature = read_big_u32(ptr: tags[idx].signature);
1219 tag->size = read_big_u32(ptr: tags[idx].size);
1220 tag->buf = read_big_u32(ptr: tags[idx].offset) + profile->buffer;
1221 tag->type = read_big_u32(ptr: tag->buf);
1222}
1223
1224bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
1225 if (!profile || !profile->buffer || !tag) { return false; }
1226 const tag_Layout* tags = get_tag_table(profile);
1227 for (uint32_t i = 0; i < profile->tag_count; ++i) {
1228 if (read_big_u32(ptr: tags[i].signature) == sig) {
1229 tag->signature = sig;
1230 tag->size = read_big_u32(ptr: tags[i].size);
1231 tag->buf = read_big_u32(ptr: tags[i].offset) + profile->buffer;
1232 tag->type = read_big_u32(ptr: tag->buf);
1233 return true;
1234 }
1235 }
1236 return false;
1237}
1238
1239static bool usable_as_src(const skcms_ICCProfile* profile) {
1240 return profile->has_A2B
1241 || (profile->has_trc && profile->has_toXYZD50);
1242}
1243
1244bool skcms_ParseWithA2BPriority(const void* buf, size_t len,
1245 const int priority[], const int priorities,
1246 skcms_ICCProfile* profile) {
1247 assert(SAFE_SIZEOF(header_Layout) == 132);
1248
1249 if (!profile) {
1250 return false;
1251 }
1252 memset(s: profile, c: 0, SAFE_SIZEOF(*profile));
1253
1254 if (len < SAFE_SIZEOF(header_Layout)) {
1255 return false;
1256 }
1257
1258 // Byte-swap all header fields
1259 const header_Layout* header = (const header_Layout*)buf;
1260 profile->buffer = (const uint8_t*)buf;
1261 profile->size = read_big_u32(ptr: header->size);
1262 uint32_t version = read_big_u32(ptr: header->version);
1263 profile->data_color_space = read_big_u32(ptr: header->data_color_space);
1264 profile->pcs = read_big_u32(ptr: header->pcs);
1265 uint32_t signature = read_big_u32(ptr: header->signature);
1266 float illuminant_X = read_big_fixed(ptr: header->illuminant_X);
1267 float illuminant_Y = read_big_fixed(ptr: header->illuminant_Y);
1268 float illuminant_Z = read_big_fixed(ptr: header->illuminant_Z);
1269 profile->tag_count = read_big_u32(ptr: header->tag_count);
1270
1271 // Validate signature, size (smaller than buffer, large enough to hold tag table),
1272 // and major version
1273 uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
1274 if (signature != skcms_Signature_acsp ||
1275 profile->size > len ||
1276 profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
1277 (version >> 24) > 4) {
1278 return false;
1279 }
1280
1281 // Validate that illuminant is D50 white
1282 if (fabsf_(x: illuminant_X - 0.9642f) > 0.0100f ||
1283 fabsf_(x: illuminant_Y - 1.0000f) > 0.0100f ||
1284 fabsf_(x: illuminant_Z - 0.8249f) > 0.0100f) {
1285 return false;
1286 }
1287
1288 // Validate that all tag entries have sane offset + size
1289 const tag_Layout* tags = get_tag_table(profile);
1290 for (uint32_t i = 0; i < profile->tag_count; ++i) {
1291 uint32_t tag_offset = read_big_u32(ptr: tags[i].offset);
1292 uint32_t tag_size = read_big_u32(ptr: tags[i].size);
1293 uint64_t tag_end = (uint64_t)tag_offset + (uint64_t)tag_size;
1294 if (tag_size < 4 || tag_end > profile->size) {
1295 return false;
1296 }
1297 }
1298
1299 if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
1300 return false;
1301 }
1302
1303 bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
1304
1305 // Pre-parse commonly used tags.
1306 skcms_ICCTag kTRC;
1307 if (profile->data_color_space == skcms_Signature_Gray &&
1308 skcms_GetTagBySignature(profile, sig: skcms_Signature_kTRC, tag: &kTRC)) {
1309 if (!read_curve(buf: kTRC.buf, size: kTRC.size, curve: &profile->trc[0], curve_size: nullptr)) {
1310 // Malformed tag
1311 return false;
1312 }
1313 profile->trc[1] = profile->trc[0];
1314 profile->trc[2] = profile->trc[0];
1315 profile->has_trc = true;
1316
1317 if (pcs_is_xyz) {
1318 profile->toXYZD50.vals[0][0] = illuminant_X;
1319 profile->toXYZD50.vals[1][1] = illuminant_Y;
1320 profile->toXYZD50.vals[2][2] = illuminant_Z;
1321 profile->has_toXYZD50 = true;
1322 }
1323 } else {
1324 skcms_ICCTag rTRC, gTRC, bTRC;
1325 if (skcms_GetTagBySignature(profile, sig: skcms_Signature_rTRC, tag: &rTRC) &&
1326 skcms_GetTagBySignature(profile, sig: skcms_Signature_gTRC, tag: &gTRC) &&
1327 skcms_GetTagBySignature(profile, sig: skcms_Signature_bTRC, tag: &bTRC)) {
1328 if (!read_curve(buf: rTRC.buf, size: rTRC.size, curve: &profile->trc[0], curve_size: nullptr) ||
1329 !read_curve(buf: gTRC.buf, size: gTRC.size, curve: &profile->trc[1], curve_size: nullptr) ||
1330 !read_curve(buf: bTRC.buf, size: bTRC.size, curve: &profile->trc[2], curve_size: nullptr)) {
1331 // Malformed TRC tags
1332 return false;
1333 }
1334 profile->has_trc = true;
1335 }
1336
1337 skcms_ICCTag rXYZ, gXYZ, bXYZ;
1338 if (skcms_GetTagBySignature(profile, sig: skcms_Signature_rXYZ, tag: &rXYZ) &&
1339 skcms_GetTagBySignature(profile, sig: skcms_Signature_gXYZ, tag: &gXYZ) &&
1340 skcms_GetTagBySignature(profile, sig: skcms_Signature_bXYZ, tag: &bXYZ)) {
1341 if (!read_to_XYZD50(rXYZ: &rXYZ, gXYZ: &gXYZ, bXYZ: &bXYZ, toXYZ: &profile->toXYZD50)) {
1342 // Malformed XYZ tags
1343 return false;
1344 }
1345 profile->has_toXYZD50 = true;
1346 }
1347 }
1348
1349 for (int i = 0; i < priorities; i++) {
1350 // enum { perceptual, relative_colormetric, saturation }
1351 if (priority[i] < 0 || priority[i] > 2) {
1352 return false;
1353 }
1354 uint32_t sig = skcms_Signature_A2B0 + static_cast<uint32_t>(priority[i]);
1355 skcms_ICCTag tag;
1356 if (skcms_GetTagBySignature(profile, sig, tag: &tag)) {
1357 if (!read_a2b(tag: &tag, a2b: &profile->A2B, pcs_is_xyz)) {
1358 // Malformed A2B tag
1359 return false;
1360 }
1361 profile->has_A2B = true;
1362 break;
1363 }
1364 }
1365
1366 for (int i = 0; i < priorities; i++) {
1367 // enum { perceptual, relative_colormetric, saturation }
1368 if (priority[i] < 0 || priority[i] > 2) {
1369 return false;
1370 }
1371 uint32_t sig = skcms_Signature_B2A0 + static_cast<uint32_t>(priority[i]);
1372 skcms_ICCTag tag;
1373 if (skcms_GetTagBySignature(profile, sig, tag: &tag)) {
1374 if (!read_b2a(tag: &tag, b2a: &profile->B2A, pcs_is_xyz)) {
1375 // Malformed B2A tag
1376 return false;
1377 }
1378 profile->has_B2A = true;
1379 break;
1380 }
1381 }
1382
1383 skcms_ICCTag cicp_tag;
1384 if (skcms_GetTagBySignature(profile, sig: skcms_Signature_CICP, tag: &cicp_tag)) {
1385 if (!read_cicp(tag: &cicp_tag, cicp: &profile->CICP)) {
1386 // Malformed CICP tag
1387 return false;
1388 }
1389 profile->has_CICP = true;
1390 }
1391
1392 return usable_as_src(profile);
1393}
1394
1395
1396const skcms_ICCProfile* skcms_sRGB_profile() {
1397 static const skcms_ICCProfile sRGB_profile = {
1398 .buffer: nullptr, // buffer, moot here
1399
1400 .size: 0, // size, moot here
1401 .data_color_space: skcms_Signature_RGB, // data_color_space
1402 .pcs: skcms_Signature_XYZ, // pcs
1403 .tag_count: 0, // tag count, moot here
1404
1405 // We choose to represent sRGB with its canonical transfer function,
1406 // and with its canonical XYZD50 gamut matrix.
1407 .has_trc: true, // has_trc, followed by the 3 trc curves
1408 .trc: {
1409 {{.alias_of_table_entries: 0, .parametric: {.g: 2.4f, .a: (float)(1/1.055), .b: (float)(0.055/1.055), .c: (float)(1/12.92), .d: 0.04045f, .e: 0, .f: 0}}},
1410 {{.alias_of_table_entries: 0, .parametric: {.g: 2.4f, .a: (float)(1/1.055), .b: (float)(0.055/1.055), .c: (float)(1/12.92), .d: 0.04045f, .e: 0, .f: 0}}},
1411 {{.alias_of_table_entries: 0, .parametric: {.g: 2.4f, .a: (float)(1/1.055), .b: (float)(0.055/1.055), .c: (float)(1/12.92), .d: 0.04045f, .e: 0, .f: 0}}},
1412 },
1413
1414 .has_toXYZD50: true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1415 .toXYZD50: {.vals: {
1416 { 0.436065674f, 0.385147095f, 0.143066406f },
1417 { 0.222488403f, 0.716873169f, 0.060607910f },
1418 { 0.013916016f, 0.097076416f, 0.714096069f },
1419 }},
1420
1421 .has_A2B: false, // has_A2B, followed by A2B itself, which we don't care about.
1422 .A2B: {
1423 .input_channels: 0,
1424 .input_curves: {
1425 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1426 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1427 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1428 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1429 },
1430 .grid_points: {0,0,0,0},
1431 .grid_8: nullptr,
1432 .grid_16: nullptr,
1433
1434 .matrix_channels: 0,
1435 .matrix_curves: {
1436 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1437 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1438 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1439 },
1440 .matrix: {.vals: {
1441 { 0,0,0,0 },
1442 { 0,0,0,0 },
1443 { 0,0,0,0 },
1444 }},
1445
1446 .output_channels: 0,
1447 .output_curves: {
1448 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1449 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1450 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1451 },
1452 },
1453
1454 .has_B2A: false, // has_B2A, followed by B2A itself, which we also don't care about.
1455 .B2A: {
1456 .input_channels: 0,
1457 .input_curves: {
1458 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1459 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1460 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1461 },
1462
1463 .matrix_channels: 0,
1464 .matrix: {.vals: {
1465 { 0,0,0,0 },
1466 { 0,0,0,0 },
1467 { 0,0,0,0 },
1468 }},
1469 .matrix_curves: {
1470 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1471 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1472 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1473 },
1474
1475 .output_channels: 0,
1476 .grid_points: {0,0,0,0},
1477 .grid_8: nullptr,
1478 .grid_16: nullptr,
1479 .output_curves: {
1480 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1481 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1482 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1483 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1484 },
1485 },
1486
1487 .has_CICP: false, // has_CICP, followed by cicp itself which we don't care about.
1488 .CICP: { .color_primaries: 0, .transfer_characteristics: 0, .matrix_coefficients: 0, .video_full_range_flag: 0 },
1489 };
1490 return &sRGB_profile;
1491}
1492
1493const skcms_ICCProfile* skcms_XYZD50_profile() {
1494 // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1495 static const skcms_ICCProfile XYZD50_profile = {
1496 .buffer: nullptr, // buffer, moot here
1497
1498 .size: 0, // size, moot here
1499 .data_color_space: skcms_Signature_RGB, // data_color_space
1500 .pcs: skcms_Signature_XYZ, // pcs
1501 .tag_count: 0, // tag count, moot here
1502
1503 .has_trc: true, // has_trc, followed by the 3 trc curves
1504 .trc: {
1505 {{.alias_of_table_entries: 0, .parametric: {.g: 1,.a: 1, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1506 {{.alias_of_table_entries: 0, .parametric: {.g: 1,.a: 1, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1507 {{.alias_of_table_entries: 0, .parametric: {.g: 1,.a: 1, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1508 },
1509
1510 .has_toXYZD50: true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1511 .toXYZD50: {.vals: {
1512 { 1,0,0 },
1513 { 0,1,0 },
1514 { 0,0,1 },
1515 }},
1516
1517 .has_A2B: false, // has_A2B, followed by A2B itself, which we don't care about.
1518 .A2B: {
1519 .input_channels: 0,
1520 .input_curves: {
1521 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1522 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1523 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1524 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1525 },
1526 .grid_points: {0,0,0,0},
1527 .grid_8: nullptr,
1528 .grid_16: nullptr,
1529
1530 .matrix_channels: 0,
1531 .matrix_curves: {
1532 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1533 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1534 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1535 },
1536 .matrix: {.vals: {
1537 { 0,0,0,0 },
1538 { 0,0,0,0 },
1539 { 0,0,0,0 },
1540 }},
1541
1542 .output_channels: 0,
1543 .output_curves: {
1544 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1545 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1546 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1547 },
1548 },
1549
1550 .has_B2A: false, // has_B2A, followed by B2A itself, which we also don't care about.
1551 .B2A: {
1552 .input_channels: 0,
1553 .input_curves: {
1554 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1555 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1556 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1557 },
1558
1559 .matrix_channels: 0,
1560 .matrix: {.vals: {
1561 { 0,0,0,0 },
1562 { 0,0,0,0 },
1563 { 0,0,0,0 },
1564 }},
1565 .matrix_curves: {
1566 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1567 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1568 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1569 },
1570
1571 .output_channels: 0,
1572 .grid_points: {0,0,0,0},
1573 .grid_8: nullptr,
1574 .grid_16: nullptr,
1575 .output_curves: {
1576 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1577 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1578 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1579 {{.alias_of_table_entries: 0, .parametric: {.g: 0,.a: 0, .b: 0,.c: 0,.d: 0,.e: 0,.f: 0}}},
1580 },
1581 },
1582
1583 .has_CICP: false, // has_CICP, followed by cicp itself which we don't care about.
1584 .CICP: { .color_primaries: 0, .transfer_characteristics: 0, .matrix_coefficients: 0, .video_full_range_flag: 0 },
1585 };
1586
1587 return &XYZD50_profile;
1588}
1589
1590const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1591 return &skcms_sRGB_profile()->trc[0].parametric;
1592}
1593
1594const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1595 static const skcms_TransferFunction sRGB_inv =
1596 {.g: 0.416666657f, .a: 1.137283325f, .b: -0.0f, .c: 12.920000076f, .d: 0.003130805f, .e: -0.054969788f, .f: -0.0f};
1597 return &sRGB_inv;
1598}
1599
1600const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1601 static const skcms_TransferFunction identity = {.g: 1,.a: 1,.b: 0,.c: 0,.d: 0,.e: 0,.f: 0};
1602 return &identity;
1603}
1604
1605const uint8_t skcms_252_random_bytes[] = {
1606 8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1607 119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1608 154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1609 194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1610 108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1611 70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1612 137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1613 9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1614 129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1615 140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1616 219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1617 123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1618 189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1619 174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1620 2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1621 112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1622};
1623
1624bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1625 // Test for exactly equal profiles first.
1626 if (A == B || 0 == memcmp(s1: A,s2: B, n: sizeof(skcms_ICCProfile))) {
1627 return true;
1628 }
1629
1630 // For now this is the essentially the same strategy we use in test_only.c
1631 // for our skcms_Transform() smoke tests:
1632 // 1) transform A to XYZD50
1633 // 2) transform B to XYZD50
1634 // 3) return true if they're similar enough
1635 // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1636
1637 // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1638 // 252 is evenly divisible by 3 and 4. Only 192, 10, 241, and 43 are missing.
1639
1640 // We want to allow otherwise equivalent profiles tagged as grayscale and RGB
1641 // to be treated as equal. But CMYK profiles are a totally different ballgame.
1642 const auto CMYK = skcms_Signature_CMYK;
1643 if ((A->data_color_space == CMYK) != (B->data_color_space == CMYK)) {
1644 return false;
1645 }
1646
1647 // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1648 // TODO: working with RGBA_8888 either way is probably fastest.
1649 skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1650 size_t npixels = 84;
1651 if (A->data_color_space == skcms_Signature_CMYK) {
1652 fmt = skcms_PixelFormat_RGBA_8888;
1653 npixels = 63;
1654 }
1655
1656 // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1657 // use pre-canned results and skip that skcms_Transform() call?
1658 uint8_t dstA[252],
1659 dstB[252];
1660 if (!skcms_Transform(
1661 src: skcms_252_random_bytes, srcFmt: fmt, srcAlpha: skcms_AlphaFormat_Unpremul, srcProfile: A,
1662 dst: dstA, dstFmt: skcms_PixelFormat_RGB_888, dstAlpha: skcms_AlphaFormat_Unpremul, dstProfile: skcms_XYZD50_profile(),
1663 npixels)) {
1664 return false;
1665 }
1666 if (!skcms_Transform(
1667 src: skcms_252_random_bytes, srcFmt: fmt, srcAlpha: skcms_AlphaFormat_Unpremul, srcProfile: B,
1668 dst: dstB, dstFmt: skcms_PixelFormat_RGB_888, dstAlpha: skcms_AlphaFormat_Unpremul, dstProfile: skcms_XYZD50_profile(),
1669 npixels)) {
1670 return false;
1671 }
1672
1673 // TODO: make sure this final check has reasonable codegen.
1674 for (size_t i = 0; i < 252; i++) {
1675 if (abs(x: (int)dstA[i] - (int)dstB[i]) > 1) {
1676 return false;
1677 }
1678 }
1679 return true;
1680}
1681
1682bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1683 const skcms_TransferFunction* inv_tf) {
1684 if (!profile || !profile->has_trc) {
1685 return false;
1686 }
1687
1688 return skcms_AreApproximateInverses(curve: &profile->trc[0], inv_tf) &&
1689 skcms_AreApproximateInverses(curve: &profile->trc[1], inv_tf) &&
1690 skcms_AreApproximateInverses(curve: &profile->trc[2], inv_tf);
1691}
1692
1693static bool is_zero_to_one(float x) {
1694 return 0 <= x && x <= 1;
1695}
1696
1697typedef struct { float vals[3]; } skcms_Vector3;
1698
1699static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1700 skcms_Vector3 dst = {.vals: {0,0,0}};
1701 for (int row = 0; row < 3; ++row) {
1702 dst.vals[row] = m->vals[row][0] * v->vals[0]
1703 + m->vals[row][1] * v->vals[1]
1704 + m->vals[row][2] * v->vals[2];
1705 }
1706 return dst;
1707}
1708
1709bool skcms_AdaptToXYZD50(float wx, float wy,
1710 skcms_Matrix3x3* toXYZD50) {
1711 if (!is_zero_to_one(x: wx) || !is_zero_to_one(x: wy) ||
1712 !toXYZD50) {
1713 return false;
1714 }
1715
1716 // Assumes that Y is 1.0f.
1717 skcms_Vector3 wXYZ = { .vals: { wx / wy, 1, (1 - wx - wy) / wy } };
1718
1719 // Now convert toXYZ matrix to toXYZD50.
1720 skcms_Vector3 wXYZD50 = { .vals: { 0.96422f, 1.0f, 0.82521f } };
1721
1722 // Calculate the chromatic adaptation matrix. We will use the Bradford method, thus
1723 // the matrices below. The Bradford method is used by Adobe and is widely considered
1724 // to be the best.
1725 skcms_Matrix3x3 xyz_to_lms = {.vals: {
1726 { 0.8951f, 0.2664f, -0.1614f },
1727 { -0.7502f, 1.7135f, 0.0367f },
1728 { 0.0389f, -0.0685f, 1.0296f },
1729 }};
1730 skcms_Matrix3x3 lms_to_xyz = {.vals: {
1731 { 0.9869929f, -0.1470543f, 0.1599627f },
1732 { 0.4323053f, 0.5183603f, 0.0492912f },
1733 { -0.0085287f, 0.0400428f, 0.9684867f },
1734 }};
1735
1736 skcms_Vector3 srcCone = mv_mul(m: &xyz_to_lms, v: &wXYZ);
1737 skcms_Vector3 dstCone = mv_mul(m: &xyz_to_lms, v: &wXYZD50);
1738
1739 *toXYZD50 = {.vals: {
1740 { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1741 { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1742 { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1743 }};
1744 *toXYZD50 = skcms_Matrix3x3_concat(toXYZD50, &xyz_to_lms);
1745 *toXYZD50 = skcms_Matrix3x3_concat(&lms_to_xyz, toXYZD50);
1746
1747 return true;
1748}
1749
1750bool skcms_PrimariesToXYZD50(float rx, float ry,
1751 float gx, float gy,
1752 float bx, float by,
1753 float wx, float wy,
1754 skcms_Matrix3x3* toXYZD50) {
1755 if (!is_zero_to_one(x: rx) || !is_zero_to_one(x: ry) ||
1756 !is_zero_to_one(x: gx) || !is_zero_to_one(x: gy) ||
1757 !is_zero_to_one(x: bx) || !is_zero_to_one(x: by) ||
1758 !is_zero_to_one(x: wx) || !is_zero_to_one(x: wy) ||
1759 !toXYZD50) {
1760 return false;
1761 }
1762
1763 // First, we need to convert xy values (primaries) to XYZ.
1764 skcms_Matrix3x3 primaries = {.vals: {
1765 { rx, gx, bx },
1766 { ry, gy, by },
1767 { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1768 }};
1769 skcms_Matrix3x3 primaries_inv;
1770 if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1771 return false;
1772 }
1773
1774 // Assumes that Y is 1.0f.
1775 skcms_Vector3 wXYZ = { .vals: { wx / wy, 1, (1 - wx - wy) / wy } };
1776 skcms_Vector3 XYZ = mv_mul(m: &primaries_inv, v: &wXYZ);
1777
1778 skcms_Matrix3x3 toXYZ = {.vals: {
1779 { XYZ.vals[0], 0, 0 },
1780 { 0, XYZ.vals[1], 0 },
1781 { 0, 0, XYZ.vals[2] },
1782 }};
1783 toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1784
1785 skcms_Matrix3x3 DXtoD50;
1786 if (!skcms_AdaptToXYZD50(wx, wy, toXYZD50: &DXtoD50)) {
1787 return false;
1788 }
1789
1790 *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1791 return true;
1792}
1793
1794
1795bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1796 double a00 = src->vals[0][0],
1797 a01 = src->vals[1][0],
1798 a02 = src->vals[2][0],
1799 a10 = src->vals[0][1],
1800 a11 = src->vals[1][1],
1801 a12 = src->vals[2][1],
1802 a20 = src->vals[0][2],
1803 a21 = src->vals[1][2],
1804 a22 = src->vals[2][2];
1805
1806 double b0 = a00*a11 - a01*a10,
1807 b1 = a00*a12 - a02*a10,
1808 b2 = a01*a12 - a02*a11,
1809 b3 = a20,
1810 b4 = a21,
1811 b5 = a22;
1812
1813 double determinant = b0*b5
1814 - b1*b4
1815 + b2*b3;
1816
1817 if (determinant == 0) {
1818 return false;
1819 }
1820
1821 double invdet = 1.0 / determinant;
1822 if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_(x: (float)invdet)) {
1823 return false;
1824 }
1825
1826 b0 *= invdet;
1827 b1 *= invdet;
1828 b2 *= invdet;
1829 b3 *= invdet;
1830 b4 *= invdet;
1831 b5 *= invdet;
1832
1833 dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1834 dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1835 dst->vals[2][0] = (float)( + b2 );
1836 dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1837 dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1838 dst->vals[2][1] = (float)( - b1 );
1839 dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1840 dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1841 dst->vals[2][2] = (float)( + b0 );
1842
1843 for (int r = 0; r < 3; ++r)
1844 for (int c = 0; c < 3; ++c) {
1845 if (!isfinitef_(x: dst->vals[r][c])) {
1846 return false;
1847 }
1848 }
1849 return true;
1850}
1851
1852skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1853 skcms_Matrix3x3 m = { .vals: { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1854 for (int r = 0; r < 3; r++)
1855 for (int c = 0; c < 3; c++) {
1856 m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1857 + A->vals[r][1] * B->vals[1][c]
1858 + A->vals[r][2] * B->vals[2][c];
1859 }
1860 return m;
1861}
1862
1863#if defined(__clang__)
1864 [[clang::no_sanitize("float-divide-by-zero")]] // Checked for by classify() on the way out.
1865#endif
1866bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1867 TF_PQish pq;
1868 TF_HLGish hlg;
1869 switch (classify(tf: *src, pq: &pq, hlg: &hlg)) {
1870 case skcms_TFType_Invalid: return false;
1871 case skcms_TFType_sRGBish: break; // handled below
1872
1873 case skcms_TFType_PQish:
1874 *dst = { .g: TFKind_marker(kind: skcms_TFType_PQish), .a: -pq.A, .b: pq.D, .c: 1.0f/pq.F
1875 , .d: pq.B, .e: -pq.E, .f: 1.0f/pq.C};
1876 return true;
1877
1878 case skcms_TFType_HLGish:
1879 *dst = { .g: TFKind_marker(kind: skcms_TFType_HLGinvish), .a: 1.0f/hlg.R, .b: 1.0f/hlg.G
1880 , .c: 1.0f/hlg.a, .d: hlg.b, .e: hlg.c
1881 , .f: hlg.K_minus_1 };
1882 return true;
1883
1884 case skcms_TFType_HLGinvish:
1885 *dst = { .g: TFKind_marker(kind: skcms_TFType_HLGish), .a: 1.0f/hlg.R, .b: 1.0f/hlg.G
1886 , .c: 1.0f/hlg.a, .d: hlg.b, .e: hlg.c
1887 , .f: hlg.K_minus_1 };
1888 return true;
1889 }
1890
1891 assert (classify(*src) == skcms_TFType_sRGBish);
1892
1893 // We're inverting this function, solving for x in terms of y.
1894 // y = (cx + f) x < d
1895 // (ax + b)^g + e x ≥ d
1896 // The inverse of this function can be expressed in the same piecewise form.
1897 skcms_TransferFunction inv = {.g: 0,.a: 0,.b: 0,.c: 0,.d: 0,.e: 0,.f: 0};
1898
1899 // We'll start by finding the new threshold inv.d.
1900 // In principle we should be able to find that by solving for y at x=d from either side.
1901 // (If those two d values aren't the same, it's a discontinuous transfer function.)
1902 float d_l = src->c * src->d + src->f,
1903 d_r = powf_(x: src->a * src->d + src->b, y: src->g) + src->e;
1904 if (fabsf_(x: d_l - d_r) > 1/512.0f) {
1905 return false;
1906 }
1907 inv.d = d_l; // TODO(mtklein): better in practice to choose d_r?
1908
1909 // When d=0, the linear section collapses to a point. We leave c,d,f all zero in that case.
1910 if (inv.d > 0) {
1911 // Inverting the linear section is pretty straightfoward:
1912 // y = cx + f
1913 // y - f = cx
1914 // (1/c)y - f/c = x
1915 inv.c = 1.0f/src->c;
1916 inv.f = -src->f/src->c;
1917 }
1918
1919 // The interesting part is inverting the nonlinear section:
1920 // y = (ax + b)^g + e.
1921 // y - e = (ax + b)^g
1922 // (y - e)^1/g = ax + b
1923 // (y - e)^1/g - b = ax
1924 // (1/a)(y - e)^1/g - b/a = x
1925 //
1926 // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1927 // let k = (1/a)^g
1928 // (1/a)( y - e)^1/g - b/a = x
1929 // (ky - ke)^1/g - b/a = x
1930
1931 float k = powf_(x: src->a, y: -src->g); // (1/a)^g == a^-g
1932 inv.g = 1.0f / src->g;
1933 inv.a = k;
1934 inv.b = -k * src->e;
1935 inv.e = -src->b / src->a;
1936
1937 // We need to enforce the same constraints here that we do when fitting a curve,
1938 // a >= 0 and ad+b >= 0. These constraints are checked by classify(), so they're true
1939 // of the source function if we're here.
1940
1941 // Just like when fitting the curve, there's really no way to rescue a < 0.
1942 if (inv.a < 0) {
1943 return false;
1944 }
1945 // On the other hand we can rescue an ad+b that's gone slightly negative here.
1946 if (inv.a * inv.d + inv.b < 0) {
1947 inv.b = -inv.a * inv.d;
1948 }
1949
1950 // That should usually make classify(inv) == sRGBish true, but there are a couple situations
1951 // where we might still fail here, like non-finite parameter values.
1952 if (classify(tf: inv) != skcms_TFType_sRGBish) {
1953 return false;
1954 }
1955
1956 assert (inv.a >= 0);
1957 assert (inv.a * inv.d + inv.b >= 0);
1958
1959 // Now in principle we're done.
1960 // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak
1961 // e or f of the inverse, depending on which segment contains src(1.0f).
1962 float s = skcms_TransferFunction_eval(tf: src, x: 1.0f);
1963 if (!isfinitef_(x: s)) {
1964 return false;
1965 }
1966
1967 float sign = s < 0 ? -1.0f : 1.0f;
1968 s *= sign;
1969 if (s < inv.d) {
1970 inv.f = 1.0f - sign * inv.c * s;
1971 } else {
1972 inv.e = 1.0f - sign * powf_(x: inv.a * s + inv.b, y: inv.g);
1973 }
1974
1975 *dst = inv;
1976 return classify(tf: *dst) == skcms_TFType_sRGBish;
1977}
1978
1979// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
1980
1981// From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
1982//
1983// tf(x) = cx + f x < d
1984// tf(x) = (ax + b)^g + e x ≥ d
1985//
1986// When fitting, we add the additional constraint that both pieces meet at d:
1987//
1988// cd + f = (ad + b)^g + e
1989//
1990// Solving for e and folding it through gives an alternate formulation of the non-linear piece:
1991//
1992// tf(x) = cx + f x < d
1993// tf(x) = (ax + b)^g - (ad + b)^g + cd + f x ≥ d
1994//
1995// Our overall strategy is then:
1996// For a couple tolerances,
1997// - fit_linear(): fit c,d,f iteratively to as many points as our tolerance allows
1998// - invert c,d,f
1999// - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
2000// (and by constraint, inverted e) to the inverse of the table.
2001// Return the parameters with least maximum error.
2002//
2003// To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
2004// of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
2005//
2006// let y = Table(x)
2007// r(x) = x - f_inv(y)
2008//
2009// ∂r/∂g = ln(ay + b)*(ay + b)^g
2010// - ln(ad + b)*(ad + b)^g
2011// ∂r/∂a = yg(ay + b)^(g-1)
2012// - dg(ad + b)^(g-1)
2013// ∂r/∂b = g(ay + b)^(g-1)
2014// - g(ad + b)^(g-1)
2015
2016// Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
2017// and fill out the gradient of the residual into dfdP.
2018static float rg_nonlinear(float x,
2019 const skcms_Curve* curve,
2020 const skcms_TransferFunction* tf,
2021 float dfdP[3]) {
2022 const float y = eval_curve(curve, x);
2023
2024 const float g = tf->g, a = tf->a, b = tf->b,
2025 c = tf->c, d = tf->d, f = tf->f;
2026
2027 const float Y = fmaxf_(x: a*y + b, y: 0.0f),
2028 D = a*d + b;
2029 assert (D >= 0);
2030
2031 // The gradient.
2032 dfdP[0] = logf_(x: Y)*powf_(x: Y, y: g)
2033 - logf_(x: D)*powf_(x: D, y: g);
2034 dfdP[1] = y*g*powf_(x: Y, y: g-1)
2035 - d*g*powf_(x: D, y: g-1);
2036 dfdP[2] = g*powf_(x: Y, y: g-1)
2037 - g*powf_(x: D, y: g-1);
2038
2039 // The residual.
2040 const float f_inv = powf_(x: Y, y: g)
2041 - powf_(x: D, y: g)
2042 + c*d + f;
2043 return x - f_inv;
2044}
2045
2046static bool gauss_newton_step(const skcms_Curve* curve,
2047 skcms_TransferFunction* tf,
2048 float x0, float dx, int N) {
2049 // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
2050 //
2051 // Let P = [ tf->g, tf->a, tf->b ] (the three terms that we're adjusting).
2052 //
2053 // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
2054 // where r(P) is the residual vector
2055 // and Jf is the Jacobian matrix of f(), ∂r/∂P.
2056 //
2057 // Let's review the shape of each of these expressions:
2058 // r(P) is [N x 1], a column vector with one entry per value of x tested
2059 // Jf is [N x 3], a matrix with an entry for each (x,P) pair
2060 // Jf^T is [3 x N], the transpose of Jf
2061 //
2062 // Jf^T Jf is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
2063 // and so is its inverse (Jf^T Jf)^-1
2064 // Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
2065 //
2066 // Our implementation strategy to get to the final ∆P is
2067 // 1) evaluate Jf^T Jf, call that lhs
2068 // 2) evaluate Jf^T r(P), call that rhs
2069 // 3) invert lhs
2070 // 4) multiply inverse lhs by rhs
2071 //
2072 // This is a friendly implementation strategy because we don't have to have any
2073 // buffers that scale with N, and equally nice don't have to perform any matrix
2074 // operations that are variable size.
2075 //
2076 // Other implementation strategies could trade this off, e.g. evaluating the
2077 // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
2078 // the residuals. That would probably require implementing singular value
2079 // decomposition, and would create a [3 x N] matrix to be multiplied by the
2080 // [N x 1] residual vector, but on the upside I think that'd eliminate the
2081 // possibility of this gauss_newton_step() function ever failing.
2082
2083 // 0) start off with lhs and rhs safely zeroed.
2084 skcms_Matrix3x3 lhs = {.vals: { {0,0,0}, {0,0,0}, {0,0,0} }};
2085 skcms_Vector3 rhs = { .vals: {0,0,0} };
2086
2087 // 1,2) evaluate lhs and evaluate rhs
2088 // We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
2089 // so we'll have to update lhs and rhs at the same time.
2090 for (int i = 0; i < N; i++) {
2091 float x = x0 + static_cast<float>(i)*dx;
2092
2093 float dfdP[3] = {0,0,0};
2094 float resid = rg_nonlinear(x,curve,tf, dfdP);
2095
2096 for (int r = 0; r < 3; r++) {
2097 for (int c = 0; c < 3; c++) {
2098 lhs.vals[r][c] += dfdP[r] * dfdP[c];
2099 }
2100 rhs.vals[r] += dfdP[r] * resid;
2101 }
2102 }
2103
2104 // If any of the 3 P parameters are unused, this matrix will be singular.
2105 // Detect those cases and fix them up to indentity instead, so we can invert.
2106 for (int k = 0; k < 3; k++) {
2107 if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
2108 lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
2109 lhs.vals[k][k] = 1;
2110 }
2111 }
2112
2113 // 3) invert lhs
2114 skcms_Matrix3x3 lhs_inv;
2115 if (!skcms_Matrix3x3_invert(src: &lhs, dst: &lhs_inv)) {
2116 return false;
2117 }
2118
2119 // 4) multiply inverse lhs by rhs
2120 skcms_Vector3 dP = mv_mul(m: &lhs_inv, v: &rhs);
2121 tf->g += dP.vals[0];
2122 tf->a += dP.vals[1];
2123 tf->b += dP.vals[2];
2124 return isfinitef_(x: tf->g) && isfinitef_(x: tf->a) && isfinitef_(x: tf->b);
2125}
2126
2127static float max_roundtrip_error_checked(const skcms_Curve* curve,
2128 const skcms_TransferFunction* tf_inv) {
2129 skcms_TransferFunction tf;
2130 if (!skcms_TransferFunction_invert(src: tf_inv, dst: &tf) || skcms_TFType_sRGBish != classify(tf)) {
2131 return INFINITY_;
2132 }
2133
2134 skcms_TransferFunction tf_inv_again;
2135 if (!skcms_TransferFunction_invert(src: &tf, dst: &tf_inv_again)) {
2136 return INFINITY_;
2137 }
2138
2139 return skcms_MaxRoundtripError(curve, inv_tf: &tf_inv_again);
2140}
2141
2142// Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
2143static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
2144 // This enforces a few constraints that are not modeled in gauss_newton_step()'s optimization.
2145 auto fixup_tf = [tf]() {
2146 // a must be non-negative. That ensures the function is monotonically increasing.
2147 // We don't really know how to fix up a if it goes negative.
2148 if (tf->a < 0) {
2149 return false;
2150 }
2151 // ad+b must be non-negative. That ensures we don't end up with complex numbers in powf.
2152 // We feel just barely not uneasy enough to tweak b so ad+b is zero in this case.
2153 if (tf->a * tf->d + tf->b < 0) {
2154 tf->b = -tf->a * tf->d;
2155 }
2156 assert (tf->a >= 0 &&
2157 tf->a * tf->d + tf->b >= 0);
2158
2159 // cd+f must be ~= (ad+b)^g+e. That ensures the function is continuous. We keep e as a free
2160 // parameter so we can guarantee this.
2161 tf->e = tf->c*tf->d + tf->f
2162 - powf_(x: tf->a*tf->d + tf->b, y: tf->g);
2163
2164 return isfinitef_(x: tf->e);
2165 };
2166
2167 if (!fixup_tf()) {
2168 return false;
2169 }
2170
2171 // No matter where we start, dx should always represent N even steps from 0 to 1.
2172 const float dx = 1.0f / static_cast<float>(N-1);
2173
2174 skcms_TransferFunction best_tf = *tf;
2175 float best_max_error = INFINITY_;
2176
2177 // Need this or several curves get worse... *sigh*
2178 float init_error = max_roundtrip_error_checked(curve, tf_inv: tf);
2179 if (init_error < best_max_error) {
2180 best_max_error = init_error;
2181 best_tf = *tf;
2182 }
2183
2184 // As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2.
2185 for (int j = 0; j < 8; j++) {
2186 if (!gauss_newton_step(curve, tf, x0: static_cast<float>(L)*dx, dx, N: N-L) || !fixup_tf()) {
2187 *tf = best_tf;
2188 return isfinitef_(x: best_max_error);
2189 }
2190
2191 float max_error = max_roundtrip_error_checked(curve, tf_inv: tf);
2192 if (max_error < best_max_error) {
2193 best_max_error = max_error;
2194 best_tf = *tf;
2195 }
2196 }
2197
2198 *tf = best_tf;
2199 return isfinitef_(x: best_max_error);
2200}
2201
2202bool skcms_ApproximateCurve(const skcms_Curve* curve,
2203 skcms_TransferFunction* approx,
2204 float* max_error) {
2205 if (!curve || !approx || !max_error) {
2206 return false;
2207 }
2208
2209 if (curve->table_entries == 0) {
2210 // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
2211 return false;
2212 }
2213
2214 if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
2215 // We need at least two points, and must put some reasonable cap on the maximum number.
2216 return false;
2217 }
2218
2219 int N = (int)curve->table_entries;
2220 const float dx = 1.0f / static_cast<float>(N - 1);
2221
2222 *max_error = INFINITY_;
2223 const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
2224 for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
2225 skcms_TransferFunction tf,
2226 tf_inv;
2227
2228 // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
2229 tf.f = 0.0f;
2230 int L = fit_linear(curve, N, tol: kTolerances[t], c: &tf.c, d: &tf.d);
2231
2232 if (L == N) {
2233 // If the entire data set was linear, move the coefficients to the nonlinear portion
2234 // with G == 1. This lets use a canonical representation with d == 0.
2235 tf.g = 1;
2236 tf.a = tf.c;
2237 tf.b = tf.f;
2238 tf.c = tf.d = tf.e = tf.f = 0;
2239 } else if (L == N - 1) {
2240 // Degenerate case with only two points in the nonlinear segment. Solve directly.
2241 tf.g = 1;
2242 tf.a = (eval_curve(curve, x: static_cast<float>(N-1)*dx) -
2243 eval_curve(curve, x: static_cast<float>(N-2)*dx))
2244 / dx;
2245 tf.b = eval_curve(curve, x: static_cast<float>(N-2)*dx)
2246 - tf.a * static_cast<float>(N-2)*dx;
2247 tf.e = 0;
2248 } else {
2249 // Start by guessing a gamma-only curve through the midpoint.
2250 int mid = (L + N) / 2;
2251 float mid_x = static_cast<float>(mid) / static_cast<float>(N - 1);
2252 float mid_y = eval_curve(curve, x: mid_x);
2253 tf.g = log2f_(x: mid_y) / log2f_(x: mid_x);
2254 tf.a = 1;
2255 tf.b = 0;
2256 tf.e = tf.c*tf.d + tf.f
2257 - powf_(x: tf.a*tf.d + tf.b, y: tf.g);
2258
2259
2260 if (!skcms_TransferFunction_invert(src: &tf, dst: &tf_inv) ||
2261 !fit_nonlinear(curve, L,N, tf: &tf_inv)) {
2262 continue;
2263 }
2264
2265 // We fit tf_inv, so calculate tf to keep in sync.
2266 // fit_nonlinear() should guarantee invertibility.
2267 if (!skcms_TransferFunction_invert(src: &tf_inv, dst: &tf)) {
2268 assert(false);
2269 continue;
2270 }
2271 }
2272
2273 // We'd better have a sane, sRGB-ish TF by now.
2274 // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish;
2275 // anything else is just some accident of math and the way we pun tf.g as a type flag.
2276 // fit_nonlinear() should guarantee this, but the special cases may fail this test.
2277 if (skcms_TFType_sRGBish != classify(tf)) {
2278 continue;
2279 }
2280
2281 // We find our error by roundtripping the table through tf_inv.
2282 //
2283 // (The most likely use case for this approximation is to be inverted and
2284 // used as the transfer function for a destination color space.)
2285 //
2286 // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
2287 // invertible, so re-verify that here (and use the new inverse for testing).
2288 // fit_nonlinear() should guarantee this, but the special cases that don't use
2289 // it may fail this test.
2290 if (!skcms_TransferFunction_invert(src: &tf, dst: &tf_inv)) {
2291 continue;
2292 }
2293
2294 float err = skcms_MaxRoundtripError(curve, inv_tf: &tf_inv);
2295 if (*max_error > err) {
2296 *max_error = err;
2297 *approx = tf;
2298 }
2299 }
2300 return isfinitef_(x: *max_error);
2301}
2302
2303// ~~~~ Impl. of skcms_Transform() ~~~~
2304
2305typedef enum {
2306 Op_load_a8,
2307 Op_load_g8,
2308 Op_load_8888_palette8,
2309 Op_load_4444,
2310 Op_load_565,
2311 Op_load_888,
2312 Op_load_8888,
2313 Op_load_1010102,
2314 Op_load_101010x_XR,
2315 Op_load_161616LE,
2316 Op_load_16161616LE,
2317 Op_load_161616BE,
2318 Op_load_16161616BE,
2319 Op_load_hhh,
2320 Op_load_hhhh,
2321 Op_load_fff,
2322 Op_load_ffff,
2323
2324 Op_swap_rb,
2325 Op_clamp,
2326 Op_invert,
2327 Op_force_opaque,
2328 Op_premul,
2329 Op_unpremul,
2330 Op_matrix_3x3,
2331 Op_matrix_3x4,
2332
2333 Op_lab_to_xyz,
2334 Op_xyz_to_lab,
2335
2336 Op_tf_r,
2337 Op_tf_g,
2338 Op_tf_b,
2339 Op_tf_a,
2340
2341 Op_pq_r,
2342 Op_pq_g,
2343 Op_pq_b,
2344 Op_pq_a,
2345
2346 Op_hlg_r,
2347 Op_hlg_g,
2348 Op_hlg_b,
2349 Op_hlg_a,
2350
2351 Op_hlginv_r,
2352 Op_hlginv_g,
2353 Op_hlginv_b,
2354 Op_hlginv_a,
2355
2356 Op_table_r,
2357 Op_table_g,
2358 Op_table_b,
2359 Op_table_a,
2360
2361 Op_clut_A2B,
2362 Op_clut_B2A,
2363
2364 Op_store_a8,
2365 Op_store_g8,
2366 Op_store_4444,
2367 Op_store_565,
2368 Op_store_888,
2369 Op_store_8888,
2370 Op_store_1010102,
2371 Op_store_161616LE,
2372 Op_store_16161616LE,
2373 Op_store_161616BE,
2374 Op_store_16161616BE,
2375 Op_store_101010x_XR,
2376 Op_store_hhh,
2377 Op_store_hhhh,
2378 Op_store_fff,
2379 Op_store_ffff,
2380} Op;
2381
2382#if defined(__clang__)
2383 template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N)));
2384#elif defined(__GNUC__)
2385 // For some reason GCC accepts this nonsense, but not the more straightforward version,
2386 // template <int N, typename T> using Vec = T __attribute__((vector_size(N*sizeof(T))));
2387 template <int N, typename T>
2388 struct VecHelper { typedef T __attribute__((vector_size(N*sizeof(T)))) V; };
2389
2390 template <int N, typename T> using Vec = typename VecHelper<N,T>::V;
2391#endif
2392
2393// First, instantiate our default exec_ops() implementation using the default compiliation target.
2394
2395namespace baseline {
2396#if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__)) \
2397 || (defined(__EMSCRIPTEN_major__) && !defined(__wasm_simd128__))
2398 #define N 1
2399 template <typename T> using V = T;
2400 using Color = float;
2401#elif defined(__AVX512F__) && defined(__AVX512DQ__)
2402 #define N 16
2403 template <typename T> using V = Vec<N,T>;
2404 using Color = float;
2405#elif defined(__AVX__)
2406 #define N 8
2407 template <typename T> using V = Vec<N,T>;
2408 using Color = float;
2409#else
2410 #define N 4
2411 template <typename T> using V = Vec<N,T>;
2412 using Color = float;
2413#endif
2414
2415 #include "src/Transform_inl.h"
2416 #undef N
2417}
2418
2419// Now, instantiate any other versions of run_program() we may want for runtime detection.
2420#if !defined(SKCMS_PORTABLE) && \
2421 !defined(SKCMS_NO_RUNTIME_CPU_DETECTION) && \
2422 (( defined(__clang__) && __clang_major__ >= 5) || \
2423 (!defined(__clang__) && defined(__GNUC__))) \
2424 && defined(__x86_64__)
2425
2426 #if !defined(__AVX2__)
2427 #if defined(__clang__)
2428 #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
2429 #elif defined(__GNUC__)
2430 #pragma GCC push_options
2431 #pragma GCC target("avx2,f16c")
2432 #endif
2433
2434 namespace hsw {
2435 #define USING_AVX
2436 #define USING_AVX_F16C
2437 #define USING_AVX2
2438 #define N 8
2439 template <typename T> using V = Vec<N,T>;
2440 using Color = float;
2441
2442 #include "src/Transform_inl.h"
2443
2444 // src/Transform_inl.h will undefine USING_* for us.
2445 #undef N
2446 }
2447
2448 #if defined(__clang__)
2449 #pragma clang attribute pop
2450 #elif defined(__GNUC__)
2451 #pragma GCC pop_options
2452 #endif
2453
2454 #define TEST_FOR_HSW
2455 #endif
2456
2457 #if !defined(__AVX512F__) || !defined(__AVX512DQ__)
2458 #if defined(__clang__)
2459 #pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function)
2460 #elif defined(__GNUC__)
2461 #pragma GCC push_options
2462 #pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl")
2463 #endif
2464
2465 namespace skx {
2466 #define USING_AVX512F
2467 #define N 16
2468 template <typename T> using V = Vec<N,T>;
2469 using Color = float;
2470
2471 #include "src/Transform_inl.h"
2472
2473 // src/Transform_inl.h will undefine USING_* for us.
2474 #undef N
2475 }
2476
2477 #if defined(__clang__)
2478 #pragma clang attribute pop
2479 #elif defined(__GNUC__)
2480 #pragma GCC pop_options
2481 #endif
2482
2483 #define TEST_FOR_SKX
2484 #endif
2485
2486 #if defined(TEST_FOR_HSW) || defined(TEST_FOR_SKX)
2487 enum class CpuType { None, HSW, SKX };
2488 static CpuType cpu_type() {
2489 static const CpuType type = []{
2490 if (!runtime_cpu_detection) {
2491 return CpuType::None;
2492 }
2493 // See http://www.sandpile.org/x86/cpuid.htm
2494
2495 // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
2496 uint32_t eax, ebx, ecx, edx;
2497 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2498 : "0"(1), "2"(0));
2499 if ((edx & (1u<<25)) && // SSE
2500 (edx & (1u<<26)) && // SSE2
2501 (ecx & (1u<< 0)) && // SSE3
2502 (ecx & (1u<< 9)) && // SSSE3
2503 (ecx & (1u<<12)) && // FMA (N.B. not used, avoided even)
2504 (ecx & (1u<<19)) && // SSE4.1
2505 (ecx & (1u<<20)) && // SSE4.2
2506 (ecx & (1u<<26)) && // XSAVE
2507 (ecx & (1u<<27)) && // OSXSAVE
2508 (ecx & (1u<<28)) && // AVX
2509 (ecx & (1u<<29))) { // F16C
2510
2511 // Call cpuid(7) to check for AVX2 and AVX-512 bits.
2512 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2513 : "0"(7), "2"(0));
2514 // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
2515 uint32_t xcr0, dont_need_edx;
2516 __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
2517
2518 if ((xcr0 & (1u<<1)) && // XMM register state saved?
2519 (xcr0 & (1u<<2)) && // YMM register state saved?
2520 (ebx & (1u<<5))) { // AVX2
2521 // At this point we're at least HSW. Continue checking for SKX.
2522 if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
2523 (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
2524 (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
2525 (ebx & (1u<<16)) && // AVX512F
2526 (ebx & (1u<<17)) && // AVX512DQ
2527 (ebx & (1u<<28)) && // AVX512CD
2528 (ebx & (1u<<30)) && // AVX512BW
2529 (ebx & (1u<<31))) { // AVX512VL
2530 return CpuType::SKX;
2531 }
2532 return CpuType::HSW;
2533 }
2534 }
2535 return CpuType::None;
2536 }();
2537 return type;
2538 }
2539 #endif
2540
2541#endif
2542
2543typedef struct {
2544 Op op;
2545 const void* arg;
2546} OpAndArg;
2547
2548static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
2549 static const struct { Op sRGBish, PQish, HLGish, HLGinvish, table; } ops[] = {
2550 { .sRGBish: Op_tf_r, .PQish: Op_pq_r, .HLGish: Op_hlg_r, .HLGinvish: Op_hlginv_r, .table: Op_table_r },
2551 { .sRGBish: Op_tf_g, .PQish: Op_pq_g, .HLGish: Op_hlg_g, .HLGinvish: Op_hlginv_g, .table: Op_table_g },
2552 { .sRGBish: Op_tf_b, .PQish: Op_pq_b, .HLGish: Op_hlg_b, .HLGinvish: Op_hlginv_b, .table: Op_table_b },
2553 { .sRGBish: Op_tf_a, .PQish: Op_pq_a, .HLGish: Op_hlg_a, .HLGinvish: Op_hlginv_a, .table: Op_table_a },
2554 };
2555 const auto& op = ops[channel];
2556
2557 if (curve->table_entries == 0) {
2558 const OpAndArg noop = { .op: Op_load_a8/*doesn't matter*/, .arg: nullptr };
2559
2560 const skcms_TransferFunction& tf = curve->parametric;
2561
2562 if (tf.g == 1 && tf.a == 1 &&
2563 tf.b == 0 && tf.c == 0 && tf.d == 0 && tf.e == 0 && tf.f == 0) {
2564 return noop;
2565 }
2566
2567 switch (classify(tf)) {
2568 case skcms_TFType_Invalid: return noop;
2569 case skcms_TFType_sRGBish: return OpAndArg{.op: op.sRGBish, .arg: &tf};
2570 case skcms_TFType_PQish: return OpAndArg{.op: op.PQish, .arg: &tf};
2571 case skcms_TFType_HLGish: return OpAndArg{.op: op.HLGish, .arg: &tf};
2572 case skcms_TFType_HLGinvish: return OpAndArg{.op: op.HLGinvish, .arg: &tf};
2573 }
2574 }
2575 return OpAndArg{.op: op.table, .arg: curve};
2576}
2577
2578static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
2579 switch (fmt >> 1) { // ignore rgb/bgr
2580 case skcms_PixelFormat_A_8 >> 1: return 1;
2581 case skcms_PixelFormat_G_8 >> 1: return 1;
2582 case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: return 1;
2583 case skcms_PixelFormat_ABGR_4444 >> 1: return 2;
2584 case skcms_PixelFormat_RGB_565 >> 1: return 2;
2585 case skcms_PixelFormat_RGB_888 >> 1: return 3;
2586 case skcms_PixelFormat_RGBA_8888 >> 1: return 4;
2587 case skcms_PixelFormat_RGBA_8888_sRGB >> 1: return 4;
2588 case skcms_PixelFormat_RGBA_1010102 >> 1: return 4;
2589 case skcms_PixelFormat_RGB_101010x_XR >> 1: return 4;
2590 case skcms_PixelFormat_RGB_161616LE >> 1: return 6;
2591 case skcms_PixelFormat_RGBA_16161616LE >> 1: return 8;
2592 case skcms_PixelFormat_RGB_161616BE >> 1: return 6;
2593 case skcms_PixelFormat_RGBA_16161616BE >> 1: return 8;
2594 case skcms_PixelFormat_RGB_hhh_Norm >> 1: return 6;
2595 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: return 8;
2596 case skcms_PixelFormat_RGB_hhh >> 1: return 6;
2597 case skcms_PixelFormat_RGBA_hhhh >> 1: return 8;
2598 case skcms_PixelFormat_RGB_fff >> 1: return 12;
2599 case skcms_PixelFormat_RGBA_ffff >> 1: return 16;
2600 }
2601 assert(false);
2602 return 0;
2603}
2604
2605static bool prep_for_destination(const skcms_ICCProfile* profile,
2606 skcms_Matrix3x3* fromXYZD50,
2607 skcms_TransferFunction* invR,
2608 skcms_TransferFunction* invG,
2609 skcms_TransferFunction* invB) {
2610 // skcms_Transform() supports B2A destinations...
2611 if (profile->has_B2A) { return true; }
2612 // ...and destinations with parametric transfer functions and an XYZD50 gamut matrix.
2613 return profile->has_trc
2614 && profile->has_toXYZD50
2615 && profile->trc[0].table_entries == 0
2616 && profile->trc[1].table_entries == 0
2617 && profile->trc[2].table_entries == 0
2618 && skcms_TransferFunction_invert(src: &profile->trc[0].parametric, dst: invR)
2619 && skcms_TransferFunction_invert(src: &profile->trc[1].parametric, dst: invG)
2620 && skcms_TransferFunction_invert(src: &profile->trc[2].parametric, dst: invB)
2621 && skcms_Matrix3x3_invert(src: &profile->toXYZD50, dst: fromXYZD50);
2622}
2623
2624bool skcms_Transform(const void* src,
2625 skcms_PixelFormat srcFmt,
2626 skcms_AlphaFormat srcAlpha,
2627 const skcms_ICCProfile* srcProfile,
2628 void* dst,
2629 skcms_PixelFormat dstFmt,
2630 skcms_AlphaFormat dstAlpha,
2631 const skcms_ICCProfile* dstProfile,
2632 size_t npixels) {
2633 return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile,
2634 dst, dstFmt, dstAlpha, dstProfile,
2635 npixels, palette: nullptr);
2636}
2637
2638bool skcms_TransformWithPalette(const void* src,
2639 skcms_PixelFormat srcFmt,
2640 skcms_AlphaFormat srcAlpha,
2641 const skcms_ICCProfile* srcProfile,
2642 void* dst,
2643 skcms_PixelFormat dstFmt,
2644 skcms_AlphaFormat dstAlpha,
2645 const skcms_ICCProfile* dstProfile,
2646 size_t nz,
2647 const void* palette) {
2648 const size_t dst_bpp = bytes_per_pixel(fmt: dstFmt),
2649 src_bpp = bytes_per_pixel(fmt: srcFmt);
2650 // Let's just refuse if the request is absurdly big.
2651 if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2652 return false;
2653 }
2654 int n = (int)nz;
2655
2656 // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2657 if (!srcProfile) {
2658 srcProfile = skcms_sRGB_profile();
2659 }
2660 if (!dstProfile) {
2661 dstProfile = skcms_sRGB_profile();
2662 }
2663
2664 // We can't transform in place unless the PixelFormats are the same size.
2665 if (dst == src && dst_bpp != src_bpp) {
2666 return false;
2667 }
2668 // TODO: more careful alias rejection (like, dst == src + 1)?
2669
2670 if (needs_palette(fmt: srcFmt) && !palette) {
2671 return false;
2672 }
2673
2674 Op program [32];
2675 const void* arguments[32];
2676
2677 Op* ops = program;
2678 const void** args = arguments;
2679
2680 // These are always parametric curves of some sort.
2681 skcms_Curve dst_curves[3];
2682 dst_curves[0].table_entries =
2683 dst_curves[1].table_entries =
2684 dst_curves[2].table_entries = 0;
2685
2686 skcms_Matrix3x3 from_xyz;
2687
2688 switch (srcFmt >> 1) {
2689 default: return false;
2690 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_load_a8; break;
2691 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_load_g8; break;
2692 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_load_4444; break;
2693 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_load_565; break;
2694 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_load_888; break;
2695 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_load_8888; break;
2696 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_load_1010102; break;
2697 case skcms_PixelFormat_RGB_101010x_XR >> 1: *ops++ = Op_load_101010x_XR; break;
2698 case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_load_161616LE; break;
2699 case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_load_16161616LE; break;
2700 case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_load_161616BE; break;
2701 case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_load_16161616BE; break;
2702 case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_load_hhh; break;
2703 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_load_hhhh; break;
2704 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_load_hhh; break;
2705 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_load_hhhh; break;
2706 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_load_fff; break;
2707 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_load_ffff; break;
2708
2709 case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: *ops++ = Op_load_8888_palette8;
2710 *args++ = palette;
2711 break;
2712 case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2713 *ops++ = Op_load_8888;
2714 *ops++ = Op_tf_r; *args++ = skcms_sRGB_TransferFunction();
2715 *ops++ = Op_tf_g; *args++ = skcms_sRGB_TransferFunction();
2716 *ops++ = Op_tf_b; *args++ = skcms_sRGB_TransferFunction();
2717 break;
2718 }
2719 if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
2720 srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
2721 *ops++ = Op_clamp;
2722 }
2723 if (srcFmt & 1) {
2724 *ops++ = Op_swap_rb;
2725 }
2726 skcms_ICCProfile gray_dst_profile;
2727 if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
2728 // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2729 // luminance (Y) by the destination transfer function.
2730 gray_dst_profile = *dstProfile;
2731 skcms_SetXYZD50(p: &gray_dst_profile, m: &skcms_XYZD50_profile()->toXYZD50);
2732 dstProfile = &gray_dst_profile;
2733 }
2734
2735 if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2736 // Photoshop creates CMYK images as inverse CMYK.
2737 // These happen to be the only ones we've _ever_ seen.
2738 *ops++ = Op_invert;
2739 // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2740 srcAlpha = skcms_AlphaFormat_Unpremul;
2741 }
2742
2743 if (srcAlpha == skcms_AlphaFormat_Opaque) {
2744 *ops++ = Op_force_opaque;
2745 } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2746 *ops++ = Op_unpremul;
2747 }
2748
2749 if (dstProfile != srcProfile) {
2750
2751 if (!prep_for_destination(profile: dstProfile,
2752 fromXYZD50: &from_xyz,
2753 invR: &dst_curves[0].parametric,
2754 invG: &dst_curves[1].parametric,
2755 invB: &dst_curves[2].parametric)) {
2756 return false;
2757 }
2758
2759 if (srcProfile->has_A2B) {
2760 if (srcProfile->A2B.input_channels) {
2761 for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) {
2762 OpAndArg oa = select_curve_op(curve: &srcProfile->A2B.input_curves[i], channel: i);
2763 if (oa.arg) {
2764 *ops++ = oa.op;
2765 *args++ = oa.arg;
2766 }
2767 }
2768 *ops++ = Op_clamp;
2769 *ops++ = Op_clut_A2B;
2770 *args++ = &srcProfile->A2B;
2771 }
2772
2773 if (srcProfile->A2B.matrix_channels == 3) {
2774 for (int i = 0; i < 3; i++) {
2775 OpAndArg oa = select_curve_op(curve: &srcProfile->A2B.matrix_curves[i], channel: i);
2776 if (oa.arg) {
2777 *ops++ = oa.op;
2778 *args++ = oa.arg;
2779 }
2780 }
2781
2782 static const skcms_Matrix3x4 I = {.vals: {
2783 {1,0,0,0},
2784 {0,1,0,0},
2785 {0,0,1,0},
2786 }};
2787 if (0 != memcmp(s1: &I, s2: &srcProfile->A2B.matrix, n: sizeof(I))) {
2788 *ops++ = Op_matrix_3x4;
2789 *args++ = &srcProfile->A2B.matrix;
2790 }
2791 }
2792
2793 if (srcProfile->A2B.output_channels == 3) {
2794 for (int i = 0; i < 3; i++) {
2795 OpAndArg oa = select_curve_op(curve: &srcProfile->A2B.output_curves[i], channel: i);
2796 if (oa.arg) {
2797 *ops++ = oa.op;
2798 *args++ = oa.arg;
2799 }
2800 }
2801 }
2802
2803 if (srcProfile->pcs == skcms_Signature_Lab) {
2804 *ops++ = Op_lab_to_xyz;
2805 }
2806
2807 } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2808 for (int i = 0; i < 3; i++) {
2809 OpAndArg oa = select_curve_op(curve: &srcProfile->trc[i], channel: i);
2810 if (oa.arg) {
2811 *ops++ = oa.op;
2812 *args++ = oa.arg;
2813 }
2814 }
2815 } else {
2816 return false;
2817 }
2818
2819 // A2B sources are in XYZD50 by now, but TRC sources are still in their original gamut.
2820 assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2821
2822 if (dstProfile->has_B2A) {
2823 // B2A needs its input in XYZD50, so transform TRC sources now.
2824 if (!srcProfile->has_A2B) {
2825 *ops++ = Op_matrix_3x3;
2826 *args++ = &srcProfile->toXYZD50;
2827 }
2828
2829 if (dstProfile->pcs == skcms_Signature_Lab) {
2830 *ops++ = Op_xyz_to_lab;
2831 }
2832
2833 if (dstProfile->B2A.input_channels == 3) {
2834 for (int i = 0; i < 3; i++) {
2835 OpAndArg oa = select_curve_op(curve: &dstProfile->B2A.input_curves[i], channel: i);
2836 if (oa.arg) {
2837 *ops++ = oa.op;
2838 *args++ = oa.arg;
2839 }
2840 }
2841 }
2842
2843 if (dstProfile->B2A.matrix_channels == 3) {
2844 static const skcms_Matrix3x4 I = {.vals: {
2845 {1,0,0,0},
2846 {0,1,0,0},
2847 {0,0,1,0},
2848 }};
2849 if (0 != memcmp(s1: &I, s2: &dstProfile->B2A.matrix, n: sizeof(I))) {
2850 *ops++ = Op_matrix_3x4;
2851 *args++ = &dstProfile->B2A.matrix;
2852 }
2853
2854 for (int i = 0; i < 3; i++) {
2855 OpAndArg oa = select_curve_op(curve: &dstProfile->B2A.matrix_curves[i], channel: i);
2856 if (oa.arg) {
2857 *ops++ = oa.op;
2858 *args++ = oa.arg;
2859 }
2860 }
2861 }
2862
2863 if (dstProfile->B2A.output_channels) {
2864 *ops++ = Op_clamp;
2865 *ops++ = Op_clut_B2A;
2866 *args++ = &dstProfile->B2A;
2867 for (int i = 0; i < (int)dstProfile->B2A.output_channels; i++) {
2868 OpAndArg oa = select_curve_op(curve: &dstProfile->B2A.output_curves[i], channel: i);
2869 if (oa.arg) {
2870 *ops++ = oa.op;
2871 *args++ = oa.arg;
2872 }
2873 }
2874 }
2875 } else {
2876 // This is a TRC destination.
2877 // We'll concat any src->xyz matrix with our xyz->dst matrix into one src->dst matrix.
2878 // (A2B sources are already in XYZD50, making that src->xyz matrix I.)
2879 static const skcms_Matrix3x3 I = {.vals: {
2880 { 1.0f, 0.0f, 0.0f },
2881 { 0.0f, 1.0f, 0.0f },
2882 { 0.0f, 0.0f, 1.0f },
2883 }};
2884 const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2885
2886 // There's a chance the source and destination gamuts are identical,
2887 // in which case we can skip the gamut transform.
2888 if (0 != memcmp(s1: &dstProfile->toXYZD50, s2: to_xyz, n: sizeof(skcms_Matrix3x3))) {
2889 // Concat the entire gamut transform into from_xyz,
2890 // now slightly misnamed but it's a handy spot to stash the result.
2891 from_xyz = skcms_Matrix3x3_concat(A: &from_xyz, B: to_xyz);
2892 *ops++ = Op_matrix_3x3;
2893 *args++ = &from_xyz;
2894 }
2895
2896 // Encode back to dst RGB using its parametric transfer functions.
2897 for (int i = 0; i < 3; i++) {
2898 OpAndArg oa = select_curve_op(curve: dst_curves+i, channel: i);
2899 if (oa.arg) {
2900 assert (oa.op != Op_table_r &&
2901 oa.op != Op_table_g &&
2902 oa.op != Op_table_b &&
2903 oa.op != Op_table_a);
2904 *ops++ = oa.op;
2905 *args++ = oa.arg;
2906 }
2907 }
2908 }
2909 }
2910
2911 // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
2912 // not just to values that fit in [0,1].
2913 //
2914 // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2915 // but would be carrying r > 1, which is really unexpected for downstream consumers.
2916 if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2917 *ops++ = Op_clamp;
2918 }
2919
2920 if (dstProfile->data_color_space == skcms_Signature_CMYK) {
2921 // Photoshop creates CMYK images as inverse CMYK.
2922 // These happen to be the only ones we've _ever_ seen.
2923 *ops++ = Op_invert;
2924
2925 // CMYK has no alpha channel, so make sure dstAlpha is a no-op.
2926 dstAlpha = skcms_AlphaFormat_Unpremul;
2927 }
2928
2929 if (dstAlpha == skcms_AlphaFormat_Opaque) {
2930 *ops++ = Op_force_opaque;
2931 } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2932 *ops++ = Op_premul;
2933 }
2934 if (dstFmt & 1) {
2935 *ops++ = Op_swap_rb;
2936 }
2937 switch (dstFmt >> 1) {
2938 default: return false;
2939 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_store_a8; break;
2940 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_store_g8; break;
2941 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_store_4444; break;
2942 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_store_565; break;
2943 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_store_888; break;
2944 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_store_8888; break;
2945 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_store_1010102; break;
2946 case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_store_161616LE; break;
2947 case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_store_16161616LE; break;
2948 case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_store_161616BE; break;
2949 case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_store_16161616BE; break;
2950 case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_store_hhh; break;
2951 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_store_hhhh; break;
2952 case skcms_PixelFormat_RGB_101010x_XR >> 1: *ops++ = Op_store_101010x_XR; break;
2953 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_store_hhh; break;
2954 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_store_hhhh; break;
2955 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_store_fff; break;
2956 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_store_ffff; break;
2957
2958 case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2959 *ops++ = Op_tf_r; *args++ = skcms_sRGB_Inverse_TransferFunction();
2960 *ops++ = Op_tf_g; *args++ = skcms_sRGB_Inverse_TransferFunction();
2961 *ops++ = Op_tf_b; *args++ = skcms_sRGB_Inverse_TransferFunction();
2962 *ops++ = Op_store_8888;
2963 break;
2964 }
2965
2966 auto run = baseline::run_program;
2967#if defined(TEST_FOR_HSW)
2968 switch (cpu_type()) {
2969 case CpuType::None: break;
2970 case CpuType::HSW: run = hsw::run_program; break;
2971 case CpuType::SKX: run = hsw::run_program; break;
2972 }
2973#endif
2974#if defined(TEST_FOR_SKX)
2975 switch (cpu_type()) {
2976 case CpuType::None: break;
2977 case CpuType::HSW: break;
2978 case CpuType::SKX: run = skx::run_program; break;
2979 }
2980#endif
2981 run(program, arguments, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2982 return true;
2983}
2984
2985static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2986#if defined(NDEBUG)
2987 (void)profile;
2988#else
2989 skcms_Matrix3x3 fromXYZD50;
2990 skcms_TransferFunction invR, invG, invB;
2991 assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2992#endif
2993}
2994
2995bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2996 if (!profile->has_B2A) {
2997 skcms_Matrix3x3 fromXYZD50;
2998 if (!profile->has_trc || !profile->has_toXYZD50
2999 || !skcms_Matrix3x3_invert(src: &profile->toXYZD50, dst: &fromXYZD50)) {
3000 return false;
3001 }
3002
3003 skcms_TransferFunction tf[3];
3004 for (int i = 0; i < 3; i++) {
3005 skcms_TransferFunction inv;
3006 if (profile->trc[i].table_entries == 0
3007 && skcms_TransferFunction_invert(src: &profile->trc[i].parametric, dst: &inv)) {
3008 tf[i] = profile->trc[i].parametric;
3009 continue;
3010 }
3011
3012 float max_error;
3013 // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
3014 if (!skcms_ApproximateCurve(curve: &profile->trc[i], approx: &tf[i], max_error: &max_error)) {
3015 return false;
3016 }
3017 }
3018
3019 for (int i = 0; i < 3; ++i) {
3020 profile->trc[i].table_entries = 0;
3021 profile->trc[i].parametric = tf[i];
3022 }
3023 }
3024 assert_usable_as_destination(profile);
3025 return true;
3026}
3027
3028bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
3029 // Call skcms_MakeUsableAsDestination() with B2A disabled;
3030 // on success that'll return a TRC/XYZ profile with three skcms_TransferFunctions.
3031 skcms_ICCProfile result = *profile;
3032 result.has_B2A = false;
3033 if (!skcms_MakeUsableAsDestination(profile: &result)) {
3034 return false;
3035 }
3036
3037 // Of the three, pick the transfer function that best fits the other two.
3038 int best_tf = 0;
3039 float min_max_error = INFINITY_;
3040 for (int i = 0; i < 3; i++) {
3041 skcms_TransferFunction inv;
3042 if (!skcms_TransferFunction_invert(src: &result.trc[i].parametric, dst: &inv)) {
3043 return false;
3044 }
3045
3046 float err = 0;
3047 for (int j = 0; j < 3; ++j) {
3048 err = fmaxf_(x: err, y: skcms_MaxRoundtripError(curve: &profile->trc[j], inv_tf: &inv));
3049 }
3050 if (min_max_error > err) {
3051 min_max_error = err;
3052 best_tf = i;
3053 }
3054 }
3055
3056 for (int i = 0; i < 3; i++) {
3057 result.trc[i].parametric = result.trc[best_tf].parametric;
3058 }
3059
3060 *profile = result;
3061 assert_usable_as_destination(profile);
3062 return true;
3063}
3064

source code of flutter_engine/third_party/skia/modules/skcms/skcms.cc