• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

saitoha / libsixel / 19918707358

04 Dec 2025 05:12AM UTC coverage: 38.402% (-4.0%) from 42.395%
19918707358

push

github

saitoha
tests: fix meson msys dll lookup

9738 of 38220 branches covered (25.48%)

12841 of 33438 relevant lines covered (38.4%)

782420.02 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

8.72
/src/colorspace.c
1
/*
2
 * SPDX-License-Identifier: MIT
3
 *
4
 * Copyright (c) 2025 libsixel developers. See `AUTHORS`.
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to
8
 * deal in the Software without restriction, including without limitation the
9
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10
 * sell copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
 * SOFTWARE.
23
 */
24

25
#include "config.h"
26

27
#include <math.h>
28
#include <stdint.h>
29
#include <stddef.h>
30
#include <stdlib.h>
31
#include <errno.h>
32
#include <limits.h>
33

34
#include <sixel.h>
35

36
#include "colorspace.h"
37
#include "cpu.h"
38
#include "logger.h"
39
#if SIXEL_ENABLE_THREADS
40
# include "sixel_threads_config.h"
41
# include "threadpool.h"
42
#endif
43

44
/*
 * x86 intrinsics: pull in <immintrin.h> only when HAVE_IMMINTRIN_H is set
 * (presumably by config.h) and the target is 32-bit or 64-bit x86.
 */
#if defined(HAVE_IMMINTRIN_H) && \
    (defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \
     defined(_M_IX86))
# define SIXEL_HAS_X86_INTRIN 1
# include <immintrin.h>
#endif
50

51
/*
 * SSE2 kernels are enabled only when HAVE_SSE2 and HAVE_EMMINTRIN_H are set
 * (presumably by config.h) and the compiler is targeting SSE2 (__SSE2__).
 */
#if defined(HAVE_SSE2)
# if defined(__SSE2__)
#  if defined(HAVE_EMMINTRIN_H)
#   include <emmintrin.h>
#   define SIXEL_USE_SSE2 1
#  endif
# endif
#endif
59

60
/*
 * AVX2 / AVX-512 selection.
 *
 * SIXEL_TARGET_AVX2 / SIXEL_TARGET_AVX512 decorate the AVX kernels;
 * SIXEL_USE_AVX2 / SIXEL_USE_AVX512 say whether those kernels may be used.
 *
 *  - GCC (not clang): per-function target attributes are used and both
 *    SIXEL_USE_* switches are always defined.
 *  - clang and non-GNU compilers: the attributes expand to nothing and the
 *    SIXEL_USE_* switches follow the ISA the translation unit is built for.
 */
#if defined(SIXEL_HAS_X86_INTRIN)
# if defined(__GNUC__)
#  if !defined(__clang__)
#   define SIXEL_TARGET_AVX2 __attribute__((target("avx2")))
#   define SIXEL_TARGET_AVX512 \
        __attribute__((target("avx512f,avx512bw")))
#   define SIXEL_USE_AVX2 1
#   define SIXEL_USE_AVX512 1
#  else
/*
 * clang rejects returning AVX vectors when the translation unit target
 * does not already include the corresponding ISA. Guard runtime AVX
 * helpers with compile-time ISA availability to keep non-AVX builds
 * warning-free while still using AVX when the compiler enables it.
 */
#   define SIXEL_TARGET_AVX2
#   define SIXEL_TARGET_AVX512
#   if defined(__AVX2__)
#    define SIXEL_USE_AVX2 1
#   endif
#   if defined(__AVX512F__) && defined(__AVX512BW__)
#    define SIXEL_USE_AVX512 1
#   endif
#  endif
# else
#  define SIXEL_TARGET_AVX2
#  define SIXEL_TARGET_AVX512
#  if defined(__AVX2__)
#   define SIXEL_USE_AVX2 1
#  endif
#  if defined(__AVX512F__) && defined(__AVX512BW__)
#   define SIXEL_USE_AVX512 1
#  endif
# endif
#endif
95

96
/*
 * NEON kernels are enabled when HAVE_NEON / HAVE_ARM_NEON_H are set
 * (presumably by config.h) and the compiler targets NEON.
 *
 * The NEON lookup helper in this file uses vqtbl4q_u8, which only exists
 * on AArch64, so 32-bit ARM builds are kept on the non-NEON path instead
 * of failing to compile.
 */
#if defined(HAVE_NEON)
# if (defined(__ARM_NEON) || defined(__ARM_NEON__)) && \
     (defined(__aarch64__) || defined(_M_ARM64))
#  if defined(HAVE_ARM_NEON_H)
#   include <arm_neon.h>
#   define SIXEL_USE_NEON 1
#  endif
# endif
#endif
104

105
/* Number of entries in every byte-indexed lookup table in this file. */
#define SIXEL_COLORSPACE_LUT_SIZE 256
/* OKLab a/b values are clamped to [-OFFSET, +OFFSET]. */
#define SIXEL_OKLAB_AB_OFFSET 0.5
/*
 * NOTE(review): the scale constants below are consumed outside this
 * excerpt; their names suggest encode/decode scale factors - confirm at
 * the use sites.
 */
#define SIXEL_OKLAB_AB_SCALE  255.0
#define SIXEL_CIELAB_AB_SCALE 128.0
#define SIXEL_CIELAB_L_SCALE  100.0
/* CIELAB a/b values are clamped to [-LIMIT, +LIMIT]. */
#define SIXEL_CIELAB_AB_LIMIT 1.5
#define SIXEL_DIN99D_L_SCALE  100.0
#define SIXEL_DIN99D_AB_RANGE 50.0
113

114
/*
 * a * b + c on 256-bit / 512-bit float vectors.
 *
 * With __FMA__ the fused instruction is used; otherwise the expression
 * falls back to a separate multiply followed by an add.
 */
#if defined(__FMA__)
# define SIXEL_FMADD_PS256(a, b, c) _mm256_fmadd_ps((a), (b), (c))
# define SIXEL_FMADD_PS512(a, b, c) _mm512_fmadd_ps((a), (b), (c))
#else
# define SIXEL_FMADD_PS256(a, b, c) \
    _mm256_add_ps(_mm256_mul_ps((a), (b)), (c))
# define SIXEL_FMADD_PS512(a, b, c) \
    _mm512_add_ps(_mm512_mul_ps((a), (b)), (c))
#endif
123

124
/*
 * 3x3 matrix (double precision); going by its name it maps linear sRGB
 * components to linear SMPTE-C components.  The row/column convention is
 * fixed by the multiplication code outside this excerpt - confirm there.
 */
static const double sixel_linear_srgb_to_smptec_matrix[3][3] = {
    { 1.0651944799343782, -0.05539144537002962, -0.009975616485882548 },
    { -0.019633066659433226,  1.0363870284433383, -0.016731961783904975 },
    { 0.0016324889176928742,  0.004413466273704836,  0.994192644808602 }
};
129

130
/*
 * 3x3 matrix (double precision); going by its name it maps linear SMPTE-C
 * components back to linear sRGB components.  The row/column convention is
 * fixed by the multiplication code outside this excerpt - confirm there.
 */
static const double sixel_linear_smptec_to_srgb_matrix[3][3] = {
    { 0.9397048483892231,  0.05018036042570272,  0.010273409684415205 },
    { 0.01777536262173348, 0.9657705626655305,  0.01643197976410589 },
    { -0.0016219271954016755, -0.00436969856687614,  1.0057514450874723 }
};
135

136
/*
 * Single-precision copies of the two 3x3 conversion matrices, compiled
 * only when at least one SIMD path (AVX-512, AVX2, SSE2 or NEON) is
 * enabled.  The values are the double-precision coefficients rounded to
 * float.
 */
#if (defined(SIXEL_USE_AVX512) && defined(__AVX512F__) && \
        defined(__AVX512BW__)) || \
        (defined(SIXEL_USE_AVX2) && defined(__AVX2__)) || \
        defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
static const float sixel_linear_srgb_to_smptec_matrix_f32[3][3] = {
    { 1.0651945f, -0.05539145f, -0.009975616f },
    { -0.019633066f, 1.0363870f, -0.016731962f },
    { 0.0016324889f, 0.0044134663f, 0.99419266f }
};

static const float sixel_linear_smptec_to_srgb_matrix_f32[3][3] = {
    { 0.93970484f, 0.050180361f, 0.010273410f },
    { 0.017775363f, 0.96577054f, 0.016431980f },
    { -0.0016219272f, -0.0043696986f, 1.0057515f }
};
#endif
152

153
#if defined(SIXEL_USE_NEON)
/*
 * NEON copies of the two 256-entry byte LUTs: four tables of four 16-byte
 * registers (4 x 64 bytes).  They are filled on first use, guarded by
 * sixel_neon_tables_initialized.
 */
static uint8x16x4_t sixel_neon_gamma_to_linear[4];
static uint8x16x4_t sixel_neon_linear_to_gamma[4];
static int sixel_neon_tables_initialized = 0;
#endif
158

159
/*
 * AVX state and forward declarations.
 *
 * When the translation unit is really built for AVX2 or AVX-512 the
 * 32-bit-wide LUTs (used as gather sources) and the kernel prototypes are
 * declared.  Otherwise, if SIXEL_USE_AVX2 is still defined (GCC target
 * attribute mode), an AVX2 stub that always fails is provided.
 */
#if (defined(SIXEL_USE_AVX2) && defined(__AVX2__)) || \
        (defined(SIXEL_USE_AVX512) && defined(__AVX512F__) && \
         defined(__AVX512BW__))
static uint32_t sixel_avx_gamma_to_linear_lut32[SIXEL_COLORSPACE_LUT_SIZE];
static uint32_t sixel_avx_linear_to_gamma_lut32[SIXEL_COLORSPACE_LUT_SIZE];
static int sixel_avx_tables_initialized = 0;
static SIXELSTATUS sixel_colorspace_convert_avx512(unsigned char *pixels,
                                                   size_t size,
                                                   int pixelformat,
                                                   int colorspace_src,
                                                   int colorspace_dst);
static SIXELSTATUS sixel_colorspace_convert_avx2(unsigned char *pixels,
                                                 size_t size,
                                                 int pixelformat,
                                                 int colorspace_src,
                                                 int colorspace_dst);
#elif defined(SIXEL_USE_AVX2)
/* Stub: AVX2 kernels are not compiled in, so every request is rejected. */
static __attribute__((unused)) SIXELSTATUS
sixel_colorspace_convert_avx2(unsigned char *pixels,
                              size_t size,
                              int pixelformat,
                              int colorspace_src,
                              int colorspace_dst)
{
    (void)pixels;
    (void)size;
    (void)pixelformat;
    (void)colorspace_src;
    (void)colorspace_dst;

    return SIXEL_BAD_INPUT;
}
#endif
192

193
#if defined(SIXEL_USE_AVX512) && \
        !(defined(__AVX512F__) && defined(__AVX512BW__))
/*
 * Stub used when SIXEL_USE_AVX512 is defined but the translation unit is
 * not built for AVX-512F/BW: every request is rejected with
 * SIXEL_BAD_INPUT.
 */
static __attribute__((unused)) SIXELSTATUS
sixel_colorspace_convert_avx512(unsigned char *pixels,
                                size_t size,
                                int pixelformat,
                                int colorspace_src,
                                int colorspace_dst)
{
    (void)pixels;
    (void)size;
    (void)pixelformat;
    (void)colorspace_src;
    (void)colorspace_dst;

    return SIXEL_BAD_INPUT;
}
#endif
211

212
#if defined(SIXEL_USE_SSE2)
/* Forward declaration of the SSE2 LUT kernel defined later in this file. */
static SIXELSTATUS sixel_colorspace_convert_sse2(unsigned char *pixels,
                                                 size_t size,
                                                 int pixelformat,
                                                 int colorspace_src,
                                                 int colorspace_dst);
#endif
219

220
/*
 * Byte-to-byte lookup tables shared by the scalar and SIMD paths, plus the
 * flag that records whether they have been filled.  The code that fills
 * them is outside this excerpt.
 */
static unsigned char gamma_to_linear_lut[SIXEL_COLORSPACE_LUT_SIZE];
static unsigned char linear_to_gamma_lut[SIXEL_COLORSPACE_LUT_SIZE];
static int tables_initialized = 0;
223

224
/*
 * Clamp value to the closed interval [0.0, 1.0].
 *
 * NaN is mapped to 0.0: it compares false against both bounds, so without
 * the explicit check it would leave the clamp unchanged.
 */
static inline double
sixel_clamp_unit(double value)
{
    /* value != value is true only for NaN. */
    if (value != value) {
        return 0.0;
    }
    if (value < 0.0) {
        return 0.0;
    }
    if (value > 1.0) {
        return 1.0;
    }
    return value;
}
235

236
/*
 * Decode one sRGB-encoded component to linear light.
 *
 * The input is first passed through sixel_clamp_unit().  Values up to
 * 0.04045 use the linear segment x / 12.92; larger values use
 * ((x + 0.055) / 1.055) ^ 2.4.
 */
static inline double
sixel_srgb_unit_to_linear(double value)
{
    double x;

    x = sixel_clamp_unit(value);
    if (x <= 0.04045) {
        return x / 12.92;
    }

    return pow((x + 0.055) / 1.055, 2.4);
}
248

249
/*
 * Encode one linear-light component with the sRGB transfer curve.
 *
 * Inputs <= 0.0 return 0.0 and inputs >= 1.0 return 1.0.  In between,
 * values up to 0.0031308 use value * 12.92 and larger values use
 * 1.055 * value ^ (1 / 2.4) - 0.055.  The result is passed through
 * sixel_clamp_unit() before it is returned.
 */
static inline double
sixel_linear_to_srgb_unit(double value)
{
    double y;

    if (value <= 0.0) {
        return 0.0;
    }
    if (value >= 1.0) {
        return 1.0;
    }

    if (value <= 0.0031308) {
        y = value * 12.92;
    } else {
        y = 1.055 * pow(value, 1.0 / 2.4) - 0.055;
    }

    return sixel_clamp_unit(y);
}
269

270
/*
 * Decode one gamma-encoded component with a pure 2.2 power curve.
 *
 * The input is clamped with sixel_clamp_unit(); the end points 0.0 and 1.0
 * are returned directly without calling pow().
 */
static inline double
sixel_smptec_unit_to_linear(double value)
{
    double x;

    x = sixel_clamp_unit(value);
    if (x <= 0.0) {
        return 0.0;
    }
    if (x >= 1.0) {
        return 1.0;
    }

    return pow(x, 2.2);
}
285

286
/*
 * Encode one linear-light component with a pure 1 / 2.2 power curve.
 *
 * Inputs <= 0.0 return 0.0 and inputs >= 1.0 return 1.0; everything else
 * is value ^ (1 / 2.2), passed through sixel_clamp_unit().
 */
static inline double
sixel_linear_to_smptec_unit(double value)
{
    double y;

    if (value <= 0.0) {
        return 0.0;
    }
    if (value >= 1.0) {
        return 1.0;
    }

    y = pow(value, 1.0 / 2.2);
    return sixel_clamp_unit(y);
}
301

302
static inline double
303
sixel_oklab_clamp_ab(double value)
×
304
{
305
    double lower;
×
306
    double upper;
×
307

308
    lower = -SIXEL_OKLAB_AB_OFFSET;
×
309
    upper = SIXEL_OKLAB_AB_OFFSET;
×
310
    if (value < lower) {
×
311
        return lower;
312
    }
313
    if (value > upper) {
×
314
        return upper;
315
    }
316

317
    return value;
318
}
319

320
static inline double
321
sixel_cielab_clamp_ab(double value)
×
322
{
323
    if (value < -SIXEL_CIELAB_AB_LIMIT) {
×
324
        return -SIXEL_CIELAB_AB_LIMIT;
325
    }
326
    if (value > SIXEL_CIELAB_AB_LIMIT) {
×
327
        return SIXEL_CIELAB_AB_LIMIT;
328
    }
329

330
    return value;
331
}
332

333
/* Clamp a normalized DIN99d a/b value to the interval [-1.0, 1.0]. */
static inline double
sixel_din99d_clamp_ab_norm(double value)
{
    if (value < -1.0) {
        return -1.0;
    }
    if (value > 1.0) {
        return 1.0;
    }

    return value;
}
345

346
#if 0
/*
 * Compiled out: clamp an un-normalized DIN99d a/b value to
 * [-SIXEL_DIN99D_AB_RANGE, +SIXEL_DIN99D_AB_RANGE].
 */
static inline double
sixel_din99d_clamp_ab(double value)
{
    if (value < -SIXEL_DIN99D_AB_RANGE) {
        return -SIXEL_DIN99D_AB_RANGE;
    }
    if (value > SIXEL_DIN99D_AB_RANGE) {
        return SIXEL_DIN99D_AB_RANGE;
    }

    return value;
}
#endif
360

361
#if defined(SIXEL_USE_NEON)
362
/*
363
 * SIMD lookup helpers accelerate the gamma/linear LUT path on NEON.
364
 * A four-way 64-entry table maps the 256 entry LUT without branches.
365
 */
366
static void
367
sixel_colorspace_fill_neon_table(uint8x16x4_t *table,
368
                                 const unsigned char *source)
369
{
370
    int index;
371

372
    for (index = 0; index < 4; ++index) {
×
373
        const unsigned char *chunk = source + (index * 16);
374

375
        table->val[index] = vld1q_u8(chunk);
376
    }
377
}
378

379
static void
380
sixel_colorspace_prepare_neon_tables(void)
381
{
382
    int block;
383

384
    if (sixel_neon_tables_initialized) {
×
385
        return;
386
    }
387

388
    for (block = 0; block < 4; ++block) {
×
389
        const unsigned char *gamma_src;
390
        const unsigned char *linear_src;
391

392
        gamma_src = gamma_to_linear_lut + (block * 64);
393
        linear_src = linear_to_gamma_lut + (block * 64);
394

395
        sixel_colorspace_fill_neon_table(
396
            &sixel_neon_gamma_to_linear[block],
397
            gamma_src);
398
        sixel_colorspace_fill_neon_table(
399
            &sixel_neon_linear_to_gamma[block],
400
            linear_src);
401
    }
402

403
    sixel_neon_tables_initialized = 1;
404
}
×
405

406
static inline uint8x16_t
407
sixel_colorspace_lookup_neon(uint8x16x4_t *table, uint8x16_t index)
408
{
409
    uint8x16_t block;
410
    uint8x16_t block_mask;
411
    uint8x16_t local_index;
412
    uint8x16_t result;
413
    uint8x16_t selection;
414

415
    block = vshrq_n_u8(index, 6);
416
    local_index = vandq_u8(index, vdupq_n_u8(0x3f));
417

418
    result = vdupq_n_u8(0);
419
    selection = vqtbl4q_u8(table[0], local_index);
420
    block_mask = vceqq_u8(block, vdupq_n_u8(0));
421
    result = vbslq_u8(block_mask, selection, result);
422

423
    selection = vqtbl4q_u8(table[1], local_index);
424
    block_mask = vceqq_u8(block, vdupq_n_u8(1));
425
    result = vbslq_u8(block_mask, selection, result);
426

427
    selection = vqtbl4q_u8(table[2], local_index);
428
    block_mask = vceqq_u8(block, vdupq_n_u8(2));
429
    result = vbslq_u8(block_mask, selection, result);
430

431
    selection = vqtbl4q_u8(table[3], local_index);
432
    block_mask = vceqq_u8(block, vdupq_n_u8(3));
433
    result = vbslq_u8(block_mask, selection, result);
434

435
    return result;
436
}
437

438
static inline uint8x16_t
439
sixel_colorspace_alpha_mask_neon(int pixelformat)
440
{
441
    static const uint8_t mask_rgba[16] = {
442
        0, 0, 0, 255, 0, 0, 0, 255,
443
        0, 0, 0, 255, 0, 0, 0, 255
444
    };
445
    static const uint8_t mask_bgra[16] = {
446
        0, 0, 0, 255, 0, 0, 0, 255,
447
        0, 0, 0, 255, 0, 0, 0, 255
448
    };
449
    static const uint8_t mask_argb[16] = {
450
        255, 0, 0, 0, 255, 0, 0, 0,
451
        255, 0, 0, 0, 255, 0, 0, 0
452
    };
453
    static const uint8_t mask_abgr[16] = {
454
        255, 0, 0, 0, 255, 0, 0, 0,
455
        255, 0, 0, 0, 255, 0, 0, 0
456
    };
457
    static const uint8_t mask_ga[16] = {
458
        0, 255, 0, 255, 0, 255, 0, 255,
459
        0, 255, 0, 255, 0, 255, 0, 255
460
    };
461
    static const uint8_t mask_ag[16] = {
462
        255, 0, 255, 0, 255, 0, 255, 0,
463
        255, 0, 255, 0, 255, 0, 255, 0
464
    };
465

466
    switch (pixelformat) {
×
467
    case SIXEL_PIXELFORMAT_RGBA8888:
468
        return vld1q_u8(mask_rgba);
469
    case SIXEL_PIXELFORMAT_BGRA8888:
470
        return vld1q_u8(mask_bgra);
471
    case SIXEL_PIXELFORMAT_ARGB8888:
472
        return vld1q_u8(mask_argb);
473
    case SIXEL_PIXELFORMAT_ABGR8888:
474
        return vld1q_u8(mask_abgr);
475
    case SIXEL_PIXELFORMAT_GA88:
476
        return vld1q_u8(mask_ga);
477
    case SIXEL_PIXELFORMAT_AG88:
478
        return vld1q_u8(mask_ag);
479
    default:
480
        return vdupq_n_u8(0);
481
    }
482
}
483

484
static void
485
sixel_colorspace_apply_neon(unsigned char *pixels,
486
                            size_t size,
487
                            int pixelformat,
488
                            const unsigned char *lut)
489
{
490
    uint8x16x4_t *table;
491
    uint8x16_t mask;
492
    size_t offset;
493
    size_t remaining;
494
    uint8_t mask_buffer[16];
495

496
    sixel_colorspace_prepare_neon_tables();
497

498
    if (lut == gamma_to_linear_lut) {
×
499
        table = sixel_neon_gamma_to_linear;
500
    } else {
501
        table = sixel_neon_linear_to_gamma;
502
    }
503

504
    mask = sixel_colorspace_alpha_mask_neon(pixelformat);
505
    vst1q_u8(mask_buffer, mask);
506

507
    offset = 0;
508
    remaining = size;
509
    while (remaining >= 16U) {
×
510
        uint8x16_t input = vld1q_u8(pixels + offset);
511
        uint8x16_t converted;
512
        uint8x16_t preserved;
513

514
        converted = sixel_colorspace_lookup_neon(table, input);
515
        preserved = vbslq_u8(mask, input, converted);
516
        vst1q_u8(pixels + offset, preserved);
517

518
        offset += 16U;
519
        remaining -= 16U;
520
    }
521

522
    while (remaining > 0U) {
×
523
        unsigned char original;
524
        unsigned char mapped;
525
        size_t mask_index;
526

527
        mask_index = offset % 16U;
528
        original = pixels[offset];
529
        mapped = lut[original];
530
        if (mask_buffer[mask_index] == 0U) {
×
531
            pixels[offset] = mapped;
532
        }
533

534
        ++offset;
535
        --remaining;
536
    }
537
}
538

539
#endif
540

541
/*
542
 * SIMD kernels share this LUT selector.  MinGW builds without SIMD support
543
 * do not reference it, so we hide the definition unless a SIMD path is
544
 * compiled in to avoid -Werror=unused-function.
545
 */
546
#if defined(SIXEL_USE_NEON) || defined(SIXEL_USE_SSE2) || \
547
        (defined(SIXEL_USE_AVX2) && defined(__AVX2__)) || \
548
        (defined(SIXEL_USE_AVX512) && defined(__AVX512F__) && \
549
         defined(__AVX512BW__))
550
static const unsigned char *
551
sixel_colorspace_select_lut(int colorspace_src, int colorspace_dst)
66✔
552
{
553
    if (colorspace_src == SIXEL_COLORSPACE_GAMMA &&
66!
554
            colorspace_dst == SIXEL_COLORSPACE_LINEAR) {
66✔
555
        return gamma_to_linear_lut;
556
    }
557

558
    if (colorspace_src == SIXEL_COLORSPACE_LINEAR &&
66!
559
            colorspace_dst == SIXEL_COLORSPACE_GAMMA) {
66!
560
        return linear_to_gamma_lut;
561
    }
562

563
    return NULL;
564
}
565
#endif
566

567
#if (defined(SIXEL_USE_AVX2) && defined(__AVX2__)) || \
568
        (defined(SIXEL_USE_AVX512) && defined(__AVX512F__) && \
569
         defined(__AVX512BW__))
570
static inline const uint32_t *
571
sixel_colorspace_select_lut32(int colorspace_src, int colorspace_dst)
572
{
573
    if (colorspace_src == SIXEL_COLORSPACE_GAMMA &&
574
            colorspace_dst == SIXEL_COLORSPACE_LINEAR) {
575
        return sixel_avx_gamma_to_linear_lut32;
576
    }
577

578
    if (colorspace_src == SIXEL_COLORSPACE_LINEAR &&
579
            colorspace_dst == SIXEL_COLORSPACE_GAMMA) {
580
        return sixel_avx_linear_to_gamma_lut32;
581
    }
582

583
    return NULL;
584
}
585

586
#endif
587

588
#if defined(SIXEL_USE_SSE2)
589
static inline __m128i
590
sixel_colorspace_alpha_mask_sse2(int pixelformat)
591
{
592
    static const uint8_t mask_rgba[16] = {
593
        0, 0, 0, 255, 0, 0, 0, 255,
594
        0, 0, 0, 255, 0, 0, 0, 255
595
    };
596
    static const uint8_t mask_bgra[16] = {
597
        0, 0, 0, 255, 0, 0, 0, 255,
598
        0, 0, 0, 255, 0, 0, 0, 255
599
    };
600
    static const uint8_t mask_argb[16] = {
601
        255, 0, 0, 0, 255, 0, 0, 0,
602
        255, 0, 0, 0, 255, 0, 0, 0
603
    };
604
    static const uint8_t mask_abgr[16] = {
605
        255, 0, 0, 0, 255, 0, 0, 0,
606
        255, 0, 0, 0, 255, 0, 0, 0
607
    };
608
    static const uint8_t mask_ga[16] = {
609
        0, 255, 0, 255, 0, 255, 0, 255,
610
        0, 255, 0, 255, 0, 255, 0, 255
611
    };
612
    static const uint8_t mask_ag[16] = {
613
        255, 0, 255, 0, 255, 0, 255, 0,
614
        255, 0, 255, 0, 255, 0, 255, 0
615
    };
616

617
    switch (pixelformat) {
×
618
    case SIXEL_PIXELFORMAT_RGBA8888:
619
        return _mm_loadu_si128((const __m128i *)mask_rgba);
620
    case SIXEL_PIXELFORMAT_BGRA8888:
621
        return _mm_loadu_si128((const __m128i *)mask_bgra);
622
    case SIXEL_PIXELFORMAT_ARGB8888:
623
        return _mm_loadu_si128((const __m128i *)mask_argb);
624
    case SIXEL_PIXELFORMAT_ABGR8888:
625
        return _mm_loadu_si128((const __m128i *)mask_abgr);
626
    case SIXEL_PIXELFORMAT_GA88:
627
        return _mm_loadu_si128((const __m128i *)mask_ga);
628
    case SIXEL_PIXELFORMAT_AG88:
629
        return _mm_loadu_si128((const __m128i *)mask_ag);
630
    default:
631
        return _mm_setzero_si128();
632
    }
633
}
634

635
/*
636
 * SSE2 fallback that still relies on the LUT but performs masking in
637
 * vectors so alpha bytes are kept intact. The lookup itself expands a
638
 * 16-byte chunk to a temporary buffer to avoid SSSE3 pshufb usage.
639
 */
640
static void
641
sixel_colorspace_apply_sse2(unsigned char *pixels,
642
                            size_t size,
643
                            int pixelformat,
644
                            const unsigned char *lut)
645
{
646
    __m128i mask128;
647
    size_t offset;
648
    size_t remaining;
649
    uint8_t mask_buffer[16];
650
    unsigned char input_bytes[16];
651
    unsigned char mapped_bytes[16];
652
    int j;
653

654
    mask128 = sixel_colorspace_alpha_mask_sse2(pixelformat);
655
    _mm_storeu_si128((__m128i *)mask_buffer, mask128);
656

657
    offset = 0U;
658
    remaining = size;
659
    while (remaining >= 16U) {
×
660
        __m128i input;
661
        __m128i mapped;
662
        __m128i preserved;
663

664
        input = _mm_loadu_si128((const __m128i *)(pixels + offset));
665
        _mm_storeu_si128((__m128i *)input_bytes, input);
666

667
        for (j = 0; j < 16; ++j) {
×
668
            mapped_bytes[j] = lut[input_bytes[j]];
669
        }
670

671
        mapped = _mm_loadu_si128((const __m128i *)mapped_bytes);
672
        preserved = _mm_or_si128(_mm_and_si128(mask128, input),
673
                                 _mm_andnot_si128(mask128, mapped));
674

675
        _mm_storeu_si128((__m128i *)(pixels + offset), preserved);
676

677
        offset += 16U;
678
        remaining -= 16U;
679
    }
680

681
    while (remaining > 0U) {
×
682
        unsigned char original;
683
        unsigned char mapped_scalar;
684
        size_t mask_index;
685

686
        mask_index = offset % 16U;
687
        original = pixels[offset];
688
        mapped_scalar = lut[original];
689
        if (mask_buffer[mask_index] == 0U) {
×
690
            pixels[offset] = mapped_scalar;
691
        }
692

693
        ++offset;
694
        --remaining;
695
    }
696
}
697

698
static SIXELSTATUS
699
sixel_colorspace_convert_sse2(unsigned char *pixels,
66✔
700
                              size_t size,
701
                              int pixelformat,
702
                              int colorspace_src,
703
                              int colorspace_dst)
704
{
705
    const unsigned char *lut;
66✔
706

707
    lut = sixel_colorspace_select_lut(colorspace_src, colorspace_dst);
66!
708
    if (lut == NULL) {
709
        return SIXEL_BAD_INPUT;
710
    }
711

712
    switch (pixelformat) {
×
713
    case SIXEL_PIXELFORMAT_RGB888:
714
    case SIXEL_PIXELFORMAT_BGR888:
715
        if (size % 3U != 0U) {
×
716
            return SIXEL_BAD_INPUT;
717
        }
718
        sixel_colorspace_apply_sse2(pixels, size, pixelformat, lut);
719
        return SIXEL_OK;
720
    case SIXEL_PIXELFORMAT_RGBA8888:
721
    case SIXEL_PIXELFORMAT_BGRA8888:
722
    case SIXEL_PIXELFORMAT_ARGB8888:
723
    case SIXEL_PIXELFORMAT_ABGR8888:
724
        if (size % 4U != 0U) {
×
725
            return SIXEL_BAD_INPUT;
726
        }
727
        sixel_colorspace_apply_sse2(pixels, size, pixelformat, lut);
728
        return SIXEL_OK;
729
    case SIXEL_PIXELFORMAT_G8:
730
        sixel_colorspace_apply_sse2(pixels, size, pixelformat, lut);
731
        return SIXEL_OK;
732
    case SIXEL_PIXELFORMAT_GA88:
733
    case SIXEL_PIXELFORMAT_AG88:
734
        if (size % 2U != 0U) {
×
735
            return SIXEL_BAD_INPUT;
736
        }
737
        sixel_colorspace_apply_sse2(pixels, size, pixelformat, lut);
738
        return SIXEL_OK;
739
    default:
740
        break;
741
    }
742

743
    return SIXEL_BAD_INPUT;
744
}
745
#endif
746

747
#if defined(SIXEL_USE_AVX512) && defined(__AVX512F__) && \
748
        defined(__AVX512BW__)
749
/*
750
 * AVX512 path widens the LUT gather to 16 dword lanes, letting us reuse
751
 * the AVX2 tables while masking alpha bytes with SSE2 logic.
752
 */
753
static SIXEL_TARGET_AVX512 void
754
sixel_colorspace_apply_avx512(unsigned char *pixels,
755
                              size_t size,
756
                              int pixelformat,
757
                              const unsigned char *lut,
758
                              const uint32_t *lut32)
759
{
760
    __m128i mask128;
761
    size_t offset;
762
    size_t remaining;
763
    uint8_t mask_buffer[16];
764

765
    mask128 = sixel_colorspace_alpha_mask_sse2(pixelformat);
766
    _mm_storeu_si128((__m128i *)mask_buffer, mask128);
767

768
    offset = 0U;
769
    remaining = size;
770
    while (remaining >= 16U) {
771
        __m128i input;
772
        __m512i indices;
773
        __m512i mapped32;
774
        __m128i converted;
775
        __m128i preserved;
776

777
        input = _mm_loadu_si128((const __m128i *)(pixels + offset));
778
        indices = _mm512_cvtepu8_epi32(input);
779
        mapped32 = _mm512_i32gather_epi32(indices, (const int *)lut32, 4);
780
        converted = _mm512_cvtepi32_epi8(mapped32);
781

782
        preserved = _mm_or_si128(_mm_and_si128(mask128, input),
783
                                 _mm_andnot_si128(mask128, converted));
784

785
        _mm_storeu_si128((__m128i *)(pixels + offset), preserved);
786

787
        offset += 16U;
788
        remaining -= 16U;
789
    }
790

791
    while (remaining > 0U) {
792
        unsigned char original;
793
        unsigned char mapped;
794
        size_t mask_index;
795

796
        mask_index = offset % 16U;
797
        original = pixels[offset];
798
        mapped = lut[original];
799
        if (mask_buffer[mask_index] == 0U) {
800
            pixels[offset] = mapped;
801
        }
802

803
        ++offset;
804
        --remaining;
805
    }
806
}
807

808
static SIXEL_TARGET_AVX512 SIXELSTATUS
809
sixel_colorspace_convert_avx512(unsigned char *pixels,
810
                                size_t size,
811
                                int pixelformat,
812
                                int colorspace_src,
813
                                int colorspace_dst)
814
{
815
    const unsigned char *lut;
816
    const uint32_t *lut32;
817

818
    lut = sixel_colorspace_select_lut(colorspace_src, colorspace_dst);
819
    lut32 = sixel_colorspace_select_lut32(colorspace_src, colorspace_dst);
820
    if (lut == NULL || lut32 == NULL) {
821
        return SIXEL_BAD_INPUT;
822
    }
823

824
    switch (pixelformat) {
825
    case SIXEL_PIXELFORMAT_RGB888:
826
    case SIXEL_PIXELFORMAT_BGR888:
827
        if (size % 3U != 0U) {
828
            return SIXEL_BAD_INPUT;
829
        }
830
        sixel_colorspace_apply_avx512(pixels, size, pixelformat, lut, lut32);
831
        return SIXEL_OK;
832
    case SIXEL_PIXELFORMAT_RGBA8888:
833
    case SIXEL_PIXELFORMAT_BGRA8888:
834
    case SIXEL_PIXELFORMAT_ARGB8888:
835
    case SIXEL_PIXELFORMAT_ABGR8888:
836
        if (size % 4U != 0U) {
837
            return SIXEL_BAD_INPUT;
838
        }
839
        sixel_colorspace_apply_avx512(pixels, size, pixelformat, lut, lut32);
840
        return SIXEL_OK;
841
    case SIXEL_PIXELFORMAT_G8:
842
        sixel_colorspace_apply_avx512(pixels, size, pixelformat, lut, lut32);
843
        return SIXEL_OK;
844
    case SIXEL_PIXELFORMAT_GA88:
845
    case SIXEL_PIXELFORMAT_AG88:
846
        if (size % 2U != 0U) {
847
            return SIXEL_BAD_INPUT;
848
        }
849
        sixel_colorspace_apply_avx512(pixels, size, pixelformat, lut, lut32);
850
        return SIXEL_OK;
851
    default:
852
        break;
853
    }
854

855
    return SIXEL_BAD_INPUT;
856
}
857
#endif
858

859
#if defined(SIXEL_USE_AVX2) && defined(__AVX2__)
860

861
static SIXEL_TARGET_AVX2 void
862
sixel_colorspace_apply_avx2(unsigned char *pixels,
863
                            size_t size,
864
                            int pixelformat,
865
                            const unsigned char *lut,
866
                            const uint32_t *lut32)
867
{
868
    __m128i mask128;
869
    size_t offset;
870
    size_t remaining;
871
    uint8_t mask_buffer[16];
872

873
    mask128 = sixel_colorspace_alpha_mask_sse2(pixelformat);
874
    _mm_storeu_si128((__m128i *)mask_buffer, mask128);
875

876
    offset = 0U;
877
    remaining = size;
878
    while (remaining >= 16U) {
879
        __m128i input;
880
        __m128i input_hi;
881
        __m256i index0;
882
        __m256i index1;
883
        __m256i mapped0;
884
        __m256i mapped1;
885
        __m256i packed16;
886
        __m256i packed8;
887
        __m128i converted;
888
        __m128i preserved;
889

890
        input = _mm_loadu_si128((const __m128i *)(pixels + offset));
891
        input_hi = _mm_srli_si128(input, 8);
892

893
        index0 = _mm256_cvtepu8_epi32(input);
894
        index1 = _mm256_cvtepu8_epi32(input_hi);
895

896
        mapped0 = _mm256_i32gather_epi32((const int *)lut32,
897
                                          index0, 4);
898
        mapped1 = _mm256_i32gather_epi32((const int *)lut32,
899
                                          index1, 4);
900

901
        packed16 = _mm256_packs_epi32(mapped0, mapped1);
902
        packed8 = _mm256_packus_epi16(packed16, packed16);
903

904
        converted = _mm256_castsi256_si128(packed8);
905

906
        preserved = _mm_or_si128(_mm_and_si128(mask128, input),
907
                                 _mm_andnot_si128(mask128, converted));
908

909
        _mm_storeu_si128((__m128i *)(pixels + offset), preserved);
910

911
        offset += 16U;
912
        remaining -= 16U;
913
    }
914

915
    while (remaining > 0U) {
916
        unsigned char original;
917
        unsigned char mapped;
918
        size_t mask_index;
919

920
        mask_index = offset % 16U;
921
        original = pixels[offset];
922
        mapped = lut[original];
923
        if (mask_buffer[mask_index] == 0U) {
924
            pixels[offset] = mapped;
925
        }
926

927
        ++offset;
928
        --remaining;
929
    }
930
}
931

932
static SIXEL_TARGET_AVX2 SIXELSTATUS
933
sixel_colorspace_convert_avx2(unsigned char *pixels,
934
                              size_t size,
935
                              int pixelformat,
936
                              int colorspace_src,
937
                              int colorspace_dst)
938
{
939
    const unsigned char *lut;
940
    const uint32_t *lut32;
941

942
    lut = sixel_colorspace_select_lut(colorspace_src, colorspace_dst);
943
    lut32 = sixel_colorspace_select_lut32(colorspace_src, colorspace_dst);
944
    if (lut == NULL || lut32 == NULL) {
945
        return SIXEL_BAD_INPUT;
946
    }
947

948
    switch (pixelformat) {
949
    case SIXEL_PIXELFORMAT_RGB888:
950
    case SIXEL_PIXELFORMAT_BGR888:
951
        if (size % 3U != 0U) {
952
            return SIXEL_BAD_INPUT;
953
        }
954
        sixel_colorspace_apply_avx2(pixels, size, pixelformat, lut, lut32);
955
        return SIXEL_OK;
956
    case SIXEL_PIXELFORMAT_RGBA8888:
957
    case SIXEL_PIXELFORMAT_BGRA8888:
958
    case SIXEL_PIXELFORMAT_ARGB8888:
959
    case SIXEL_PIXELFORMAT_ABGR8888:
960
        if (size % 4U != 0U) {
961
            return SIXEL_BAD_INPUT;
962
        }
963
        sixel_colorspace_apply_avx2(pixels, size, pixelformat, lut, lut32);
964
        return SIXEL_OK;
965
    case SIXEL_PIXELFORMAT_G8:
966
        sixel_colorspace_apply_avx2(pixels, size, pixelformat, lut, lut32);
967
        return SIXEL_OK;
968
    case SIXEL_PIXELFORMAT_GA88:
969
    case SIXEL_PIXELFORMAT_AG88:
970
        if (size % 2U != 0U) {
971
            return SIXEL_BAD_INPUT;
972
        }
973
        sixel_colorspace_apply_avx2(pixels, size, pixelformat, lut, lut32);
974
        return SIXEL_OK;
975
    default:
976
        break;
977
    }
978

979
    return SIXEL_BAD_INPUT;
980
}
981
#endif
982

983
#if defined(SIXEL_USE_NEON)
984

985
static int
986
sixel_colorspace_neon_supported_format(int pixelformat)
987
{
988
    switch (pixelformat) {
×
989
    case SIXEL_PIXELFORMAT_RGB888:
990
    case SIXEL_PIXELFORMAT_BGR888:
991
    case SIXEL_PIXELFORMAT_RGBA8888:
992
    case SIXEL_PIXELFORMAT_BGRA8888:
993
    case SIXEL_PIXELFORMAT_ARGB8888:
994
    case SIXEL_PIXELFORMAT_ABGR8888:
995
    case SIXEL_PIXELFORMAT_G8:
996
    case SIXEL_PIXELFORMAT_GA88:
997
    case SIXEL_PIXELFORMAT_AG88:
998
        return 1;
999
    default:
1000
        return 0;
1001
    }
1002
}
1003

1004
static SIXELSTATUS
1005
sixel_colorspace_convert_neon(unsigned char *pixels,
1006
                              size_t size,
1007
                              int pixelformat,
1008
                              int colorspace_src,
1009
                              int colorspace_dst)
1010
{
1011
    const unsigned char *lut;
1012

1013
    lut = sixel_colorspace_select_lut(colorspace_src, colorspace_dst);
1014
    if (lut == NULL) {
×
1015
        return SIXEL_BAD_INPUT;
1016
    }
1017

1018
    switch (pixelformat) {
×
1019
    case SIXEL_PIXELFORMAT_RGB888:
1020
    case SIXEL_PIXELFORMAT_BGR888:
1021
        if (size % 3U != 0U) {
×
1022
            return SIXEL_BAD_INPUT;
1023
        }
1024
        sixel_colorspace_apply_neon(pixels, size, pixelformat, lut);
1025
        return SIXEL_OK;
1026
    case SIXEL_PIXELFORMAT_RGBA8888:
1027
    case SIXEL_PIXELFORMAT_BGRA8888:
1028
    case SIXEL_PIXELFORMAT_ARGB8888:
1029
    case SIXEL_PIXELFORMAT_ABGR8888:
1030
        if (size % 4U != 0U) {
×
1031
            return SIXEL_BAD_INPUT;
1032
        }
1033
        sixel_colorspace_apply_neon(pixels, size, pixelformat, lut);
1034
        return SIXEL_OK;
1035
    case SIXEL_PIXELFORMAT_G8:
1036
        sixel_colorspace_apply_neon(pixels, size, pixelformat, lut);
1037
        return SIXEL_OK;
1038
    case SIXEL_PIXELFORMAT_GA88:
1039
    case SIXEL_PIXELFORMAT_AG88:
1040
        if (size % 2U != 0U) {
×
1041
            return SIXEL_BAD_INPUT;
1042
        }
1043
        sixel_colorspace_apply_neon(pixels, size, pixelformat, lut);
1044
        return SIXEL_OK;
1045
    default:
1046
        break;
1047
    }
1048

1049
    return SIXEL_BAD_INPUT;
1050
}
1051
#endif
1052

1053
/*
 * Saturate an integer to the 8-bit range: values below 0 become 0, values
 * above 255 become 255, everything else is returned unchanged.
 */
static unsigned char
sixel_colorspace_clamp(int value)
{
    const int bounded = (value < 0) ? 0 : ((value > 255) ? 255 : value);

    return (unsigned char)bounded;
}
1064

1065
/*
 * Scale an 8-bit code value to the unit interval (v / 255) and pass it
 * through sixel_srgb_unit_to_linear().
 */
static inline double
sixel_srgb_to_linear_double(unsigned char v)
{
    return sixel_srgb_unit_to_linear((double)v / 255.0);
}
1072

1073
/*
 * Encode a value with sixel_linear_to_srgb_unit(), scale the result by 255,
 * round to nearest (add 0.5, truncate) and saturate to 0..255.
 */
static inline unsigned char
sixel_linear_double_to_srgb(double v)
{
    const double encoded = sixel_linear_to_srgb_unit(v);
    const int rounded = (int)(encoded * 255.0 + 0.5);

    return sixel_colorspace_clamp(rounded);
}
1081

1082
/*
 * Quantize a unit-range value to an 8-bit code without any transfer curve.
 *
 * v <= 0.0 (and NaN) yields 0, v >= 1.0 yields 255.  Values strictly
 * between are scaled by 255 and rounded to nearest by adding 0.5 before
 * truncation.
 */
static inline unsigned char
sixel_linear_double_to_byte(double v)
{
    /*
     * Negated comparison on purpose: NaN fails (v > 0.0) and is sent to 0
     * here.  Letting it reach the double-to-int conversion below would be
     * undefined behaviour.
     */
    if (!(v > 0.0)) {
        return 0;
    }
    if (v >= 1.0) {
        return 255;
    }

    /* 0 < v < 1, so v * 255.0 + 0.5 is inside (0.5, 255.5): always 0..255. */
    return (unsigned char)(int)(v * 255.0 + 0.5);
}
1094

1095
/*
 * Scale an 8-bit code value to the unit interval (v / 255) and pass it
 * through sixel_smptec_unit_to_linear().
 */
static inline double
sixel_smptec_to_linear_double(unsigned char v)
{
    return sixel_smptec_unit_to_linear((double)v / 255.0);
}
1102

1103
/*
 * Encode a value with sixel_linear_to_smptec_unit(), scale the result by
 * 255, round to nearest (add 0.5, truncate) and saturate to 0..255.
 */
static inline unsigned char
sixel_linear_double_to_smptec(double v)
{
    const double encoded = sixel_linear_to_smptec_unit(v);
    const int rounded = (int)(encoded * 255.0 + 0.5);

    return sixel_colorspace_clamp(rounded);
}
1111

1112
static inline double
1113
sixel_cielab_f(double t)
×
1114
{
1115
    double delta;
×
1116

1117
    delta = 6.0 / 29.0;
×
1118
    if (t > delta * delta * delta) {
×
1119
        return cbrt(t);
×
1120
    }
1121

1122
    return (t / (3.0 * delta * delta)) + (4.0 / 29.0);
×
1123
}
1124

1125
/*
 * Piecewise inverse of the CIELAB forward function.
 *
 * With delta = 6/29: returns t^3 when t > delta, otherwise the linear
 * segment 3 * delta^2 * (t - 4/29).
 */
static inline double
sixel_cielab_f_inv(double t)
{
    const double delta = 6.0 / 29.0;

    return (t > delta)
        ? t * t * t
        : 3.0 * delta * delta * (t - (4.0 / 29.0));
}
1137

1138
/*
 * Quantize an Oklab lightness value to an 8-bit code.
 *
 * L is limited to [0, 1] (NaN is treated as 0), scaled by 255 and rounded
 * to nearest by adding 0.5 before truncation.
 */
static inline unsigned char
sixel_oklab_encode_L(double L)
{
    /*
     * Negated comparison on purpose: besides L <= 0 it also catches NaN,
     * which would otherwise reach the double-to-int conversion below and
     * trigger undefined behaviour.  L == 0 rounds to 0 anyway.
     */
    if (!(L > 0.0)) {
        return 0;
    }
    if (L > 1.0) {
        L = 1.0;
    }

    /* 0 < L <= 1, so L * 255.0 + 0.5 is inside (0.5, 255.5]: always 0..255. */
    return (unsigned char)(int)(L * 255.0 + 0.5);
}
1149

1150
static inline unsigned char
1151
sixel_oklab_encode_ab(double value)
×
1152
{
1153
    double encoded = value + SIXEL_OKLAB_AB_OFFSET;
×
1154

1155
    if (encoded < 0.0) {
×
1156
        encoded = 0.0;
1157
    } else if (encoded > 1.0) {
×
1158
        encoded = 1.0;
1159
    }
1160

1161
    return sixel_colorspace_clamp((int)(encoded * SIXEL_OKLAB_AB_SCALE + 0.5));
×
1162
}
1163

1164
static inline double
1165
sixel_oklab_decode_ab(unsigned char v)
×
1166
{
1167
    return (double)v / SIXEL_OKLAB_AB_SCALE - SIXEL_OKLAB_AB_OFFSET;
×
1168
}
1169

1170
/*
 * Quantize a normalized CIELAB lightness to an 8-bit code: limit with
 * sixel_clamp_unit(), scale by 255, round to nearest (add 0.5, truncate)
 * and saturate to 0..255.
 */
static inline unsigned char
sixel_cielab_encode_L(double L)
{
    const double unit = sixel_clamp_unit(L);
    const int rounded = (int)(unit * 255.0 + 0.5);

    return sixel_colorspace_clamp(rounded);
}
1178

1179
static inline unsigned char
1180
sixel_cielab_encode_ab(double value)
×
1181
{
1182
    double shifted;
×
1183
    double normalized;
×
1184

1185
    shifted = sixel_cielab_clamp_ab(value);
×
1186
    normalized = (shifted / (2.0 * SIXEL_CIELAB_AB_LIMIT)) + 0.5;
×
1187
    return sixel_colorspace_clamp((int)(normalized * 255.0 + 0.5));
×
1188
}
1189

1190
static inline double
1191
sixel_cielab_decode_ab(unsigned char v)
×
1192
{
1193
    double encoded;
×
1194

1195
    encoded = (double)v / 255.0;
×
1196
    return (encoded - 0.5) * (2.0 * SIXEL_CIELAB_AB_LIMIT);
×
1197
}
1198

1199
/*
 * Quantize a normalized DIN99d lightness to an 8-bit code: limit with
 * sixel_clamp_unit(), scale by 255, round to nearest (add 0.5, truncate)
 * and saturate to 0..255.
 */
static inline unsigned char
sixel_din99d_encode_L(double L)
{
    const double unit = sixel_clamp_unit(L);
    const int rounded = (int)(unit * 255.0 + 0.5);

    return sixel_colorspace_clamp(rounded);
}
1207

1208
/*
 * Quantize a normalized DIN99d a/b component to an 8-bit code.
 *
 * The value is limited with sixel_din99d_clamp_ab_norm(), halved and offset
 * by 0.5, then scaled by 255, rounded to nearest and saturated to 0..255.
 */
static inline unsigned char
sixel_din99d_encode_ab(double value)
{
    const double unit = (sixel_din99d_clamp_ab_norm(value) / 2.0) + 0.5;
    const int rounded = (int)(unit * 255.0 + 0.5);

    return sixel_colorspace_clamp(rounded);
}
1216

1217
/*
 * Decode an 8-bit DIN99d a/b code: map the byte to [0, 1], recentre and
 * double it, then pass the result through sixel_din99d_clamp_ab_norm().
 */
static inline double
sixel_din99d_decode_ab(unsigned char v)
{
    const double unit = (double)v / 255.0;

    return sixel_din99d_clamp_ab_norm((unit - 0.5) * 2.0);
}
1225

1226
static void
1227
sixel_linear_to_cielab(double r, double g, double b,
×
1228
                       double *L, double *A, double *B)
1229
{
1230
    const double Xn = 0.95047;
×
1231
    const double Yn = 1.00000;
×
1232
    const double Zn = 1.08883;
×
1233
    double X;
×
1234
    double Y;
×
1235
    double Z;
×
1236
    double fx;
×
1237
    double fy;
×
1238
    double fz;
×
1239
    double L_component;
×
1240
    double a_component;
×
1241
    double b_component;
×
1242

1243
    X = 0.4124564 * r + 0.3575761 * g + 0.1804375 * b;
×
1244
    Y = 0.2126729 * r + 0.7151522 * g + 0.0721750 * b;
×
1245
    Z = 0.0193339 * r + 0.1191920 * g + 0.9503041 * b;
×
1246

1247
    fx = sixel_cielab_f(X / Xn);
×
1248
    fy = sixel_cielab_f(Y / Yn);
×
1249
    fz = sixel_cielab_f(Z / Zn);
×
1250

1251
    L_component = 116.0 * fy - 16.0;
×
1252
    a_component = 500.0 * (fx - fy);
×
1253
    b_component = 200.0 * (fy - fz);
×
1254

1255
    *L = sixel_clamp_unit(L_component / SIXEL_CIELAB_L_SCALE);
×
1256
    *A = sixel_cielab_clamp_ab(a_component / SIXEL_CIELAB_AB_SCALE);
×
1257
    *B = sixel_cielab_clamp_ab(b_component / SIXEL_CIELAB_AB_SCALE);
×
1258
}
×
1259

1260
static void
1261
sixel_cielab_to_linear(double L, double A, double B,
×
1262
                       double *r, double *g, double *b)
1263
{
1264
    const double Xn = 0.95047;
×
1265
    const double Yn = 1.00000;
×
1266
    const double Zn = 1.08883;
×
1267
    double L_component;
×
1268
    double a_component;
×
1269
    double b_component;
×
1270
    double fx;
×
1271
    double fy;
×
1272
    double fz;
×
1273
    double X;
×
1274
    double Y;
×
1275
    double Z;
×
1276

1277
    L_component = sixel_clamp_unit(L) * SIXEL_CIELAB_L_SCALE;
×
1278
    a_component = sixel_cielab_clamp_ab(A) * SIXEL_CIELAB_AB_SCALE;
×
1279
    b_component = sixel_cielab_clamp_ab(B) * SIXEL_CIELAB_AB_SCALE;
×
1280

1281
    fy = (L_component + 16.0) / 116.0;
×
1282
    fx = fy + (a_component / 500.0);
×
1283
    fz = fy - (b_component / 200.0);
×
1284

1285
    X = Xn * sixel_cielab_f_inv(fx);
×
1286
    Y = Yn * sixel_cielab_f_inv(fy);
×
1287
    Z = Zn * sixel_cielab_f_inv(fz);
×
1288

1289
    *r = 3.2404542 * X - 1.5371385 * Y - 0.4985314 * Z;
×
1290
    *g = -0.9692660 * X + 1.8760108 * Y + 0.0415560 * Z;
×
1291
    *b = 0.0556434 * X - 0.2040259 * Y + 1.0572252 * Z;
×
1292

1293
    *r = sixel_clamp_unit(*r);
×
1294
    *g = sixel_clamp_unit(*g);
×
1295
    *b = sixel_clamp_unit(*b);
×
1296
}
×
1297

1298
static void
1299
sixel_cielab_to_din99d(double L,
×
1300
                       double a,
1301
                       double b,
1302
                       double *L99d,
1303
                       double *A99d,
1304
                       double *B99d)
1305
{
1306
    /* Convert from CIELAB to DIN99d using Cui et al. (2002) parameters. */
1307
    const double c1 = 325.22;
×
1308
    const double c2 = 0.0036;
×
1309
    const double c3 = 50.0;
×
1310
    const double c4 = 1.14;
×
1311
    const double c5 = 22.5;
×
1312
    const double c6 = 0.06;
×
1313
    const double c7 = 50.0;
×
1314
    const double c8 = 1.0;
×
1315
    const double rad_per_degree = 3.14159265358979323846 / 180.0;
×
1316
    double radians_c3;
×
1317
    double radians_c7;
×
1318
    double e;
×
1319
    double f;
×
1320
    double G;
×
1321
    double h_ef;
×
1322
    double C99;
×
1323

1324
    radians_c3 = c3 * rad_per_degree;
×
1325
    radians_c7 = c7 * rad_per_degree;
×
1326

1327
    e = cos(radians_c3) * a + sin(radians_c3) * b;
×
1328
    f = c4 * (-sin(radians_c3) * a + cos(radians_c3) * b);
×
1329
    G = sqrt(e * e + f * f);
×
1330
    h_ef = atan2(f, e) + radians_c7;
×
1331

1332
    C99 = c5 * (log1p(c6 * G)) / (c8);
×
1333

1334
    *A99d = C99 * cos(h_ef);
×
1335
    *B99d = C99 * sin(h_ef);
×
1336
    *L99d = c1 * log1p(c2 * L);
×
1337
}
×
1338

1339
static void
1340
sixel_din99d_to_cielab(double L99d,
×
1341
                       double A99d,
1342
                       double B99d,
1343
                       double *L,
1344
                       double *a,
1345
                       double *b)
1346
{
1347
    /* Convert from DIN99d back to absolute CIELAB coordinates. */
1348
    const double c1 = 325.22;
×
1349
    const double c2 = 0.0036;
×
1350
    const double c3 = 50.0;
×
1351
    const double c4 = 1.14;
×
1352
    const double c5 = 22.5;
×
1353
    const double c6 = 0.06;
×
1354
    const double c7 = 50.0;
×
1355
    const double c8 = 1.0;
×
1356
    const double rad_per_degree = 3.14159265358979323846 / 180.0;
×
1357
    double radians_c3;
×
1358
    double radians_c7;
×
1359
    double h99;
×
1360
    double C99;
×
1361
    double G;
×
1362
    double e;
×
1363
    double f;
×
1364

1365
    radians_c3 = c3 * rad_per_degree;
×
1366
    radians_c7 = c7 * rad_per_degree;
×
1367

1368
    h99 = atan2(B99d, A99d) - radians_c7;
×
1369
    C99 = hypot(A99d, B99d);
×
1370
    G = expm1((c8 / c5) * C99) / c6;
×
1371

1372
    e = G * cos(h99);
×
1373
    f = G * sin(h99);
×
1374

1375
    *a = e * cos(radians_c3) - (f / c4) * sin(radians_c3);
×
1376
    *b = e * sin(radians_c3) + (f / c4) * cos(radians_c3);
×
1377
    *L = expm1(L99d / c1) / c2;
×
1378
}
×
1379

1380
static void
1381
sixel_linear_to_din99d(double r,
×
1382
                       double g,
1383
                       double b,
1384
                       double *L99d_norm,
1385
                       double *A99d_norm,
1386
                       double *B99d_norm)
1387
{
1388
    double L;
×
1389
    double A;
×
1390
    double B;
×
1391
    double L_star;
×
1392
    double a_star;
×
1393
    double b_star;
×
1394
    double L99d;
×
1395
    double A99d;
×
1396
    double B99d;
×
1397

1398
    sixel_linear_to_cielab(r, g, b, &L, &A, &B);
×
1399

1400
    L_star = L * SIXEL_CIELAB_L_SCALE;
×
1401
    a_star = A * SIXEL_CIELAB_AB_SCALE;
×
1402
    b_star = B * SIXEL_CIELAB_AB_SCALE;
×
1403

1404
    sixel_cielab_to_din99d(L_star, a_star, b_star, &L99d, &A99d, &B99d);
×
1405

1406
    *L99d_norm = sixel_clamp_unit(L99d / SIXEL_DIN99D_L_SCALE);
×
1407
    *A99d_norm = sixel_din99d_clamp_ab_norm(
×
1408
        A99d / SIXEL_DIN99D_AB_RANGE);
1409
    *B99d_norm = sixel_din99d_clamp_ab_norm(
×
1410
        B99d / SIXEL_DIN99D_AB_RANGE);
1411
}
×
1412

1413
static void
1414
sixel_din99d_to_linear(double L99d_norm,
×
1415
                       double A99d_norm,
1416
                       double B99d_norm,
1417
                       double *r,
1418
                       double *g,
1419
                       double *b)
1420
{
1421
    double L_star;
×
1422
    double a_star;
×
1423
    double b_star;
×
1424
    double L;
×
1425
    double A;
×
1426
    double B;
×
1427

1428
    L = sixel_clamp_unit(L99d_norm) * SIXEL_DIN99D_L_SCALE;
×
1429
    A = sixel_din99d_clamp_ab_norm(A99d_norm) * SIXEL_DIN99D_AB_RANGE;
×
1430
    B = sixel_din99d_clamp_ab_norm(B99d_norm) * SIXEL_DIN99D_AB_RANGE;
×
1431

1432
    sixel_din99d_to_cielab(L, A, B, &L_star, &a_star, &b_star);
×
1433

1434
    L_star = sixel_clamp_unit(L_star / SIXEL_CIELAB_L_SCALE);
×
1435
    a_star = sixel_cielab_clamp_ab(a_star / SIXEL_CIELAB_AB_SCALE);
×
1436
    b_star = sixel_cielab_clamp_ab(b_star / SIXEL_CIELAB_AB_SCALE);
×
1437

1438
    sixel_cielab_to_linear(L_star, a_star, b_star, r, g, b);
×
1439
}
×
1440

1441
static void
1442
sixel_linear_to_oklab(double r, double g, double b,
×
1443
                      double *L, double *A, double *B)
1444
{
1445
    double l;
×
1446
    double m;
×
1447
    double s;
×
1448
    double l_;
×
1449
    double m_;
×
1450
    double s_;
×
1451

1452
    l = 0.4122214708 * r + 0.5363325363 * g + 0.0514459929 * b;
×
1453
    m = 0.2119034982 * r + 0.6806995451 * g + 0.1073969566 * b;
×
1454
    s = 0.0883024619 * r + 0.2817188376 * g + 0.6299787005 * b;
×
1455

1456
    l_ = cbrt(l);
×
1457
    m_ = cbrt(m);
×
1458
    s_ = cbrt(s);
×
1459

1460
    *L = 0.2104542553 * l_ + 0.7936177850 * m_ - 0.0040720468 * s_;
×
1461
    *A = 1.9779984951 * l_ - 2.4285922050 * m_ + 0.4505937099 * s_;
×
1462
    *B = 0.0259040371 * l_ + 0.7827717662 * m_ - 0.8086757660 * s_;
×
1463
}
×
1464

1465
/*
 * Convert Oklab to linear RGB.
 *
 * L, A, B   Oklab input
 * r, g, b   linear RGB outputs; negative results are raised to 0.0, while
 *           values above 1.0 are passed through unchanged
 */
static void
sixel_oklab_to_linear(double L, double A, double B,
                      double *r, double *g, double *b)
{
    /* Inverse of the second matrix: back to cube-rooted responses. */
    const double long_root = L + 0.3963377774 * A + 0.2158037573 * B;
    const double medium_root = L - 0.1055613458 * A - 0.0638541728 * B;
    const double short_root = L - 0.0894841775 * A - 1.2914855480 * B;

    /* Undo the cube root. */
    const double lng = long_root * long_root * long_root;
    const double med = medium_root * medium_root * medium_root;
    const double sht = short_root * short_root * short_root;

    /* Inverse of the first matrix. */
    const double red = 4.0767416621 * lng - 3.3077115913 * med
                     + 0.2309699292 * sht;
    const double green = -1.2684380046 * lng + 2.6097574011 * med
                       - 0.3413193965 * sht;
    const double blue = -0.0041960863 * lng - 0.7034186147 * med
                      + 1.7076147010 * sht;

    /* Only the lower bound is enforced here. */
    *r = (red < 0.0) ? 0.0 : red;
    *g = (green < 0.0) ? 0.0 : green;
    *b = (blue < 0.0) ? 0.0 : blue;
}
×
1498

1499
static void
1500
sixel_linear_srgb_to_smptec(double r, double g, double b,
×
1501
                            double *rs, double *gs, double *bs)
1502
{
1503
    double sr;
×
1504
    double sg;
×
1505
    double sb;
×
1506

1507
    sr = sixel_linear_srgb_to_smptec_matrix[0][0] * r
×
1508
       + sixel_linear_srgb_to_smptec_matrix[0][1] * g
×
1509
       + sixel_linear_srgb_to_smptec_matrix[0][2] * b;
×
1510
    sg = sixel_linear_srgb_to_smptec_matrix[1][0] * r
×
1511
       + sixel_linear_srgb_to_smptec_matrix[1][1] * g
×
1512
       + sixel_linear_srgb_to_smptec_matrix[1][2] * b;
×
1513
    sb = sixel_linear_srgb_to_smptec_matrix[2][0] * r
×
1514
       + sixel_linear_srgb_to_smptec_matrix[2][1] * g
×
1515
       + sixel_linear_srgb_to_smptec_matrix[2][2] * b;
×
1516

1517
    *rs = sixel_clamp_unit(sr);
×
1518
    *gs = sixel_clamp_unit(sg);
×
1519
    *bs = sixel_clamp_unit(sb);
×
1520
}
×
1521

1522
static void
1523
sixel_linear_smptec_to_srgb(double rs, double gs, double bs,
×
1524
                            double *r, double *g, double *b)
1525
{
1526
    double r_lin;
×
1527
    double g_lin;
×
1528
    double b_lin;
×
1529

1530
    r_lin = sixel_linear_smptec_to_srgb_matrix[0][0] * rs
×
1531
          + sixel_linear_smptec_to_srgb_matrix[0][1] * gs
×
1532
          + sixel_linear_smptec_to_srgb_matrix[0][2] * bs;
×
1533
    g_lin = sixel_linear_smptec_to_srgb_matrix[1][0] * rs
×
1534
          + sixel_linear_smptec_to_srgb_matrix[1][1] * gs
×
1535
          + sixel_linear_smptec_to_srgb_matrix[1][2] * bs;
×
1536
    b_lin = sixel_linear_smptec_to_srgb_matrix[2][0] * rs
×
1537
          + sixel_linear_smptec_to_srgb_matrix[2][1] * gs
×
1538
          + sixel_linear_smptec_to_srgb_matrix[2][2] * bs;
×
1539

1540
    *r = sixel_clamp_unit(r_lin);
×
1541
    *g = sixel_clamp_unit(g_lin);
×
1542
    *b = sixel_clamp_unit(b_lin);
×
1543
}
×
1544

1545
/*
 * In-place scalar conversion of one float RGB pixel using the
 * SMPTE-C -> sRGB matrix.
 *
 * Each channel is widened to double and limited with sixel_clamp_unit(),
 * then sixel_linear_smptec_to_srgb() applies the 3x3 matrix and limits the
 * result again before it is narrowed back to float.
 */
static void
sixel_smptec_to_linear_scalar(float *r, float *g, float *b)
{
    /* Read and limit all three inputs before anything is written back. */
    const double in_r = sixel_clamp_unit((double)*r);
    const double in_g = sixel_clamp_unit((double)*g);
    const double in_b = sixel_clamp_unit((double)*b);
    double out_r;
    double out_g;
    double out_b;

    sixel_linear_smptec_to_srgb(in_r, in_g, in_b, &out_r, &out_g, &out_b);

    *r = (float)out_r;
    *g = (float)out_g;
    *b = (float)out_b;
}
×
1573

1574
/*
 * In-place scalar conversion of one float RGB pixel using the
 * sRGB -> SMPTE-C matrix.
 *
 * Each channel is widened to double and limited with sixel_clamp_unit(),
 * then sixel_linear_srgb_to_smptec() applies the 3x3 matrix and limits the
 * result again before it is narrowed back to float.
 */
static void
sixel_linear_to_smptec_scalar(float *r, float *g, float *b)
{
    /* Read and limit all three inputs before anything is written back. */
    const double in_r = sixel_clamp_unit((double)*r);
    const double in_g = sixel_clamp_unit((double)*g);
    const double in_b = sixel_clamp_unit((double)*b);
    double out_r;
    double out_g;
    double out_b;

    sixel_linear_srgb_to_smptec(in_r, in_g, in_b, &out_r, &out_g, &out_b);

    *r = (float)out_r;
    *g = (float)out_g;
    *b = (float)out_b;
}
×
1602

1603
/*
1604
 * SIMD helpers below operate on interleaved RGB float triplets.
1605
 * Channels are gathered with byte offsets, multiplied by the 3x3
1606
 * SMPTEC↔sRGB matrices, and scattered back into the same AoS layout
1607
 * after clamping to [0, 1].
1608
 */
1609
#if defined(SIXEL_USE_AVX512) && defined(__AVX512F__) && \
1610
        defined(__AVX512BW__)
1611
static SIXEL_TARGET_AVX512 size_t
1612
sixel_smptec_to_linear_avx512(float *pixels, size_t pixel_total)
1613
{
1614
    size_t index;
1615
    size_t processed;
1616
    const __m512 zero = _mm512_set1_ps(0.0f);
1617
    const __m512 one = _mm512_set1_ps(1.0f);
1618
    const __m512 m00 = _mm512_set1_ps(
1619
        sixel_linear_smptec_to_srgb_matrix_f32[0][0]);
1620
    const __m512 m01 = _mm512_set1_ps(
1621
        sixel_linear_smptec_to_srgb_matrix_f32[0][1]);
1622
    const __m512 m02 = _mm512_set1_ps(
1623
        sixel_linear_smptec_to_srgb_matrix_f32[0][2]);
1624
    const __m512 m10 = _mm512_set1_ps(
1625
        sixel_linear_smptec_to_srgb_matrix_f32[1][0]);
1626
    const __m512 m11 = _mm512_set1_ps(
1627
        sixel_linear_smptec_to_srgb_matrix_f32[1][1]);
1628
    const __m512 m12 = _mm512_set1_ps(
1629
        sixel_linear_smptec_to_srgb_matrix_f32[1][2]);
1630
    const __m512 m20 = _mm512_set1_ps(
1631
        sixel_linear_smptec_to_srgb_matrix_f32[2][0]);
1632
    const __m512 m21 = _mm512_set1_ps(
1633
        sixel_linear_smptec_to_srgb_matrix_f32[2][1]);
1634
    const __m512 m22 = _mm512_set1_ps(
1635
        sixel_linear_smptec_to_srgb_matrix_f32[2][2]);
1636
    const __m512i idx_r = _mm512_setr_epi32(
1637
        0, 12, 24, 36, 48, 60, 72, 84,
1638
        96, 108, 120, 132, 144, 156, 168, 180);
1639
    const __m512i idx_g = _mm512_setr_epi32(
1640
        4, 16, 28, 40, 52, 64, 76, 88,
1641
        100, 112, 124, 136, 148, 160, 172, 184);
1642
    const __m512i idx_b = _mm512_setr_epi32(
1643
        8, 20, 32, 44, 56, 68, 80, 92,
1644
        104, 116, 128, 140, 152, 164, 176, 188);
1645

1646
    processed = pixel_total - (pixel_total % 16U);
1647
    for (index = 0U; index < processed; index += 16U) {
1648
        const char *base_char;
1649
        const float *base;
1650
        __m512 r;
1651
        __m512 g;
1652
        __m512 b;
1653
        __m512 r_lin;
1654
        __m512 g_lin;
1655
        __m512 b_lin;
1656

1657
        base_char = (const char *)(pixels + index * 3U);
1658
        base = (const float *)base_char;
1659

1660
        r = _mm512_i32gather_ps(idx_r, base, 1);
1661
        g = _mm512_i32gather_ps(idx_g, base, 1);
1662
        b = _mm512_i32gather_ps(idx_b, base, 1);
1663

1664
        r_lin = SIXEL_FMADD_PS512(g, m01, _mm512_mul_ps(r, m00));
1665
        r_lin = SIXEL_FMADD_PS512(b, m02, r_lin);
1666

1667
        g_lin = SIXEL_FMADD_PS512(g, m11, _mm512_mul_ps(r, m10));
1668
        g_lin = SIXEL_FMADD_PS512(b, m12, g_lin);
1669

1670
        b_lin = SIXEL_FMADD_PS512(g, m21, _mm512_mul_ps(r, m20));
1671
        b_lin = SIXEL_FMADD_PS512(b, m22, b_lin);
1672

1673
        r_lin = _mm512_min_ps(one, _mm512_max_ps(zero, r_lin));
1674
        g_lin = _mm512_min_ps(one, _mm512_max_ps(zero, g_lin));
1675
        b_lin = _mm512_min_ps(one, _mm512_max_ps(zero, b_lin));
1676

1677
        _mm512_i32scatter_ps((float *)base, idx_r, r_lin, 1);
1678
        _mm512_i32scatter_ps((float *)base, idx_g, g_lin, 1);
1679
        _mm512_i32scatter_ps((float *)base, idx_b, b_lin, 1);
1680
    }
1681

1682
    return processed;
1683
}
1684

1685
static SIXEL_TARGET_AVX512 size_t
1686
sixel_linear_to_smptec_avx512(float *pixels, size_t pixel_total)
1687
{
1688
    size_t index;
1689
    size_t processed;
1690
    const __m512 zero = _mm512_set1_ps(0.0f);
1691
    const __m512 one = _mm512_set1_ps(1.0f);
1692
    const __m512 m00 = _mm512_set1_ps(
1693
        sixel_linear_srgb_to_smptec_matrix_f32[0][0]);
1694
    const __m512 m01 = _mm512_set1_ps(
1695
        sixel_linear_srgb_to_smptec_matrix_f32[0][1]);
1696
    const __m512 m02 = _mm512_set1_ps(
1697
        sixel_linear_srgb_to_smptec_matrix_f32[0][2]);
1698
    const __m512 m10 = _mm512_set1_ps(
1699
        sixel_linear_srgb_to_smptec_matrix_f32[1][0]);
1700
    const __m512 m11 = _mm512_set1_ps(
1701
        sixel_linear_srgb_to_smptec_matrix_f32[1][1]);
1702
    const __m512 m12 = _mm512_set1_ps(
1703
        sixel_linear_srgb_to_smptec_matrix_f32[1][2]);
1704
    const __m512 m20 = _mm512_set1_ps(
1705
        sixel_linear_srgb_to_smptec_matrix_f32[2][0]);
1706
    const __m512 m21 = _mm512_set1_ps(
1707
        sixel_linear_srgb_to_smptec_matrix_f32[2][1]);
1708
    const __m512 m22 = _mm512_set1_ps(
1709
        sixel_linear_srgb_to_smptec_matrix_f32[2][2]);
1710
    const __m512i idx_r = _mm512_setr_epi32(
1711
        0, 12, 24, 36, 48, 60, 72, 84,
1712
        96, 108, 120, 132, 144, 156, 168, 180);
1713
    const __m512i idx_g = _mm512_setr_epi32(
1714
        4, 16, 28, 40, 52, 64, 76, 88,
1715
        100, 112, 124, 136, 148, 160, 172, 184);
1716
    const __m512i idx_b = _mm512_setr_epi32(
1717
        8, 20, 32, 44, 56, 68, 80, 92,
1718
        104, 116, 128, 140, 152, 164, 176, 188);
1719

1720
    processed = pixel_total - (pixel_total % 16U);
1721
    for (index = 0U; index < processed; index += 16U) {
1722
        const char *base_char;
1723
        const float *base;
1724
        __m512 r;
1725
        __m512 g;
1726
        __m512 b;
1727
        __m512 sr;
1728
        __m512 sg;
1729
        __m512 sb;
1730

1731
        base_char = (const char *)(pixels + index * 3U);
1732
        base = (const float *)base_char;
1733

1734
        r = _mm512_i32gather_ps(idx_r, base, 1);
1735
        g = _mm512_i32gather_ps(idx_g, base, 1);
1736
        b = _mm512_i32gather_ps(idx_b, base, 1);
1737

1738
        sr = SIXEL_FMADD_PS512(g, m01, _mm512_mul_ps(r, m00));
1739
        sr = SIXEL_FMADD_PS512(b, m02, sr);
1740

1741
        sg = SIXEL_FMADD_PS512(g, m11, _mm512_mul_ps(r, m10));
1742
        sg = SIXEL_FMADD_PS512(b, m12, sg);
1743

1744
        sb = SIXEL_FMADD_PS512(g, m21, _mm512_mul_ps(r, m20));
1745
        sb = SIXEL_FMADD_PS512(b, m22, sb);
1746

1747
        sr = _mm512_min_ps(one, _mm512_max_ps(zero, sr));
1748
        sg = _mm512_min_ps(one, _mm512_max_ps(zero, sg));
1749
        sb = _mm512_min_ps(one, _mm512_max_ps(zero, sb));
1750

1751
        _mm512_i32scatter_ps((float *)base, idx_r, sr, 1);
1752
        _mm512_i32scatter_ps((float *)base, idx_g, sg, 1);
1753
        _mm512_i32scatter_ps((float *)base, idx_b, sb, 1);
1754
    }
1755

1756
    return processed;
1757
}
1758
#endif
1759

1760
#if defined(SIXEL_USE_AVX2) && defined(__AVX2__)
1761
static SIXEL_TARGET_AVX2 size_t
1762
sixel_smptec_to_linear_avx2(float *pixels, size_t pixel_total)
1763
{
1764
    size_t index;
1765
    size_t processed;
1766
    const __m256 zero = _mm256_set1_ps(0.0f);
1767
    const __m256 one = _mm256_set1_ps(1.0f);
1768
    const __m256 m00 = _mm256_set1_ps(
1769
        sixel_linear_smptec_to_srgb_matrix_f32[0][0]);
1770
    const __m256 m01 = _mm256_set1_ps(
1771
        sixel_linear_smptec_to_srgb_matrix_f32[0][1]);
1772
    const __m256 m02 = _mm256_set1_ps(
1773
        sixel_linear_smptec_to_srgb_matrix_f32[0][2]);
1774
    const __m256 m10 = _mm256_set1_ps(
1775
        sixel_linear_smptec_to_srgb_matrix_f32[1][0]);
1776
    const __m256 m11 = _mm256_set1_ps(
1777
        sixel_linear_smptec_to_srgb_matrix_f32[1][1]);
1778
    const __m256 m12 = _mm256_set1_ps(
1779
        sixel_linear_smptec_to_srgb_matrix_f32[1][2]);
1780
    const __m256 m20 = _mm256_set1_ps(
1781
        sixel_linear_smptec_to_srgb_matrix_f32[2][0]);
1782
    const __m256 m21 = _mm256_set1_ps(
1783
        sixel_linear_smptec_to_srgb_matrix_f32[2][1]);
1784
    const __m256 m22 = _mm256_set1_ps(
1785
        sixel_linear_smptec_to_srgb_matrix_f32[2][2]);
1786
    const __m256i idx_r = _mm256_setr_epi32(
1787
        0, 12, 24, 36, 48, 60, 72, 84);
1788
    const __m256i idx_g = _mm256_setr_epi32(
1789
        4, 16, 28, 40, 52, 64, 76, 88);
1790
    const __m256i idx_b = _mm256_setr_epi32(
1791
        8, 20, 32, 44, 56, 68, 80, 92);
1792
    const char *base_char;
1793
    const float *base;
1794
    float *store;
1795
    __m256 r;
1796
    __m256 g;
1797
    __m256 b;
1798
    __m256 r_lin;
1799
    __m256 g_lin;
1800
    __m256 b_lin;
1801
    __m128 sr_lo;
1802
    __m128 sg_lo;
1803
    __m128 sb_lo;
1804
    __m128 sr_hi;
1805
    __m128 sg_hi;
1806
    __m128 sb_hi;
1807
    __m128 rg_lo;
1808
    __m128 rg_hi;
1809
    __m128 gb_lo;
1810
    __m128 gb_hi;
1811
    __m128 br_hi;
1812
    __m128 store0;
1813
    __m128 store1;
1814
    __m128 store2;
1815
    __m128 store3;
1816
    __m128 store4;
1817
    __m128 store5;
1818

1819
    processed = pixel_total - (pixel_total % 8U);
1820
    for (index = 0U; index < processed; index += 8U) {
1821
        base_char = (const char *)(pixels + index * 3U);
1822
        base = (const float *)base_char;
1823
        store = (float *)base_char;
1824

1825
        r = _mm256_i32gather_ps(base, idx_r, 1);
1826
        g = _mm256_i32gather_ps(base, idx_g, 1);
1827
        b = _mm256_i32gather_ps(base, idx_b, 1);
1828

1829
        r_lin = SIXEL_FMADD_PS256(g, m01, _mm256_mul_ps(r, m00));
1830
        r_lin = SIXEL_FMADD_PS256(b, m02, r_lin);
1831

1832
        g_lin = SIXEL_FMADD_PS256(g, m11, _mm256_mul_ps(r, m10));
1833
        g_lin = SIXEL_FMADD_PS256(b, m12, g_lin);
1834

1835
        b_lin = SIXEL_FMADD_PS256(g, m21, _mm256_mul_ps(r, m20));
1836
        b_lin = SIXEL_FMADD_PS256(b, m22, b_lin);
1837

1838
        r_lin = _mm256_min_ps(one, _mm256_max_ps(zero, r_lin));
1839
        g_lin = _mm256_min_ps(one, _mm256_max_ps(zero, g_lin));
1840
        b_lin = _mm256_min_ps(one, _mm256_max_ps(zero, b_lin));
1841

1842
        sr_lo = _mm256_castps256_ps128(r_lin);
1843
        sr_hi = _mm256_extractf128_ps(r_lin, 1);
1844
        sg_lo = _mm256_castps256_ps128(g_lin);
1845
        sg_hi = _mm256_extractf128_ps(g_lin, 1);
1846
        sb_lo = _mm256_castps256_ps128(b_lin);
1847
        sb_hi = _mm256_extractf128_ps(b_lin, 1);
1848

1849
        /*
1850
         * Re-interleave SoA vectors back into AoS layout using shuffle
1851
         * and blend only. This avoids the temporary scalar arrays and
1852
         * keeps the writeback path entirely vectorized.
1853
         */
1854
        rg_lo = _mm_unpacklo_ps(sr_lo, sg_lo);
1855
        rg_hi = _mm_unpackhi_ps(sr_lo, sg_lo);
1856
        gb_lo = _mm_unpacklo_ps(sg_lo, sb_lo);
1857
        gb_hi = _mm_unpackhi_ps(sg_lo, sb_lo);
1858
        br_hi = _mm_unpackhi_ps(sb_lo, sr_lo);
1859

1860
        store0 = _mm_movelh_ps(rg_lo, sb_lo);
1861
        store0 = _mm_blend_ps(store0,
1862
                              _mm_shuffle_ps(sr_lo, sr_lo,
1863
                                             _MM_SHUFFLE(1, 1, 1, 1)),
1864
                              0x8);
1865
        store1 = _mm_shuffle_ps(gb_lo, rg_hi, 0x4E);
1866
        store2 = _mm_shuffle_ps(br_hi, gb_hi, 0xEC);
1867

1868
        _mm_storeu_ps(store, store0);
1869
        _mm_storeu_ps(store + 4U, store1);
1870
        _mm_storeu_ps(store + 8U, store2);
1871

1872
        rg_lo = _mm_unpacklo_ps(sr_hi, sg_hi);
1873
        rg_hi = _mm_unpackhi_ps(sr_hi, sg_hi);
1874
        gb_lo = _mm_unpacklo_ps(sg_hi, sb_hi);
1875
        gb_hi = _mm_unpackhi_ps(sg_hi, sb_hi);
1876
        br_hi = _mm_unpackhi_ps(sb_hi, sr_hi);
1877

1878
        store3 = _mm_movelh_ps(rg_lo, sb_hi);
1879
        store3 = _mm_blend_ps(store3,
1880
                              _mm_shuffle_ps(sr_hi, sr_hi,
1881
                                             _MM_SHUFFLE(1, 1, 1, 1)),
1882
                              0x8);
1883
        store4 = _mm_shuffle_ps(gb_lo, rg_hi, 0x4E);
1884
        store5 = _mm_shuffle_ps(br_hi, gb_hi, 0xEC);
1885

1886
        _mm_storeu_ps(store + 12U, store3);
1887
        _mm_storeu_ps(store + 16U, store4);
1888
        _mm_storeu_ps(store + 20U, store5);
1889
    }
1890

1891
    return processed;
1892
}
1893

1894
static SIXEL_TARGET_AVX2 size_t
1895
sixel_linear_to_smptec_avx2(float *pixels, size_t pixel_total)
1896
{
1897
    size_t index;
1898
    size_t processed;
1899
    const __m256 zero = _mm256_set1_ps(0.0f);
1900
    const __m256 one = _mm256_set1_ps(1.0f);
1901
    const __m256 m00 = _mm256_set1_ps(
1902
        sixel_linear_srgb_to_smptec_matrix_f32[0][0]);
1903
    const __m256 m01 = _mm256_set1_ps(
1904
        sixel_linear_srgb_to_smptec_matrix_f32[0][1]);
1905
    const __m256 m02 = _mm256_set1_ps(
1906
        sixel_linear_srgb_to_smptec_matrix_f32[0][2]);
1907
    const __m256 m10 = _mm256_set1_ps(
1908
        sixel_linear_srgb_to_smptec_matrix_f32[1][0]);
1909
    const __m256 m11 = _mm256_set1_ps(
1910
        sixel_linear_srgb_to_smptec_matrix_f32[1][1]);
1911
    const __m256 m12 = _mm256_set1_ps(
1912
        sixel_linear_srgb_to_smptec_matrix_f32[1][2]);
1913
    const __m256 m20 = _mm256_set1_ps(
1914
        sixel_linear_srgb_to_smptec_matrix_f32[2][0]);
1915
    const __m256 m21 = _mm256_set1_ps(
1916
        sixel_linear_srgb_to_smptec_matrix_f32[2][1]);
1917
    const __m256 m22 = _mm256_set1_ps(
1918
        sixel_linear_srgb_to_smptec_matrix_f32[2][2]);
1919
    const __m256i idx_r = _mm256_setr_epi32(
1920
        0, 12, 24, 36, 48, 60, 72, 84);
1921
    const __m256i idx_g = _mm256_setr_epi32(
1922
        4, 16, 28, 40, 52, 64, 76, 88);
1923
    const __m256i idx_b = _mm256_setr_epi32(
1924
        8, 20, 32, 44, 56, 68, 80, 92);
1925
    const char *base_char;
1926
    const float *base;
1927
    float *store;
1928
    __m256 r;
1929
    __m256 g;
1930
    __m256 b;
1931
    __m256 sr;
1932
    __m256 sg;
1933
    __m256 sb;
1934
    __m128 sr_lo;
1935
    __m128 sg_lo;
1936
    __m128 sb_lo;
1937
    __m128 sr_hi;
1938
    __m128 sg_hi;
1939
    __m128 sb_hi;
1940
    __m128 rg_lo;
1941
    __m128 rg_hi;
1942
    __m128 gb_lo;
1943
    __m128 gb_hi;
1944
    __m128 br_hi;
1945
    __m128 store0;
1946
    __m128 store1;
1947
    __m128 store2;
1948
    __m128 store3;
1949
    __m128 store4;
1950
    __m128 store5;
1951

1952
    processed = pixel_total - (pixel_total % 8U);
1953
    for (index = 0U; index < processed; index += 8U) {
1954
        base_char = (const char *)(pixels + index * 3U);
1955
        base = (const float *)base_char;
1956
        store = (float *)base_char;
1957

1958
        r = _mm256_i32gather_ps(base, idx_r, 1);
1959
        g = _mm256_i32gather_ps(base, idx_g, 1);
1960
        b = _mm256_i32gather_ps(base, idx_b, 1);
1961

1962
        sr = SIXEL_FMADD_PS256(g, m01, _mm256_mul_ps(r, m00));
1963
        sr = SIXEL_FMADD_PS256(b, m02, sr);
1964

1965
        sg = SIXEL_FMADD_PS256(g, m11, _mm256_mul_ps(r, m10));
1966
        sg = SIXEL_FMADD_PS256(b, m12, sg);
1967

1968
        sb = SIXEL_FMADD_PS256(g, m21, _mm256_mul_ps(r, m20));
1969
        sb = SIXEL_FMADD_PS256(b, m22, sb);
1970

1971
        sr = _mm256_min_ps(one, _mm256_max_ps(zero, sr));
1972
        sg = _mm256_min_ps(one, _mm256_max_ps(zero, sg));
1973
        sb = _mm256_min_ps(one, _mm256_max_ps(zero, sb));
1974

1975
        sr_lo = _mm256_castps256_ps128(sr);
1976
        sr_hi = _mm256_extractf128_ps(sr, 1);
1977
        sg_lo = _mm256_castps256_ps128(sg);
1978
        sg_hi = _mm256_extractf128_ps(sg, 1);
1979
        sb_lo = _mm256_castps256_ps128(sb);
1980
        sb_hi = _mm256_extractf128_ps(sb, 1);
1981

1982
        /*
1983
         * Re-pack SoA lanes into AoS using shuffle and blend to keep
1984
         * the entire writeback path vectorized without temporary
1985
         * scalar buffers.
1986
         */
1987
        rg_lo = _mm_unpacklo_ps(sr_lo, sg_lo);
1988
        rg_hi = _mm_unpackhi_ps(sr_lo, sg_lo);
1989
        gb_lo = _mm_unpacklo_ps(sg_lo, sb_lo);
1990
        gb_hi = _mm_unpackhi_ps(sg_lo, sb_lo);
1991
        br_hi = _mm_unpackhi_ps(sb_lo, sr_lo);
1992

1993
        store0 = _mm_movelh_ps(rg_lo, sb_lo);
1994
        store0 = _mm_blend_ps(store0,
1995
                              _mm_shuffle_ps(sr_lo, sr_lo,
1996
                                             _MM_SHUFFLE(1, 1, 1, 1)),
1997
                              0x8);
1998
        store1 = _mm_shuffle_ps(gb_lo, rg_hi, 0x4E);
1999
        store2 = _mm_shuffle_ps(br_hi, gb_hi, 0xEC);
2000

2001
        _mm_storeu_ps(store, store0);
2002
        _mm_storeu_ps(store + 4U, store1);
2003
        _mm_storeu_ps(store + 8U, store2);
2004

2005
        rg_lo = _mm_unpacklo_ps(sr_hi, sg_hi);
2006
        rg_hi = _mm_unpackhi_ps(sr_hi, sg_hi);
2007
        gb_lo = _mm_unpacklo_ps(sg_hi, sb_hi);
2008
        gb_hi = _mm_unpackhi_ps(sg_hi, sb_hi);
2009
        br_hi = _mm_unpackhi_ps(sb_hi, sr_hi);
2010

2011
        store3 = _mm_movelh_ps(rg_lo, sb_hi);
2012
        store3 = _mm_blend_ps(store3,
2013
                              _mm_shuffle_ps(sr_hi, sr_hi,
2014
                                             _MM_SHUFFLE(1, 1, 1, 1)),
2015
                              0x8);
2016
        store4 = _mm_shuffle_ps(gb_lo, rg_hi, 0x4E);
2017
        store5 = _mm_shuffle_ps(br_hi, gb_hi, 0xEC);
2018

2019
        _mm_storeu_ps(store + 12U, store3);
2020
        _mm_storeu_ps(store + 16U, store4);
2021
        _mm_storeu_ps(store + 20U, store5);
2022
    }
2023

2024
    return processed;
2025
}
2026
#endif
2027

2028
#if defined(SIXEL_USE_SSE2)
2029
static size_t
2030
sixel_smptec_to_linear_sse2(float *pixels, size_t pixel_total)
2031
{
2032
    size_t index;
2033
    size_t processed;
2034
    const __m128 zero = _mm_set1_ps(0.0f);
2035
    const __m128 one = _mm_set1_ps(1.0f);
2036
    const __m128 m00 = _mm_set1_ps(
2037
        sixel_linear_smptec_to_srgb_matrix_f32[0][0]);
2038
    const __m128 m01 = _mm_set1_ps(
2039
        sixel_linear_smptec_to_srgb_matrix_f32[0][1]);
2040
    const __m128 m02 = _mm_set1_ps(
2041
        sixel_linear_smptec_to_srgb_matrix_f32[0][2]);
2042
    const __m128 m10 = _mm_set1_ps(
2043
        sixel_linear_smptec_to_srgb_matrix_f32[1][0]);
2044
    const __m128 m11 = _mm_set1_ps(
2045
        sixel_linear_smptec_to_srgb_matrix_f32[1][1]);
2046
    const __m128 m12 = _mm_set1_ps(
2047
        sixel_linear_smptec_to_srgb_matrix_f32[1][2]);
2048
    const __m128 m20 = _mm_set1_ps(
2049
        sixel_linear_smptec_to_srgb_matrix_f32[2][0]);
2050
    const __m128 m21 = _mm_set1_ps(
2051
        sixel_linear_smptec_to_srgb_matrix_f32[2][1]);
2052
    const __m128 m22 = _mm_set1_ps(
2053
        sixel_linear_smptec_to_srgb_matrix_f32[2][2]);
2054
    float r_out[4];
2055
    float g_out[4];
2056
    float b_out[4];
2057
    __m128 vec0;
2058
    __m128 vec1;
2059
    __m128 vec2;
2060

2061
    processed = pixel_total - (pixel_total % 4U);
2062
    for (index = 0U; index < processed; index += 4U) {
×
2063
        float *base;
2064
        __m128 r;
2065
        __m128 g;
2066
        __m128 b;
2067
        __m128 r_lin;
2068
        __m128 g_lin;
2069
        __m128 b_lin;
2070

2071
        base = pixels + index * 3U;
2072

2073
        /*
2074
         * SSE2 lacks gathers, so load interleaved RGB triplets with
2075
         * scalar addressing and pack them lane-wise. Lane order is
2076
         * preserved (pixel0..pixel3) to reuse the scatter below.
2077
         */
2078
        r = _mm_set_ps(base[9], base[6], base[3], base[0]);
2079
        g = _mm_set_ps(base[10], base[7], base[4], base[1]);
2080
        b = _mm_set_ps(base[11], base[8], base[5], base[2]);
2081

2082
        r_lin = _mm_add_ps(_mm_mul_ps(r, m00), _mm_mul_ps(g, m01));
2083
        r_lin = _mm_add_ps(r_lin, _mm_mul_ps(b, m02));
2084

2085
        g_lin = _mm_add_ps(_mm_mul_ps(r, m10), _mm_mul_ps(g, m11));
2086
        g_lin = _mm_add_ps(g_lin, _mm_mul_ps(b, m12));
2087

2088
        b_lin = _mm_add_ps(_mm_mul_ps(r, m20), _mm_mul_ps(g, m21));
2089
        b_lin = _mm_add_ps(b_lin, _mm_mul_ps(b, m22));
2090

2091
        r_lin = _mm_min_ps(one, _mm_max_ps(zero, r_lin));
2092
        g_lin = _mm_min_ps(one, _mm_max_ps(zero, g_lin));
2093
        b_lin = _mm_min_ps(one, _mm_max_ps(zero, b_lin));
2094

2095
        _mm_storeu_ps(r_out, r_lin);
2096
        _mm_storeu_ps(g_out, g_lin);
2097
        _mm_storeu_ps(b_out, b_lin);
2098

2099
        /*
2100
         * Re-interleave SIMD lanes back into AoS layout with vector
2101
         * stores to avoid the scalar scatter loop. Layout per store:
2102
         *   vec0 -> [r0 g0 b0 r1]
2103
         *   vec1 -> [g1 b1 r2 g2]
2104
         *   vec2 -> [b2 r3 g3 b3]
2105
         */
2106
        vec0 = _mm_setr_ps(r_out[0], g_out[0], b_out[0], r_out[1]);
2107
        vec1 = _mm_setr_ps(g_out[1], b_out[1], r_out[2], g_out[2]);
2108
        vec2 = _mm_setr_ps(b_out[2], r_out[3], g_out[3], b_out[3]);
2109

2110
        _mm_storeu_ps(base, vec0);
2111
        _mm_storeu_ps(base + 4U, vec1);
2112
        _mm_storeu_ps(base + 8U, vec2);
2113
    }
2114

2115
    return processed;
2116
}
2117

2118
static size_t
2119
sixel_linear_to_smptec_sse2(float *pixels, size_t pixel_total)
2120
{
2121
    size_t index;
2122
    size_t processed;
2123
    const __m128 zero = _mm_set1_ps(0.0f);
2124
    const __m128 one = _mm_set1_ps(1.0f);
2125
    const __m128 m00 = _mm_set1_ps(
2126
        sixel_linear_srgb_to_smptec_matrix_f32[0][0]);
2127
    const __m128 m01 = _mm_set1_ps(
2128
        sixel_linear_srgb_to_smptec_matrix_f32[0][1]);
2129
    const __m128 m02 = _mm_set1_ps(
2130
        sixel_linear_srgb_to_smptec_matrix_f32[0][2]);
2131
    const __m128 m10 = _mm_set1_ps(
2132
        sixel_linear_srgb_to_smptec_matrix_f32[1][0]);
2133
    const __m128 m11 = _mm_set1_ps(
2134
        sixel_linear_srgb_to_smptec_matrix_f32[1][1]);
2135
    const __m128 m12 = _mm_set1_ps(
2136
        sixel_linear_srgb_to_smptec_matrix_f32[1][2]);
2137
    const __m128 m20 = _mm_set1_ps(
2138
        sixel_linear_srgb_to_smptec_matrix_f32[2][0]);
2139
    const __m128 m21 = _mm_set1_ps(
2140
        sixel_linear_srgb_to_smptec_matrix_f32[2][1]);
2141
    const __m128 m22 = _mm_set1_ps(
2142
        sixel_linear_srgb_to_smptec_matrix_f32[2][2]);
2143
    float sr_out[4];
2144
    float sg_out[4];
2145
    float sb_out[4];
2146
    __m128 vec0;
2147
    __m128 vec1;
2148
    __m128 vec2;
2149

2150
    processed = pixel_total - (pixel_total % 4U);
2151
    for (index = 0U; index < processed; index += 4U) {
×
2152
        float *base;
2153
        __m128 r;
2154
        __m128 g;
2155
        __m128 b;
2156
        __m128 sr;
2157
        __m128 sg;
2158
        __m128 sb;
2159

2160
        base = pixels + index * 3U;
2161

2162
        /*
2163
         * Expand interleaved linear RGB into lane-aligned vectors to
2164
         * reuse the same scatter order as the SMPTEC→linear path.
2165
         */
2166
        r = _mm_set_ps(base[9], base[6], base[3], base[0]);
2167
        g = _mm_set_ps(base[10], base[7], base[4], base[1]);
2168
        b = _mm_set_ps(base[11], base[8], base[5], base[2]);
2169

2170
        sr = _mm_add_ps(_mm_mul_ps(r, m00), _mm_mul_ps(g, m01));
2171
        sr = _mm_add_ps(sr, _mm_mul_ps(b, m02));
2172

2173
        sg = _mm_add_ps(_mm_mul_ps(r, m10), _mm_mul_ps(g, m11));
2174
        sg = _mm_add_ps(sg, _mm_mul_ps(b, m12));
2175

2176
        sb = _mm_add_ps(_mm_mul_ps(r, m20), _mm_mul_ps(g, m21));
2177
        sb = _mm_add_ps(sb, _mm_mul_ps(b, m22));
2178

2179
        sr = _mm_min_ps(one, _mm_max_ps(zero, sr));
2180
        sg = _mm_min_ps(one, _mm_max_ps(zero, sg));
2181
        sb = _mm_min_ps(one, _mm_max_ps(zero, sb));
2182

2183
        _mm_storeu_ps(sr_out, sr);
2184
        _mm_storeu_ps(sg_out, sg);
2185
        _mm_storeu_ps(sb_out, sb);
2186

2187
        /*
2188
         * Use three vector stores to re-pack SoA lanes into the AoS
2189
         * buffer. This mirrors the SMPTEC→linear SSE2 path to keep
2190
         * scatter symmetric and branch-free.
2191
         */
2192
        vec0 = _mm_setr_ps(sr_out[0], sg_out[0], sb_out[0], sr_out[1]);
2193
        vec1 = _mm_setr_ps(sg_out[1], sb_out[1], sr_out[2], sg_out[2]);
2194
        vec2 = _mm_setr_ps(sb_out[2], sr_out[3], sg_out[3], sb_out[3]);
2195

2196
        _mm_storeu_ps(base, vec0);
2197
        _mm_storeu_ps(base + 4U, vec1);
2198
        _mm_storeu_ps(base + 8U, vec2);
2199
    }
2200

2201
    return processed;
2202
}
2203
#endif
2204

2205
#if defined(SIXEL_USE_NEON)
2206
/*
 * NEON kernel: multiply interleaved RGB float triplets by
 * sixel_linear_smptec_to_srgb_matrix_f32 in place, four pixels per
 * iteration, clamping every output channel to [0, 1].
 *
 * pixels       interleaved RGB floats (three per pixel), updated in place
 * pixel_total  number of pixels available at `pixels`
 *
 * Returns the number of pixels converted, i.e. pixel_total rounded down
 * to a multiple of four; the remaining tail is left for the caller.
 */
static size_t
sixel_smptec_to_linear_neon(float *pixels, size_t pixel_total)
{
    const float32x4_t lower = vdupq_n_f32(0.0f);
    const float32x4_t upper = vdupq_n_f32(1.0f);
    const float32x4_t k00 = vdupq_n_f32(
        sixel_linear_smptec_to_srgb_matrix_f32[0][0]);
    const float32x4_t k01 = vdupq_n_f32(
        sixel_linear_smptec_to_srgb_matrix_f32[0][1]);
    const float32x4_t k02 = vdupq_n_f32(
        sixel_linear_smptec_to_srgb_matrix_f32[0][2]);
    const float32x4_t k10 = vdupq_n_f32(
        sixel_linear_smptec_to_srgb_matrix_f32[1][0]);
    const float32x4_t k11 = vdupq_n_f32(
        sixel_linear_smptec_to_srgb_matrix_f32[1][1]);
    const float32x4_t k12 = vdupq_n_f32(
        sixel_linear_smptec_to_srgb_matrix_f32[1][2]);
    const float32x4_t k20 = vdupq_n_f32(
        sixel_linear_smptec_to_srgb_matrix_f32[2][0]);
    const float32x4_t k21 = vdupq_n_f32(
        sixel_linear_smptec_to_srgb_matrix_f32[2][1]);
    const float32x4_t k22 = vdupq_n_f32(
        sixel_linear_smptec_to_srgb_matrix_f32[2][2]);
    const size_t vector_count = pixel_total - (pixel_total % 4U);
    size_t offset;

    for (offset = 0U; offset < vector_count; offset += 4U) {
        float *quad;
        float32x4x3_t px;
        float32x4_t in_r;
        float32x4_t in_g;
        float32x4_t in_b;
        float32x4_t lin_r;
        float32x4_t lin_g;
        float32x4_t lin_b;

        quad = pixels + offset * 3U;

        /* De-interleaving load: val[0]/val[1]/val[2] hold R/G/B. */
        px = vld3q_f32(quad);
        in_r = px.val[0];
        in_g = px.val[1];
        in_b = px.val[2];

        /* Row-by-row 3x3 product, accumulated as (r*k0 + g*k1) + b*k2. */
        lin_r = vmlaq_f32(vmlaq_f32(vmulq_f32(in_r, k00), in_g, k01),
                          in_b, k02);
        lin_g = vmlaq_f32(vmlaq_f32(vmulq_f32(in_r, k10), in_g, k11),
                          in_b, k12);
        lin_b = vmlaq_f32(vmlaq_f32(vmulq_f32(in_r, k20), in_g, k21),
                          in_b, k22);

        px.val[0] = vminq_f32(upper, vmaxq_f32(lower, lin_r));
        px.val[1] = vminq_f32(upper, vmaxq_f32(lower, lin_g));
        px.val[2] = vminq_f32(upper, vmaxq_f32(lower, lin_b));

        /* Interleaving store puts the triplets back in place. */
        vst3q_f32(quad, px);
    }

    return vector_count;
}
2267

2268
/*
 * NEON kernel: multiply interleaved RGB float triplets by
 * sixel_linear_srgb_to_smptec_matrix_f32 in place, four pixels per
 * iteration, clamping every output channel to [0, 1].
 *
 * pixels       interleaved RGB floats (three per pixel), updated in place
 * pixel_total  number of pixels available at `pixels`
 *
 * Returns the number of pixels converted, i.e. pixel_total rounded down
 * to a multiple of four; the remaining tail is left for the caller.
 */
static size_t
sixel_linear_to_smptec_neon(float *pixels, size_t pixel_total)
{
    const float32x4_t lower = vdupq_n_f32(0.0f);
    const float32x4_t upper = vdupq_n_f32(1.0f);
    const float32x4_t k00 = vdupq_n_f32(
        sixel_linear_srgb_to_smptec_matrix_f32[0][0]);
    const float32x4_t k01 = vdupq_n_f32(
        sixel_linear_srgb_to_smptec_matrix_f32[0][1]);
    const float32x4_t k02 = vdupq_n_f32(
        sixel_linear_srgb_to_smptec_matrix_f32[0][2]);
    const float32x4_t k10 = vdupq_n_f32(
        sixel_linear_srgb_to_smptec_matrix_f32[1][0]);
    const float32x4_t k11 = vdupq_n_f32(
        sixel_linear_srgb_to_smptec_matrix_f32[1][1]);
    const float32x4_t k12 = vdupq_n_f32(
        sixel_linear_srgb_to_smptec_matrix_f32[1][2]);
    const float32x4_t k20 = vdupq_n_f32(
        sixel_linear_srgb_to_smptec_matrix_f32[2][0]);
    const float32x4_t k21 = vdupq_n_f32(
        sixel_linear_srgb_to_smptec_matrix_f32[2][1]);
    const float32x4_t k22 = vdupq_n_f32(
        sixel_linear_srgb_to_smptec_matrix_f32[2][2]);
    const size_t vector_count = pixel_total - (pixel_total % 4U);
    size_t offset;

    for (offset = 0U; offset < vector_count; offset += 4U) {
        float *quad;
        float32x4x3_t px;
        float32x4_t in_r;
        float32x4_t in_g;
        float32x4_t in_b;
        float32x4_t out_r;
        float32x4_t out_g;
        float32x4_t out_b;

        quad = pixels + offset * 3U;

        /* De-interleaving load: val[0]/val[1]/val[2] hold R/G/B. */
        px = vld3q_f32(quad);
        in_r = px.val[0];
        in_g = px.val[1];
        in_b = px.val[2];

        /* Row-by-row 3x3 product, accumulated as (r*k0 + g*k1) + b*k2. */
        out_r = vmlaq_f32(vmlaq_f32(vmulq_f32(in_r, k00), in_g, k01),
                          in_b, k02);
        out_g = vmlaq_f32(vmlaq_f32(vmulq_f32(in_r, k10), in_g, k11),
                          in_b, k12);
        out_b = vmlaq_f32(vmlaq_f32(vmulq_f32(in_r, k20), in_g, k21),
                          in_b, k22);

        px.val[0] = vminq_f32(upper, vmaxq_f32(lower, out_r));
        px.val[1] = vminq_f32(upper, vmaxq_f32(lower, out_g));
        px.val[2] = vminq_f32(upper, vmaxq_f32(lower, out_b));

        /* Interleaving store puts the triplets back in place. */
        vst3q_f32(quad, px);
    }

    return vector_count;
}
2329
#endif
2330

2331
/*
 * Run the SMPTE-C -> linear conversion in place over an interleaved RGB
 * float buffer, using the widest SIMD kernel that is both compiled in
 * and permitted by simd_level.
 *
 * Each kernel reports how many pixels it consumed; the next narrower
 * kernel continues from there, and whatever is still left afterwards
 * goes through sixel_smptec_to_linear_scalar() one pixel at a time.
 *
 * pixels       interleaved RGB floats (three per pixel)
 * pixel_total  number of pixels in the buffer
 * simd_level   SIXEL_SIMD_LEVEL_* value selecting the allowed kernels
 */
static void
sixel_smptec_to_linear_float_simd(float *pixels,
                                  size_t pixel_total,
                                  int simd_level)
{
    size_t done = 0U;
    float *px;

    /* Unused when no SIMD backend is compiled in. */
    (void)simd_level;

#if defined(SIXEL_USE_AVX512) && defined(__AVX512F__) && \
        defined(__AVX512BW__)
    if (simd_level >= SIXEL_SIMD_LEVEL_AVX512) {
        done = sixel_smptec_to_linear_avx512(pixels, pixel_total);
    }
#endif

#if defined(SIXEL_USE_AVX2) && defined(__AVX2__)
    if (simd_level >= SIXEL_SIMD_LEVEL_AVX2 && done < pixel_total) {
        done += sixel_smptec_to_linear_avx2(pixels + done * 3U,
                                            pixel_total - done);
    }
#endif

#if defined(SIXEL_USE_SSE2)
    if (simd_level >= SIXEL_SIMD_LEVEL_SSE2 && done < pixel_total) {
        done += sixel_smptec_to_linear_sse2(pixels + done * 3U,
                                            pixel_total - done);
    }
#endif

#if defined(SIXEL_USE_NEON)
    if (simd_level == SIXEL_SIMD_LEVEL_NEON && done < pixel_total) {
        done += sixel_smptec_to_linear_neon(pixels + done * 3U,
                                            pixel_total - done);
    }
#endif

    /* Scalar tail for pixels no vector kernel picked up. */
    while (done < pixel_total) {
        px = pixels + done * 3U;
        sixel_smptec_to_linear_scalar(px, px + 1, px + 2);
        ++done;
    }
}
×
2381

2382
/*
 * Run the linear -> SMPTE-C conversion in place over an interleaved RGB
 * float buffer, using the widest SIMD kernel that is both compiled in
 * and permitted by simd_level.
 *
 * Each kernel reports how many pixels it consumed; the next narrower
 * kernel continues from there, and whatever is still left afterwards
 * goes through sixel_linear_to_smptec_scalar() one pixel at a time.
 *
 * pixels       interleaved RGB floats (three per pixel)
 * pixel_total  number of pixels in the buffer
 * simd_level   SIXEL_SIMD_LEVEL_* value selecting the allowed kernels
 */
static void
sixel_linear_to_smptec_float_simd(float *pixels,
                                  size_t pixel_total,
                                  int simd_level)
{
    size_t done = 0U;
    float *px;

    /* Unused when no SIMD backend is compiled in. */
    (void)simd_level;

#if defined(SIXEL_USE_AVX512) && defined(__AVX512F__) && \
        defined(__AVX512BW__)
    if (simd_level >= SIXEL_SIMD_LEVEL_AVX512) {
        done = sixel_linear_to_smptec_avx512(pixels, pixel_total);
    }
#endif

#if defined(SIXEL_USE_AVX2) && defined(__AVX2__)
    if (simd_level >= SIXEL_SIMD_LEVEL_AVX2 && done < pixel_total) {
        done += sixel_linear_to_smptec_avx2(pixels + done * 3U,
                                            pixel_total - done);
    }
#endif

#if defined(SIXEL_USE_SSE2)
    if (simd_level >= SIXEL_SIMD_LEVEL_SSE2 && done < pixel_total) {
        done += sixel_linear_to_smptec_sse2(pixels + done * 3U,
                                            pixel_total - done);
    }
#endif

#if defined(SIXEL_USE_NEON)
    if (simd_level == SIXEL_SIMD_LEVEL_NEON && done < pixel_total) {
        done += sixel_linear_to_smptec_neon(pixels + done * 3U,
                                            pixel_total - done);
    }
#endif

    /* Scalar tail for pixels no vector kernel picked up. */
    while (done < pixel_total) {
        px = pixels + done * 3U;
        sixel_linear_to_smptec_scalar(px, px + 1, px + 2);
        ++done;
    }
}
×
2432

2433
static void
2434
sixel_colorspace_init_tables(void)
99✔
2435
{
2436
    int i;
99✔
2437
    double gamma_value;
99✔
2438
    double linear_value;
99✔
2439
    double converted;
99✔
2440

2441
    if (tables_initialized) {
99!
2442
        return;
2443
    }
2444

2445
    /*
2446
     * Use the canonical sRGB transfer functions for the LUT to avoid
2447
     * compounding approximation error.  The pow() calls are confined to
2448
     * this one-time initialisation so the runtime conversion paths stay
2449
     * unaffected while keeping the mapping faithful.
2450
     */
2451
    for (i = 0; i < SIXEL_COLORSPACE_LUT_SIZE; ++i) {
25,443!
2452
        gamma_value = (double)i / 255.0;
25,344✔
2453
        if (gamma_value <= 0.04045) {
25,344!
2454
            converted = gamma_value / 12.92;
1,089✔
2455
        } else {
2456
            converted = pow((gamma_value + 0.055) / 1.055, 2.4);
24,255✔
2457
        }
2458
        gamma_to_linear_lut[i] =
50,688✔
2459
            sixel_colorspace_clamp((int)(converted * 255.0 + 0.5));
50,688!
2460
    }
2461

2462
    for (i = 0; i < SIXEL_COLORSPACE_LUT_SIZE; ++i) {
25,443!
2463
        linear_value = (double)i / 255.0;
25,344✔
2464
        if (linear_value <= 0.0031308) {
25,344!
2465
            converted = linear_value * 12.92;
99✔
2466
        } else {
2467
            converted = 1.055 * pow(linear_value, 1.0 / 2.4) - 0.055;
25,245✔
2468
        }
2469
        linear_to_gamma_lut[i] =
50,688✔
2470
            sixel_colorspace_clamp((int)(converted * 255.0 + 0.5));
50,688!
2471
    }
2472

2473
#if (defined(SIXEL_USE_AVX2) && defined(__AVX2__)) || \
2474
        (defined(SIXEL_USE_AVX512) && defined(__AVX512F__) && \
2475
         defined(__AVX512BW__))
2476
    if (!sixel_avx_tables_initialized) {
2477
        for (i = 0; i < SIXEL_COLORSPACE_LUT_SIZE; ++i) {
2478
            sixel_avx_gamma_to_linear_lut32[i] =
2479
                (uint32_t)gamma_to_linear_lut[i];
2480
            sixel_avx_linear_to_gamma_lut32[i] =
2481
                (uint32_t)linear_to_gamma_lut[i];
2482
        }
2483

2484
        sixel_avx_tables_initialized = 1;
2485
    }
2486
#endif
2487

2488
    tables_initialized = 1;
99✔
2489
}
×
2490

2491
static void
2492
sixel_decode_linear_from_colorspace(int colorspace,
×
2493
                                    unsigned char r8,
2494
                                    unsigned char g8,
2495
                                    unsigned char b8,
2496
                                    double *r_lin,
2497
                                    double *g_lin,
2498
                                    double *b_lin)
2499
{
2500
    switch (colorspace) {
×
2501
    case SIXEL_COLORSPACE_GAMMA:
×
2502
        *r_lin = sixel_srgb_to_linear_double(r8);
×
2503
        *g_lin = sixel_srgb_to_linear_double(g8);
×
2504
        *b_lin = sixel_srgb_to_linear_double(b8);
×
2505
        break;
×
2506
    case SIXEL_COLORSPACE_LINEAR:
×
2507
        *r_lin = (double)r8 / 255.0;
×
2508
        *g_lin = (double)g8 / 255.0;
×
2509
        *b_lin = (double)b8 / 255.0;
×
2510
        break;
×
2511
    case SIXEL_COLORSPACE_OKLAB:
×
2512
    {
2513
        double L = (double)r8 / 255.0;
×
2514
        double A = sixel_oklab_decode_ab(g8);
×
2515
        double B = sixel_oklab_decode_ab(b8);
×
2516
        sixel_oklab_to_linear(L, A, B, r_lin, g_lin, b_lin);
×
2517
        break;
×
2518
    }
2519
    case SIXEL_COLORSPACE_CIELAB:
×
2520
    {
2521
        double L;
×
2522
        double A;
×
2523
        double B;
×
2524

2525
        L = (double)r8 / 255.0;
×
2526
        A = sixel_cielab_decode_ab(g8);
×
2527
        B = sixel_cielab_decode_ab(b8);
×
2528
        sixel_cielab_to_linear(L, A, B, r_lin, g_lin, b_lin);
×
2529
        break;
×
2530
    }
2531
    case SIXEL_COLORSPACE_DIN99D:
×
2532
    {
2533
        double L;
×
2534
        double A;
×
2535
        double B;
×
2536

2537
        L = (double)r8 / 255.0;
×
2538
        A = sixel_din99d_decode_ab(g8);
×
2539
        B = sixel_din99d_decode_ab(b8);
×
2540
        sixel_din99d_to_linear(L, A, B, r_lin, g_lin, b_lin);
×
2541
        break;
×
2542
    }
2543
    case SIXEL_COLORSPACE_SMPTEC:
×
2544
    {
2545
        double r_smptec = sixel_smptec_to_linear_double(r8);
×
2546
        double g_smptec = sixel_smptec_to_linear_double(g8);
×
2547
        double b_smptec = sixel_smptec_to_linear_double(b8);
×
2548
        sixel_linear_smptec_to_srgb(r_smptec, g_smptec, b_smptec,
×
2549
                                    r_lin, g_lin, b_lin);
2550
        break;
×
2551
    }
2552
    default:
×
2553
        *r_lin = (double)r8 / 255.0;
×
2554
        *g_lin = (double)g8 / 255.0;
×
2555
        *b_lin = (double)b8 / 255.0;
×
2556
        break;
×
2557
    }
2558
}
×
2559

2560
/*
2561
 * Float variant of the colorspace decoder so RGBFLOAT32 buffers can skip the
2562
 * byte quantisation that the legacy helper performs.
2563
 */
2564
static void
2565
sixel_decode_linear_from_colorspace_float(int colorspace,
×
2566
                                          float r_value,
2567
                                          float g_value,
2568
                                          float b_value,
2569
                                          double *r_lin,
2570
                                          double *g_lin,
2571
                                          double *b_lin)
2572
{
2573
    double r;
×
2574
    double g;
×
2575
    double b;
×
2576

2577
    r = (double)r_value;
×
2578
    g = (double)g_value;
×
2579
    b = (double)b_value;
×
2580

2581
    switch (colorspace) {
×
2582
    case SIXEL_COLORSPACE_GAMMA:
×
2583
        *r_lin = sixel_srgb_unit_to_linear(r);
×
2584
        *g_lin = sixel_srgb_unit_to_linear(g);
×
2585
        *b_lin = sixel_srgb_unit_to_linear(b);
×
2586
        break;
×
2587
    case SIXEL_COLORSPACE_LINEAR:
2588
        *r_lin = sixel_clamp_unit(r);
×
2589
        *g_lin = sixel_clamp_unit(g);
×
2590
        *b_lin = sixel_clamp_unit(b);
×
2591
        break;
×
2592
    case SIXEL_COLORSPACE_OKLAB:
×
2593
        sixel_oklab_to_linear(r, g, b, r_lin, g_lin, b_lin);
×
2594
        break;
×
2595
    case SIXEL_COLORSPACE_CIELAB:
×
2596
        sixel_cielab_to_linear(r, g, b, r_lin, g_lin, b_lin);
×
2597
        break;
×
2598
    case SIXEL_COLORSPACE_DIN99D:
×
2599
        sixel_din99d_to_linear(r, g, b, r_lin, g_lin, b_lin);
×
2600
        break;
×
2601
    case SIXEL_COLORSPACE_SMPTEC:
×
2602
    {
2603
        double r_smptec;
×
2604
        double g_smptec;
×
2605
        double b_smptec;
×
2606

2607
        r_smptec = sixel_smptec_unit_to_linear(r);
×
2608
        g_smptec = sixel_smptec_unit_to_linear(g);
×
2609
        b_smptec = sixel_smptec_unit_to_linear(b);
×
2610
        sixel_linear_smptec_to_srgb(r_smptec,
×
2611
                                    g_smptec,
2612
                                    b_smptec,
2613
                                    r_lin,
2614
                                    g_lin,
2615
                                    b_lin);
2616
        break;
×
2617
    }
2618
    default:
2619
        *r_lin = sixel_clamp_unit(r);
×
2620
        *g_lin = sixel_clamp_unit(g);
×
2621
        *b_lin = sixel_clamp_unit(b);
×
2622
        break;
×
2623
    }
2624
}
×
2625

2626
static void
2627
sixel_encode_linear_to_colorspace(int colorspace,
×
2628
                                  double r_lin,
2629
                                  double g_lin,
2630
                                  double b_lin,
2631
                                  unsigned char *r8,
2632
                                  unsigned char *g8,
2633
                                  unsigned char *b8)
2634
{
2635
    double L;
×
2636
    double A;
×
2637
    double B;
×
2638

2639
    switch (colorspace) {
×
2640
    case SIXEL_COLORSPACE_GAMMA:
×
2641
        *r8 = sixel_linear_double_to_srgb(r_lin);
×
2642
        *g8 = sixel_linear_double_to_srgb(g_lin);
×
2643
        *b8 = sixel_linear_double_to_srgb(b_lin);
×
2644
        break;
×
2645
    case SIXEL_COLORSPACE_LINEAR:
×
2646
        *r8 = sixel_linear_double_to_byte(r_lin);
×
2647
        *g8 = sixel_linear_double_to_byte(g_lin);
×
2648
        *b8 = sixel_linear_double_to_byte(b_lin);
×
2649
        break;
×
2650
    case SIXEL_COLORSPACE_OKLAB:
×
2651
        sixel_linear_to_oklab(r_lin, g_lin, b_lin, &L, &A, &B);
×
2652
        *r8 = sixel_oklab_encode_L(L);
×
2653
        *g8 = sixel_oklab_encode_ab(A);
×
2654
        *b8 = sixel_oklab_encode_ab(B);
×
2655
        break;
×
2656
    case SIXEL_COLORSPACE_CIELAB:
×
2657
        sixel_linear_to_cielab(r_lin, g_lin, b_lin, &L, &A, &B);
×
2658
        *r8 = sixel_cielab_encode_L(L);
×
2659
        *g8 = sixel_cielab_encode_ab(A);
×
2660
        *b8 = sixel_cielab_encode_ab(B);
×
2661
        break;
×
2662
    case SIXEL_COLORSPACE_DIN99D:
×
2663
        sixel_linear_to_din99d(r_lin, g_lin, b_lin, &L, &A, &B);
×
2664
        *r8 = sixel_din99d_encode_L(L);
×
2665
        *g8 = sixel_din99d_encode_ab(A);
×
2666
        *b8 = sixel_din99d_encode_ab(B);
×
2667
        break;
×
2668
    case SIXEL_COLORSPACE_SMPTEC:
×
2669
    {
2670
        double r_smptec;
×
2671
        double g_smptec;
×
2672
        double b_smptec;
×
2673

2674
        sixel_linear_srgb_to_smptec(r_lin, g_lin, b_lin,
×
2675
                                     &r_smptec, &g_smptec, &b_smptec);
2676

2677
        *r8 = sixel_linear_double_to_smptec(r_smptec);
×
2678
        *g8 = sixel_linear_double_to_smptec(g_smptec);
×
2679
        *b8 = sixel_linear_double_to_smptec(b_smptec);
×
2680
        break;
×
2681
    }
2682
    default:
×
2683
        *r8 = sixel_linear_double_to_byte(r_lin);
×
2684
        *g8 = sixel_linear_double_to_byte(g_lin);
×
2685
        *b8 = sixel_linear_double_to_byte(b_lin);
×
2686
        break;
×
2687
    }
2688
}
×
2689

2690
static void
2691
sixel_encode_linear_to_colorspace_float(int colorspace,
×
2692
                                        double r_lin,
2693
                                        double g_lin,
2694
                                        double b_lin,
2695
                                        float *r_value,
2696
                                        float *g_value,
2697
                                        float *b_value)
2698
{
2699
    double r;
×
2700
    double g;
×
2701
    double b;
×
2702

2703
    switch (colorspace) {
×
2704
    case SIXEL_COLORSPACE_GAMMA:
×
2705
        r = sixel_linear_to_srgb_unit(r_lin);
×
2706
        g = sixel_linear_to_srgb_unit(g_lin);
×
2707
        b = sixel_linear_to_srgb_unit(b_lin);
×
2708
        break;
×
2709
    case SIXEL_COLORSPACE_LINEAR:
2710
        r = sixel_clamp_unit(r_lin);
×
2711
        g = sixel_clamp_unit(g_lin);
×
2712
        b = sixel_clamp_unit(b_lin);
×
2713
        break;
×
2714
    case SIXEL_COLORSPACE_OKLAB:
×
2715
        sixel_linear_to_oklab(r_lin, g_lin, b_lin, &r, &g, &b);
×
2716
        r = sixel_clamp_unit(r);
×
2717
        g = sixel_oklab_clamp_ab(g);
×
2718
        b = sixel_oklab_clamp_ab(b);
×
2719
        break;
×
2720
    case SIXEL_COLORSPACE_CIELAB:
×
2721
        sixel_linear_to_cielab(r_lin, g_lin, b_lin, &r, &g, &b);
×
2722
        r = sixel_clamp_unit(r);
×
2723
        g = sixel_cielab_clamp_ab(g);
×
2724
        b = sixel_cielab_clamp_ab(b);
×
2725
        break;
×
2726
    case SIXEL_COLORSPACE_DIN99D:
×
2727
        sixel_linear_to_din99d(r_lin, g_lin, b_lin, &r, &g, &b);
×
2728
        r = sixel_clamp_unit(r);
×
2729
        g = sixel_din99d_clamp_ab_norm(g);
×
2730
        b = sixel_din99d_clamp_ab_norm(b);
×
2731
        break;
×
2732
    case SIXEL_COLORSPACE_SMPTEC:
×
2733
    {
2734
        double r_smptec;
×
2735
        double g_smptec;
×
2736
        double b_smptec;
×
2737

2738
        sixel_linear_srgb_to_smptec(r_lin,
×
2739
                                     g_lin,
2740
                                     b_lin,
2741
                                     &r_smptec,
2742
                                     &g_smptec,
2743
                                     &b_smptec);
2744
        r = sixel_linear_to_smptec_unit(r_smptec);
×
2745
        g = sixel_linear_to_smptec_unit(g_smptec);
×
2746
        b = sixel_linear_to_smptec_unit(b_smptec);
×
2747
        break;
×
2748
    }
2749
    default:
2750
        r = sixel_clamp_unit(r_lin);
×
2751
        g = sixel_clamp_unit(g_lin);
×
2752
        b = sixel_clamp_unit(b_lin);
×
2753
        break;
×
2754
    }
2755

2756
    *r_value = (float)r;
×
2757
    *g_value = (float)g;
×
2758
    *b_value = (float)b;
×
2759
}
×
2760

2761
static SIXELSTATUS
2762
sixel_convert_pixels_via_linear(unsigned char *pixels,
×
2763
                                size_t size,
2764
                                int pixelformat,
2765
                                int colorspace_src,
2766
                                int colorspace_dst)
2767
{
2768
    size_t i;
×
2769
    int step;
×
2770
    int index_r;
×
2771
    int index_g;
×
2772
    int index_b;
×
2773

2774
    if (colorspace_src == colorspace_dst) {
×
2775
        return SIXEL_OK;
2776
    }
2777

2778
    switch (pixelformat) {
×
2779
    case SIXEL_PIXELFORMAT_RGB888:
2780
        step = 3;
2781
        index_r = 0;
2782
        index_g = 1;
2783
        index_b = 2;
2784
        break;
2785
    case SIXEL_PIXELFORMAT_BGR888:
×
2786
        step = 3;
×
2787
        index_r = 2;
×
2788
        index_g = 1;
×
2789
        index_b = 0;
×
2790
        break;
×
2791
    case SIXEL_PIXELFORMAT_RGBA8888:
×
2792
        step = 4;
×
2793
        index_r = 0;
×
2794
        index_g = 1;
×
2795
        index_b = 2;
×
2796
        break;
×
2797
    case SIXEL_PIXELFORMAT_BGRA8888:
×
2798
        step = 4;
×
2799
        index_r = 2;
×
2800
        index_g = 1;
×
2801
        index_b = 0;
×
2802
        break;
×
2803
    case SIXEL_PIXELFORMAT_ARGB8888:
×
2804
        step = 4;
×
2805
        index_r = 1;
×
2806
        index_g = 2;
×
2807
        index_b = 3;
×
2808
        break;
×
2809
    case SIXEL_PIXELFORMAT_ABGR8888:
×
2810
        step = 4;
×
2811
        index_r = 3;
×
2812
        index_g = 2;
×
2813
        index_b = 1;
×
2814
        break;
×
2815
    case SIXEL_PIXELFORMAT_G8:
×
2816
        step = 1;
×
2817
        index_r = 0;
×
2818
        index_g = 0;
×
2819
        index_b = 0;
×
2820
        break;
×
2821
    case SIXEL_PIXELFORMAT_GA88:
×
2822
        step = 2;
×
2823
        index_r = 0;
×
2824
        index_g = 0;
×
2825
        index_b = 0;
×
2826
        break;
×
2827
    case SIXEL_PIXELFORMAT_AG88:
×
2828
        step = 2;
×
2829
        index_r = 1;
×
2830
        index_g = 1;
×
2831
        index_b = 1;
×
2832
        break;
×
2833
    default:
2834
        return SIXEL_BAD_INPUT;
2835
    }
2836

2837
    if (size % (size_t)step != 0) {
×
2838
        return SIXEL_BAD_INPUT;
2839
    }
2840

2841
    for (i = 0; i < size; i += (size_t)step) {
×
2842
        unsigned char *pr = pixels + i + (size_t)index_r;
×
2843
        unsigned char *pg = pixels + i + (size_t)index_g;
×
2844
        unsigned char *pb = pixels + i + (size_t)index_b;
×
2845
        double r_lin;
×
2846
        double g_lin;
×
2847
        double b_lin;
×
2848

2849
        sixel_decode_linear_from_colorspace(colorspace_src,
×
2850
                                            *pr,
×
2851
                                            *pg,
×
2852
                                            *pb,
×
2853
                                            &r_lin,
2854
                                            &g_lin,
2855
                                            &b_lin);
2856

2857
        sixel_encode_linear_to_colorspace(colorspace_dst,
×
2858
                                          r_lin,
2859
                                          g_lin,
2860
                                          b_lin,
2861
                                          pr,
2862
                                          pg,
2863
                                          pb);
2864
    }
2865

2866
    return SIXEL_OK;
2867
}
2868

2869
/*
2870
 * Convert RGBFLOAT32 buffers in-place by round-tripping through linear space.
2871
 * The general path keeps double intermediates for OKLab/CIELAB precision, and
2872
 * the SMPTEC↔linear pair is specialized with SIMD float math to accelerate the
2873
 * matrix multiply without changing the storage layout.
2874
 */
2875
static SIXELSTATUS
2876
sixel_convert_pixels_via_linear_float_chunk(float *pixels,
×
2877
                                            size_t pixel_total,
2878
                                            int colorspace_src,
2879
                                            int colorspace_dst,
2880
                                            int simd_level)
2881
{
2882
    size_t index;
×
2883
    size_t base;
×
2884
    double r_lin;
×
2885
    double g_lin;
×
2886
    double b_lin;
×
2887
    float *pr;
×
2888
    float *pg;
×
2889
    float *pb;
×
2890

2891
    if (colorspace_src == colorspace_dst) {
×
2892
        return SIXEL_OK;
2893
    }
2894

2895
    if (colorspace_src == SIXEL_COLORSPACE_SMPTEC &&
×
2896
            colorspace_dst == SIXEL_COLORSPACE_LINEAR) {
×
2897
        sixel_smptec_to_linear_float_simd(pixels, pixel_total, simd_level);
×
2898
        return SIXEL_OK;
×
2899
    }
2900

2901
    if (colorspace_src == SIXEL_COLORSPACE_LINEAR &&
×
2902
            colorspace_dst == SIXEL_COLORSPACE_SMPTEC) {
×
2903
        sixel_linear_to_smptec_float_simd(pixels, pixel_total, simd_level);
×
2904
        return SIXEL_OK;
×
2905
    }
2906

2907
    for (index = 0U; index < pixel_total; ++index) {
×
2908
        base = index * 3U;
×
2909
        pr = pixels + base + 0U;
×
2910
        pg = pixels + base + 1U;
×
2911
        pb = pixels + base + 2U;
×
2912

2913
        sixel_decode_linear_from_colorspace_float(colorspace_src,
×
2914
                                                  *pr,
2915
                                                  *pg,
2916
                                                  *pb,
2917
                                                  &r_lin,
2918
                                                  &g_lin,
2919
                                                  &b_lin);
2920

2921
        sixel_encode_linear_to_colorspace_float(colorspace_dst,
×
2922
                                                r_lin,
2923
                                                g_lin,
2924
                                                b_lin,
2925
                                                pr,
2926
                                                pg,
2927
                                                pb);
2928
    }
2929

2930
    return SIXEL_OK;
2931
}
2932

2933
#if SIXEL_ENABLE_THREADS
2934
typedef struct sixel_colorspace_parallel_context {
2935
    float *pixels;
2936
    size_t pixel_total;
2937
    size_t chunk_pixels;
2938
    int colorspace_src;
2939
    int colorspace_dst;
2940
    int simd_level;
2941
    sixel_logger_t *logger;
2942
} sixel_colorspace_parallel_context_t;
2943

2944
/*
 * Narrow a size_t to int for the logger's integer fields, saturating at
 * INT_MAX instead of overflowing.
 */
static int
sixel_colorspace_log_clamp(size_t value)
{
    return (value > (size_t)INT_MAX) ? INT_MAX : (int)value;
}
2953

2954
/*
 * Allow deployments to defer thread fan-out on tiny buffers via
 * SIXEL_COLORSPACE_PARALLEL_MIN_PIXELS. Defaults to 65537 pixels so the
 * colorspace conversion waits for moderately sized frames before fanning
 * out, but callers can override the behavior through the environment.
 *
 * The environment is consulted on the first call only; the result is cached
 * in function-local statics and returned unchanged afterwards. The cache is
 * plain static storage with no synchronization in this function.
 * NOTE(review): confirm the first call cannot race with another thread.
 *
 * The override must be a complete base-10 unsigned number. Empty text,
 * trailing garbage, out-of-range values and negative values keep the
 * default; values larger than SIZE_MAX saturate at SIZE_MAX.
 */
static size_t
sixel_colorspace_parallel_min_pixels(void)
{
    static int initialized = 0;
    static size_t threshold = 65537;
    char const *text;
    char const *cursor;
    char *endptr;
    unsigned long long parsed;

    if (initialized) {
        return threshold;
    }

    initialized = 1;
    text = getenv("SIXEL_COLORSPACE_PARALLEL_MIN_PIXELS");
    if (text == NULL || text[0] == '\0') {
        return threshold;
    }

    /*
     * strtoull() accepts a leading minus sign and negates the result in the
     * unsigned type, so "-1" would silently become ULLONG_MAX without
     * raising ERANGE. A fully consumed number can only contain '-' as that
     * sign, so any occurrence means the value is negative: reject it.
     */
    for (cursor = text; *cursor != '\0'; ++cursor) {
        if (*cursor == '-') {
            return threshold;
        }
    }

    errno = 0;
    endptr = NULL;
    parsed = strtoull(text, &endptr, 10);
    if (endptr == text || *endptr != '\0' || errno == ERANGE) {
        return threshold;
    }

    if (parsed > (unsigned long long)SIZE_MAX) {
        threshold = SIZE_MAX;
    } else {
        threshold = (size_t)parsed;
    }

    return threshold;
}
2993

2994
/*
2995
 * Worker slices the pixel array into fixed chunks to keep writeback ranges
2996
 * disjoint. Each job reuses the same SIMD level decision to avoid repeated
2997
 * CPU feature detection.
2998
 */
2999
static int
3000
sixel_colorspace_parallel_worker(tp_job_t job,
3001
                                 void *userdata,
3002
                                 void *workspace)
3003
{
3004
    sixel_colorspace_parallel_context_t *ctx;
3005
    sixel_logger_t *logger;
3006
    size_t start;
3007
    size_t remaining;
3008
    size_t end;
3009
    int status;
3010
    int start_row;
3011
    int end_row;
3012

3013
    (void)workspace;
3014

3015
    ctx = (sixel_colorspace_parallel_context_t *)userdata;
3016
    logger = NULL;
3017
    start = 0U;
3018
    remaining = 0U;
3019
    end = 0U;
3020
    status = SIXEL_OK;
3021
    start_row = 0;
3022
    end_row = 0;
3023
    if (ctx == NULL) {
×
3024
        return SIXEL_BAD_ARGUMENT;
3025
    }
3026

3027
    if (job.band_index < 0) {
×
3028
        return SIXEL_BAD_ARGUMENT;
3029
    }
3030

3031
    start = (size_t)job.band_index * ctx->chunk_pixels;
3032
    if (start >= ctx->pixel_total) {
×
3033
        return SIXEL_OK;
3034
    }
3035

3036
    remaining = ctx->pixel_total - start;
3037
    if (remaining > ctx->chunk_pixels) {
×
3038
        remaining = ctx->chunk_pixels;
3039
    }
3040

3041
    end = start + remaining;
3042
    logger = ctx->logger;
3043
    if (logger != NULL && logger->active) {
×
3044
        start_row = sixel_colorspace_log_clamp(start);
3045
        end_row = sixel_colorspace_log_clamp(end);
3046
        sixel_logger_logf(logger,
3047
                          "worker",
3048
                          "colorspace",
3049
                          "start",
3050
                          job.band_index,
3051
                          start_row,
3052
                          start_row,
3053
                          end_row,
3054
                          start_row,
3055
                          end_row,
3056
                          "chunk=%zu", remaining);
3057
    }
3058

3059
    status = sixel_convert_pixels_via_linear_float_chunk(
3060
        ctx->pixels + start * 3U,
3061
        remaining,
3062
        ctx->colorspace_src,
3063
        ctx->colorspace_dst,
3064
        ctx->simd_level);
3065

3066
    if (logger != NULL && logger->active) {
×
3067
        sixel_logger_logf(logger,
3068
                          "worker",
3069
                          "colorspace",
3070
                          "finish",
3071
                          job.band_index,
3072
                          end_row,
3073
                          start_row,
3074
                          end_row,
3075
                          start_row,
3076
                          end_row,
3077
                          "status=%d", status);
3078
    }
3079

3080
    return status;
3081
}
3082
#endif
3083

3084
static SIXELSTATUS
3085
sixel_convert_pixels_via_linear_float(float *pixels,
×
3086
                                      size_t size,
3087
                                      int colorspace_src,
3088
                                      int colorspace_dst)
3089
{
3090
    size_t pixel_total;
×
3091
    int simd_level;
×
3092
    SIXELSTATUS status;
×
3093
#if SIXEL_ENABLE_THREADS
3094
    size_t job_count;
3095
    size_t chunk_pixels;
3096
    sixel_colorspace_parallel_context_t ctx;
3097
    threadpool_t *pool;
3098
    tp_job_t job;
3099
    int threads;
3100
    int queue_depth;
3101
    size_t job_index;
3102
    int rc;
3103
    sixel_logger_t logger;
3104
    sixel_logger_t *logger_ref;
3105
#endif
3106

3107
    if (colorspace_src == colorspace_dst) {
×
3108
        return SIXEL_OK;
3109
    }
3110

3111
    if (size % (3U * sizeof(float)) != 0U) {
×
3112
        return SIXEL_BAD_INPUT;
3113
    }
3114

3115
    pixel_total = size / (3U * sizeof(float));
×
3116
    simd_level = sixel_cpu_simd_level();
×
3117
    status = SIXEL_OK;
×
3118

3119
#if SIXEL_ENABLE_THREADS
3120
    logger_ref = NULL;
3121
    rc = SIXEL_RUNTIME_ERROR;
3122
    /*
3123
     * Enable the timeline logger when SIXEL_PARALLEL_LOG_PATH points to a
3124
     * writable sink. The controller emits a configure event even if the
3125
     * call later falls back to the serial path so the timeline remains
3126
     * continuous.
3127
     */
3128
    sixel_logger_init(&logger);
3129
    (void)sixel_logger_prepare_env(&logger);
3130
    if (logger.active) {
×
3131
        logger_ref = &logger;
3132
        sixel_logger_logf(logger_ref,
3133
                          "controller",
3134
                          "colorspace",
3135
                          "configure",
3136
                          -1,
3137
                          -1,
3138
                          0,
3139
                          sixel_colorspace_log_clamp(pixel_total),
3140
                          0,
3141
                          sixel_colorspace_log_clamp(pixel_total),
3142
                          "pixels=%zu simd=%d",
3143
                          pixel_total,
3144
                          simd_level);
3145
    }
3146

3147
    if (pixel_total >= sixel_colorspace_parallel_min_pixels()) {
×
3148
        threads = sixel_threads_resolve();
3149
        if (threads > 1) {
×
3150
            chunk_pixels = (pixel_total + (size_t)threads - 1U)
3151
                / (size_t)threads;
3152
            if (chunk_pixels == 0U) {
×
3153
                chunk_pixels = pixel_total;
3154
            }
3155

3156
            ctx.pixels = pixels;
3157
            ctx.pixel_total = pixel_total;
3158
            ctx.chunk_pixels = chunk_pixels;
3159
            ctx.colorspace_src = colorspace_src;
3160
            ctx.colorspace_dst = colorspace_dst;
3161
            ctx.simd_level = simd_level;
3162
            ctx.logger = logger_ref;
3163

3164
            queue_depth = threads * 3;
3165
            job_count = (pixel_total + chunk_pixels - 1U) / chunk_pixels;
3166
            if (queue_depth > (int)job_count) {
×
3167
                queue_depth = (int)job_count;
3168
            }
3169
            if (queue_depth < 1) {
×
3170
                queue_depth = 1;
3171
            }
3172

3173
            if (logger_ref != NULL) {
×
3174
                sixel_logger_logf(logger_ref,
3175
                                  "controller",
3176
                                  "colorspace",
3177
                                  "start",
3178
                                  -1,
3179
                                  -1,
3180
                                  0,
3181
                                  sixel_colorspace_log_clamp(pixel_total),
3182
                                  0,
3183
                                  sixel_colorspace_log_clamp(pixel_total),
3184
                                  "threads=%d chunk=%zu jobs=%zu",
3185
                                  threads,
3186
                                  chunk_pixels,
3187
                                  job_count);
3188
            }
3189

3190
            pool = threadpool_create(threads,
3191
                                     queue_depth,
3192
                                     0,
3193
                                     sixel_colorspace_parallel_worker,
3194
                                     &ctx);
3195
            if (pool != NULL) {
×
3196
                for (job_index = 0U; job_index < job_count; ++job_index) {
×
3197
                    job.band_index = (int)job_index;
3198
                    threadpool_push(pool, job);
3199
                }
3200

3201
                threadpool_finish(pool);
3202
                rc = threadpool_get_error(pool);
3203
                threadpool_destroy(pool);
3204

3205
                if (rc == SIXEL_OK) {
×
3206
                    if (logger_ref != NULL) {
×
3207
                        sixel_logger_logf(
3208
                            logger_ref,
3209
                            "controller",
3210
                            "colorspace",
3211
                            "finish",
3212
                            -1,
3213
                            -1,
3214
                            0,
3215
                            sixel_colorspace_log_clamp(pixel_total),
3216
                            0,
3217
                            sixel_colorspace_log_clamp(pixel_total),
3218
                            "parallel finish threads=%d", threads);
3219
                    }
3220
                    status = SIXEL_OK;
3221
                    goto end;
3222
                }
3223
            }
3224

3225
            if (logger_ref != NULL) {
×
3226
                sixel_logger_logf(logger_ref,
3227
                                  "controller",
3228
                                  "colorspace",
3229
                                  "fallback",
3230
                                  -1,
3231
                                  -1,
3232
                                  0,
3233
                                  sixel_colorspace_log_clamp(pixel_total),
3234
                                  0,
3235
                                  sixel_colorspace_log_clamp(pixel_total),
3236
                                  "threadpool fallback rc=%d", rc);
3237
            }
3238
        } else if (logger_ref != NULL) {
×
3239
            sixel_logger_logf(logger_ref,
3240
                              "controller",
3241
                              "colorspace",
3242
                              "fallback",
3243
                              -1,
3244
                              -1,
3245
                              0,
3246
                              sixel_colorspace_log_clamp(pixel_total),
3247
                              0,
3248
                              sixel_colorspace_log_clamp(pixel_total),
3249
                              "threads=%d", threads);
3250
        }
3251
    } else if (logger_ref != NULL) {
×
3252
        sixel_logger_logf(logger_ref,
3253
                          "controller",
3254
                          "colorspace",
3255
                          "fallback",
3256
                          -1,
3257
                          -1,
3258
                          0,
3259
                          sixel_colorspace_log_clamp(pixel_total),
3260
                          0,
3261
                          sixel_colorspace_log_clamp(pixel_total),
3262
                          "below threshold=%zu",
3263
                          sixel_colorspace_parallel_min_pixels());
3264
    }
3265
#endif
3266

3267
#if SIXEL_ENABLE_THREADS
3268
    if (logger_ref != NULL) {
×
3269
        sixel_logger_logf(logger_ref,
3270
                          "worker",
3271
                          "colorspace",
3272
                          "start",
3273
                          0,
3274
                          0,
3275
                          0,
3276
                          sixel_colorspace_log_clamp(pixel_total),
3277
                          0,
3278
                          sixel_colorspace_log_clamp(pixel_total),
3279
                          "serial chunk size=%zu", pixel_total);
3280
    }
3281
#endif
3282

3283
    status = sixel_convert_pixels_via_linear_float_chunk(pixels,
×
3284
                                                         pixel_total,
3285
                                                         colorspace_src,
3286
                                                         colorspace_dst,
3287
                                                         simd_level);
3288

3289
#if SIXEL_ENABLE_THREADS
3290
    if (logger_ref != NULL) {
×
3291
        sixel_logger_logf(logger_ref,
3292
                          "worker",
3293
                          "colorspace",
3294
                          "finish",
3295
                          0,
3296
                          sixel_colorspace_log_clamp(pixel_total),
3297
                          0,
3298
                          sixel_colorspace_log_clamp(pixel_total),
3299
                          0,
3300
                          sixel_colorspace_log_clamp(pixel_total),
3301
                          "serial status=%d", status);
3302
    }
3303

3304
end:
3305
    sixel_logger_close(&logger);
3306
#endif
3307

3308
    return status;
×
3309
}
3310

3311
static unsigned char
3312
sixel_colorspace_convert_component(unsigned char value,
26,775✔
3313
                                   int colorspace_src,
3314
                                   int colorspace_dst)
3315
{
3316
    if (colorspace_src == colorspace_dst) {
26,775!
3317
        return value;
3318
    }
3319

3320
    if (colorspace_src == SIXEL_COLORSPACE_GAMMA &&
26,775!
3321
            colorspace_dst == SIXEL_COLORSPACE_LINEAR) {
26,775!
3322
        return gamma_to_linear_lut[value];
×
3323
    }
3324

3325
    if (colorspace_src == SIXEL_COLORSPACE_LINEAR &&
26,775!
3326
            colorspace_dst == SIXEL_COLORSPACE_GAMMA) {
26,775!
3327
        return linear_to_gamma_lut[value];
×
3328
    }
3329

3330
    return value;
3331
}
3332

3333
int
3334
sixel_colorspace_supports_pixelformat(int pixelformat)
99✔
3335
{
3336
    switch (pixelformat) {
99!
3337
    case SIXEL_PIXELFORMAT_RGB888:
3338
    case SIXEL_PIXELFORMAT_BGR888:
3339
    case SIXEL_PIXELFORMAT_RGBA8888:
3340
    case SIXEL_PIXELFORMAT_ARGB8888:
3341
    case SIXEL_PIXELFORMAT_BGRA8888:
3342
    case SIXEL_PIXELFORMAT_ABGR8888:
3343
    case SIXEL_PIXELFORMAT_G8:
3344
    case SIXEL_PIXELFORMAT_GA88:
3345
    case SIXEL_PIXELFORMAT_AG88:
3346
    case SIXEL_PIXELFORMAT_RGBFLOAT32:
3347
    case SIXEL_PIXELFORMAT_LINEARRGBFLOAT32:
3348
    case SIXEL_PIXELFORMAT_OKLABFLOAT32:
3349
    case SIXEL_PIXELFORMAT_CIELABFLOAT32:
3350
    case SIXEL_PIXELFORMAT_DIN99DFLOAT32:
3351
        return 1;
3352
    default:
3353
        break;
×
3354
    }
3355

3356
    return 0;
×
3357
}
3358

3359
SIXELAPI SIXELSTATUS
3360
sixel_helper_convert_colorspace(unsigned char *pixels,
99✔
3361
                                size_t size,
3362
                                int pixelformat,
3363
                                int colorspace_src,
3364
                                int colorspace_dst)
3365
{
3366
    size_t i;
99✔
3367
    int simd_level;
99✔
3368

3369
    if (pixels == NULL) {
99!
3370
        sixel_helper_set_additional_message(
×
3371
            "sixel_helper_convert_colorspace: pixels is null.");
3372
        return SIXEL_BAD_ARGUMENT;
×
3373
    }
3374

3375
    if (colorspace_src == colorspace_dst) {
99!
3376
        return SIXEL_OK;
3377
    }
3378

3379
    if (!sixel_colorspace_supports_pixelformat(pixelformat)) {
99!
3380
        sixel_helper_set_additional_message(
×
3381
            "sixel_helper_convert_colorspace: unsupported pixelformat.");
3382
        return SIXEL_BAD_INPUT;
×
3383
    }
3384

3385
    sixel_colorspace_init_tables();
99✔
3386

3387
#if (defined(SIXEL_USE_AVX512) && defined(__AVX512F__) && \
3388
        defined(__AVX512BW__)) || \
3389
        (defined(SIXEL_USE_AVX2) && defined(__AVX2__)) || \
3390
        defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
3391
    simd_level = sixel_cpu_simd_level();
66✔
3392
#else
3393
    simd_level = SIXEL_SIMD_LEVEL_SCALAR;
33✔
3394
#endif
3395

3396
#if (!defined(SIXEL_USE_AVX512) || !defined(__AVX512F__) || \
3397
        !defined(__AVX512BW__)) && \
3398
        (!defined(SIXEL_USE_AVX2) || !defined(__AVX2__)) && \
3399
        !defined(SIXEL_USE_SSE2) && !defined(SIXEL_USE_NEON)
3400
    (void)simd_level;
33✔
3401
#endif
3402

3403
    if (SIXEL_PIXELFORMAT_IS_FLOAT32(pixelformat)) {
99!
3404
        return sixel_convert_pixels_via_linear_float((float *)pixels,
×
3405
                                                     size,
3406
                                                     colorspace_src,
3407
                                                     colorspace_dst);
3408
    }
3409

3410
#if defined(SIXEL_USE_AVX512) && defined(__AVX512F__) && \
3411
        defined(__AVX512BW__)
3412
    if (simd_level >= SIXEL_SIMD_LEVEL_AVX512) {
3413
        SIXELSTATUS avx512_status;
3414

3415
        avx512_status = sixel_colorspace_convert_avx512(pixels,
3416
                                                        size,
3417
                                                        pixelformat,
3418
                                                        colorspace_src,
3419
                                                        colorspace_dst);
3420
        if (avx512_status == SIXEL_OK) {
3421
            return SIXEL_OK;
3422
        }
3423
    }
3424
#endif
3425

3426
#if defined(SIXEL_USE_AVX2) && defined(__AVX2__)
3427
    if (simd_level >= SIXEL_SIMD_LEVEL_AVX2) {
3428
        SIXELSTATUS avx_status;
3429

3430
        avx_status = sixel_colorspace_convert_avx2(pixels,
3431
                                                   size,
3432
                                                   pixelformat,
3433
                                                   colorspace_src,
3434
                                                   colorspace_dst);
3435
        if (avx_status == SIXEL_OK) {
3436
            return SIXEL_OK;
3437
        }
3438
    }
3439
#endif
3440

3441
#if defined(SIXEL_USE_SSE2)
3442
    if (simd_level >= SIXEL_SIMD_LEVEL_SSE2) {
66!
3443
        SIXELSTATUS sse_status;
66✔
3444

3445
        sse_status = sixel_colorspace_convert_sse2(pixels,
66✔
3446
                                                   size,
3447
                                                   pixelformat,
3448
                                                   colorspace_src,
3449
                                                   colorspace_dst);
3450
        if (sse_status == SIXEL_OK) {
66!
3451
            return SIXEL_OK;
3452
        }
3453
    }
3454
#endif
3455

3456
#if defined(SIXEL_USE_NEON)
3457
    if (simd_level == SIXEL_SIMD_LEVEL_NEON &&
×
3458
            sixel_colorspace_neon_supported_format(pixelformat)) {
3459
        SIXELSTATUS neon_status;
3460

3461
        neon_status = sixel_colorspace_convert_neon(pixels,
3462
                                                    size,
3463
                                                    pixelformat,
3464
                                                    colorspace_src,
3465
                                                    colorspace_dst);
3466
        if (neon_status == SIXEL_OK) {
×
3467
            return SIXEL_OK;
3468
        }
3469
    }
×
3470
#endif
3471

3472
    if (colorspace_src == SIXEL_COLORSPACE_OKLAB ||
99!
3473
            colorspace_dst == SIXEL_COLORSPACE_OKLAB ||
99!
3474
            colorspace_src == SIXEL_COLORSPACE_CIELAB ||
99!
3475
            colorspace_dst == SIXEL_COLORSPACE_CIELAB ||
99!
3476
            colorspace_src == SIXEL_COLORSPACE_DIN99D ||
99!
3477
            colorspace_dst == SIXEL_COLORSPACE_DIN99D ||
99!
3478
            colorspace_src == SIXEL_COLORSPACE_SMPTEC ||
99!
3479
            colorspace_dst == SIXEL_COLORSPACE_SMPTEC) {
99!
3480
        SIXELSTATUS status = sixel_convert_pixels_via_linear(pixels,
×
3481
                                                             size,
3482
                                                             pixelformat,
3483
                                                             colorspace_src,
3484
                                                             colorspace_dst);
3485
        if (SIXEL_FAILED(status)) {
×
3486
            sixel_helper_set_additional_message(
×
3487
                "sixel_helper_convert_colorspace: unsupported "
3488
                "pixelformat for conversion.");
3489
        }
3490
        return status;
×
3491
    }
3492

3493
    switch (pixelformat) {
99!
3494
    case SIXEL_PIXELFORMAT_RGB888:
99✔
3495
        if (size % 3 != 0) {
99!
3496
            sixel_helper_set_additional_message(
×
3497
                "sixel_helper_convert_colorspace: invalid data size.");
3498
            return SIXEL_BAD_INPUT;
×
3499
        }
3500
        for (i = 0; i + 2 < size; i += 3) {
9,024!
3501
            pixels[i + 0] = sixel_colorspace_convert_component(
8,925✔
3502
                pixels[i + 0], colorspace_src, colorspace_dst);
8,925✔
3503
            pixels[i + 1] = sixel_colorspace_convert_component(
8,925✔
3504
                pixels[i + 1], colorspace_src, colorspace_dst);
8,925✔
3505
            pixels[i + 2] = sixel_colorspace_convert_component(
8,925✔
3506
                pixels[i + 2], colorspace_src, colorspace_dst);
8,925✔
3507
        }
3508
        break;
3509
    case SIXEL_PIXELFORMAT_BGR888:
×
3510
        if (size % 3 != 0) {
×
3511
            sixel_helper_set_additional_message(
×
3512
                "sixel_helper_convert_colorspace: invalid data size.");
3513
            return SIXEL_BAD_INPUT;
×
3514
        }
3515
        for (i = 0; i + 2 < size; i += 3) {
×
3516
            pixels[i + 0] = sixel_colorspace_convert_component(
×
3517
                pixels[i + 0], colorspace_src, colorspace_dst);
×
3518
            pixels[i + 1] = sixel_colorspace_convert_component(
×
3519
                pixels[i + 1], colorspace_src, colorspace_dst);
×
3520
            pixels[i + 2] = sixel_colorspace_convert_component(
×
3521
                pixels[i + 2], colorspace_src, colorspace_dst);
×
3522
        }
3523
        break;
3524
    case SIXEL_PIXELFORMAT_RGBA8888:
×
3525
        if (size % 4 != 0) {
×
3526
            sixel_helper_set_additional_message(
×
3527
                "sixel_helper_convert_colorspace: invalid data size.");
3528
            return SIXEL_BAD_INPUT;
×
3529
        }
3530
        for (i = 0; i + 3 < size; i += 4) {
×
3531
            pixels[i + 0] = sixel_colorspace_convert_component(
×
3532
                pixels[i + 0], colorspace_src, colorspace_dst);
×
3533
            pixels[i + 1] = sixel_colorspace_convert_component(
×
3534
                pixels[i + 1], colorspace_src, colorspace_dst);
×
3535
            pixels[i + 2] = sixel_colorspace_convert_component(
×
3536
                pixels[i + 2], colorspace_src, colorspace_dst);
×
3537
        }
3538
        break;
3539
    case SIXEL_PIXELFORMAT_ARGB8888:
×
3540
        if (size % 4 != 0) {
×
3541
            sixel_helper_set_additional_message(
×
3542
                "sixel_helper_convert_colorspace: invalid data size.");
3543
            return SIXEL_BAD_INPUT;
×
3544
        }
3545
        for (i = 0; i + 3 < size; i += 4) {
×
3546
            pixels[i + 1] = sixel_colorspace_convert_component(
×
3547
                pixels[i + 1], colorspace_src, colorspace_dst);
×
3548
            pixels[i + 2] = sixel_colorspace_convert_component(
×
3549
                pixels[i + 2], colorspace_src, colorspace_dst);
×
3550
            pixels[i + 3] = sixel_colorspace_convert_component(
×
3551
                pixels[i + 3], colorspace_src, colorspace_dst);
×
3552
        }
3553
        break;
3554
    case SIXEL_PIXELFORMAT_BGRA8888:
×
3555
        if (size % 4 != 0) {
×
3556
            sixel_helper_set_additional_message(
×
3557
                "sixel_helper_convert_colorspace: invalid data size.");
3558
            return SIXEL_BAD_INPUT;
×
3559
        }
3560
        for (i = 0; i + 3 < size; i += 4) {
×
3561
            pixels[i + 0] = sixel_colorspace_convert_component(
×
3562
                pixels[i + 0], colorspace_src, colorspace_dst);
×
3563
            pixels[i + 1] = sixel_colorspace_convert_component(
×
3564
                pixels[i + 1], colorspace_src, colorspace_dst);
×
3565
            pixels[i + 2] = sixel_colorspace_convert_component(
×
3566
                pixels[i + 2], colorspace_src, colorspace_dst);
×
3567
        }
3568
        break;
3569
    case SIXEL_PIXELFORMAT_ABGR8888:
×
3570
        if (size % 4 != 0) {
×
3571
            sixel_helper_set_additional_message(
×
3572
                "sixel_helper_convert_colorspace: invalid data size.");
3573
            return SIXEL_BAD_INPUT;
×
3574
        }
3575
        for (i = 0; i + 3 < size; i += 4) {
×
3576
            pixels[i + 1] = sixel_colorspace_convert_component(
×
3577
                pixels[i + 1], colorspace_src, colorspace_dst);
×
3578
            pixels[i + 2] = sixel_colorspace_convert_component(
×
3579
                pixels[i + 2], colorspace_src, colorspace_dst);
×
3580
            pixels[i + 3] = sixel_colorspace_convert_component(
×
3581
                pixels[i + 3], colorspace_src, colorspace_dst);
×
3582
        }
3583
        break;
3584
    case SIXEL_PIXELFORMAT_G8:
3585
        for (i = 0; i < size; ++i) {
×
3586
            pixels[i] = sixel_colorspace_convert_component(
×
3587
                pixels[i], colorspace_src, colorspace_dst);
×
3588
        }
3589
        break;
3590
    case SIXEL_PIXELFORMAT_GA88:
×
3591
        if (size % 2 != 0) {
×
3592
            sixel_helper_set_additional_message(
×
3593
                "sixel_helper_convert_colorspace: invalid data size.");
3594
            return SIXEL_BAD_INPUT;
×
3595
        }
3596
        for (i = 0; i + 1 < size; i += 2) {
×
3597
            pixels[i + 0] = sixel_colorspace_convert_component(
×
3598
                pixels[i + 0], colorspace_src, colorspace_dst);
×
3599
        }
3600
        break;
3601
    case SIXEL_PIXELFORMAT_AG88:
×
3602
        if (size % 2 != 0) {
×
3603
            sixel_helper_set_additional_message(
×
3604
                "sixel_helper_convert_colorspace: invalid data size.");
3605
            return SIXEL_BAD_INPUT;
×
3606
        }
3607
        for (i = 0; i + 1 < size; i += 2) {
×
3608
            pixels[i + 1] = sixel_colorspace_convert_component(
×
3609
                pixels[i + 1], colorspace_src, colorspace_dst);
×
3610
        }
3611
        break;
3612
    default:
×
3613
        sixel_helper_set_additional_message(
×
3614
            "sixel_helper_convert_colorspace: unsupported pixelformat.");
3615
        return SIXEL_BAD_INPUT;
×
3616
    }
3617

3618
    return SIXEL_OK;
3619
}
3620

3621
/* emacs Local Variables:      */
3622
/* emacs mode: c               */
3623
/* emacs tab-width: 4          */
3624
/* emacs indent-tabs-mode: nil */
3625
/* emacs c-basic-offset: 4     */
3626
/* emacs End:                  */
3627
/* vim: set expandtab ts=4 sts=4 sw=4 : */
3628
/* EOF */
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc