saitoha / libsixel / build 20609368106 (github, push)

31 Dec 2025 12:57AM UTC coverage: 52.011% (-6.3%) from 58.281%
saitoha: "tests: split converter option tap suites"

14741 of 45141 branches covered (32.66%)
21394 of 41134 relevant lines covered (52.01%)
3932390.77 hits per line

Source File: /src/scale.c — 89.93% covered

1
/*
2
 * SPDX-License-Identifier: MIT
3
 *
4
 * Copyright (c) 2021-2025 libsixel developers. See `AUTHORS`.
5
 * Copyright (c) 2014-2016 Hayaki Saito
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
8
 * this software and associated documentation files (the "Software"), to deal in
9
 * the Software without restriction, including without limitation the rights to
10
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
11
 * the Software, and to permit persons to whom the Software is furnished to do so,
12
 * subject to the following conditions:
13
 *
14
 * The above copyright notice and this permission notice shall be included in all
15
 * copies or substantial portions of the Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
19
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
20
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
21
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
 */
24

25
#if defined(HAVE_CONFIG_H)
26
#include "config.h"
27
#endif
28

29
/* STDC_HEADERS */
30
#include <stdlib.h>
31

32
#if HAVE_ERRNO_H
33
# include <errno.h>
34
#endif  /* HAVE_ERRNO_H */
35
#if HAVE_LIMITS_H
36
# include <limits.h>
37
#endif  /* HAVE_LIMITS_H */
38
#if HAVE_STRING_H
39
# include <string.h>
40
#endif  /* HAVE_STRING_H */
41
#if HAVE_STDINT_H
42
# include <stdint.h>
43
#endif  /* HAVE_STDINT_H */
44

45
#if HAVE_MATH_H
46
# define _USE_MATH_DEFINES  /* for MSVC */
47
# include <math.h>
48
#endif  /* HAVE_MATH_H */
49
#ifndef M_PI
50
# define M_PI 3.14159265358979323846
51
#endif
52

53
#include <sixel.h>
54

55
#include "cpu.h"
56
#include "logger.h"
57
#include "compat_stub.h"
58
#include "threading.h"
59

60
#if SIXEL_ENABLE_THREADS
61
# include "threadpool.h"
62
#endif
63

64
#if defined(__GNUC__) && defined(__i386__)
65
/*
66
 * i386 callers may enter with only 4- or 8-byte stack alignment. Force
67
 * realignment for SSE2-heavy routines to avoid movaps spills to unaligned
68
 * stack slots when SIMD is enabled via SIXEL_SIMD_LEVEL. Mark affected
69
 * functions noinline so the prologue that performs realignment is not
70
 * dropped by inlining.
71
 */
72
# define SIXEL_ALIGN_STACK __attribute__((force_align_arg_pointer))
73
# define SIXEL_NO_INLINE __attribute__((noinline))
74
#else
75
# define SIXEL_ALIGN_STACK
76
# define SIXEL_NO_INLINE
77
#endif
78

79
#if defined(HAVE_IMMINTRIN_H) && \
80
    (defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \
81
     defined(_M_IX86))
82
# define SIXEL_HAS_X86_INTRIN 1
83
# include <immintrin.h>
84
#endif
85

86
#if defined(__GNUC__) && !defined(__clang__)
87
/*
88
 * GCC reports a -Wpsabi note when __m512 parameters are present because the
89
 * calling convention changed in GCC 4.6. All callers and callees in this
90
 * translation unit share the same compiler, so suppress the note globally to
91
 * keep the output clean on AVX-512 builds.
92
 */
93
#pragma GCC diagnostic ignored "-Wpsabi"
94
#endif
95

96
#if defined(HAVE_SSE2)
97
/*
98
 * MSVC does not define __SSE2__ on x86/x64.  Instead, rely on the
99
 * architecture macros it provides so SIMD paths stay enabled after the
100
 * configure probe has validated SSE2 support.
101
 */
102
# if defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
103
    (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
104
#  if defined(HAVE_EMMINTRIN_H)
105
#   include <emmintrin.h>
106
#   define SIXEL_USE_SSE2 1
107
#  endif
108
# endif
109
#endif
110

111
#if defined(SIXEL_HAS_X86_INTRIN)
112
/* Reset ISA target macros in case another compilation unit defined them */
113
/* earlier in a unity or amalgamation build. */
114
# if defined(SIXEL_TARGET_AVX)
115
#  undef SIXEL_TARGET_AVX
116
# endif
117
# if defined(SIXEL_TARGET_AVX2)
118
#  undef SIXEL_TARGET_AVX2
119
# endif
120
# if defined(SIXEL_TARGET_AVX512)
121
#  undef SIXEL_TARGET_AVX512
122
# endif
123
# if defined(__GNUC__)
124
#  if !defined(__clang__)
125
#   define SIXEL_TARGET_AVX __attribute__((target("avx")))
126
#   define SIXEL_TARGET_AVX2 __attribute__((target("avx2")))
127
#   define SIXEL_TARGET_AVX512 __attribute__((target("avx512f")))
128
#   define SIXEL_USE_AVX 1
129
#  else
130
/*
131
 * clang rejects returning AVX vectors when the translation unit target
132
 * does not already include the corresponding ISA.  Guard runtime AVX
133
 * helpers with compile-time ISA availability to keep non-AVX builds
134
 * warning-free while still using AVX when the compiler enables it.
135
 */
136
#   define SIXEL_TARGET_AVX
137
#   define SIXEL_TARGET_AVX2
138
#   define SIXEL_TARGET_AVX512
139
#   if defined(__AVX__)
140
#    define SIXEL_USE_AVX 1
141
#   endif
142
#   if defined(__AVX2__)
143
#    define SIXEL_USE_AVX2 1
144
#   endif
145
#   if defined(__AVX512F__)
146
#    define SIXEL_USE_AVX512 1
147
#   endif
148
#  endif
149
# else
150
#  define SIXEL_TARGET_AVX
151
#  define SIXEL_TARGET_AVX2
152
#  define SIXEL_TARGET_AVX512
153
#  if defined(__AVX__)
154
#   define SIXEL_USE_AVX 1
155
#  endif
156
#  if defined(__AVX2__)
157
#   define SIXEL_USE_AVX2 1
158
#  endif
159
#  if defined(__AVX512F__)
160
#   define SIXEL_USE_AVX512 1
161
#  endif
162
# endif
163
#endif
164

165
#if defined(__GNUC__) && !defined(__clang__) && !defined(__PCC__)
166
# pragma GCC diagnostic push
167
# pragma GCC diagnostic ignored "-Wpsabi"
168
# undef SIXEL_USE_AVX
169
# undef SIXEL_USE_AVX2
170
# undef SIXEL_USE_AVX512
171
#endif
172

173
#if defined(HAVE_NEON)
174
# if (defined(__ARM_NEON) || defined(__ARM_NEON__))
175
#  if defined(HAVE_ARM_NEON_H)
176
#   include <arm_neon.h>
177
#   define SIXEL_USE_NEON 1
178
#  endif
179
# endif
180
#endif
181

182
#if !defined(MAX)
183
# define MAX(l, r) ((l) > (r) ? (l) : (r))
184
#endif
185
#if !defined(MIN)
186
#define MIN(l, r) ((l) < (r) ? (l) : (r))
187
#endif
188

189

190
#if 0
191
/* function Nearest Neighbor */
192
static double
193
nearest_neighbor(double const d)
194
{
195
    if (d <= 0.5) {
196
        return 1.0;
197
    }
198
    return 0.0;
199
}
200
#endif
201

202

203
/* function Bi-linear */
204
static double
205
bilinear(double const d)
136,832,164✔
206
{
207
    if (d < 1.0) {
136,832,164✔
208
        return 1.0 - d;
106,973,416✔
209
    }
210
    return 0.0;
211
}
212

213

214
/* function Welsh */
215
static double
216
welsh(double const d)
3,349,600✔
217
{
218
    if (d < 1.0) {
3,349,600✔
219
        return 1.0 - d * d;
598,400✔
220
    }
221
    return 0.0;
222
}
223

224

225
/* function Bi-cubic */
226
static double
227
bicubic(double const d)
5,192,830✔
228
{
229
    if (d <= 1.0) {
5,192,830✔
230
        return 1.0 + (d - 2.0) * d * d;
2,506,830✔
231
    }
232
    if (d <= 2.0) {
2,686,000✔
233
        return 4.0 + d * (-8.0 + d * (5.0 - d));
2,480,400✔
234
    }
235
    return 0.0;
236
}
237

238

239
/* function sinc
240
 * sinc(x) = sin(PI * x) / (PI * x)
241
 */
242
static double
243
sinc(double const x)
82,905,600✔
244
{
245
    return sin(M_PI * x) / (M_PI * x);
82,905,600✔
246
}
247

248

249
/* function Lanczos-2
250
 * Lanczos(x) = sinc(x) * sinc(x / 2) , |x| <= 2
251
 *            = 0, |x| > 2
252
 */
253
static double
254
lanczos2(double const d)
12,435,064✔
255
{
256
    if (d == 0.0) {
12,435,064!
257
        return 1.0;
258
    }
259
    if (d < 2.0) {
12,435,064✔
260
        return sinc(d) * sinc(d / 2.0);
2,879,808✔
261
    }
262
    return 0.0;
263
}
264

265

266
/* function Lanczos-3
267
 * Lanczos(x) = sinc(x) * sinc(x / 3) , |x| <= 3
268
 *            = 0, |x| > 3
269
 */
270
static double
271
lanczos3(double const d)
109,090,960✔
272
{
273
    if (d == 0.0) {
109,090,960!
274
        return 1.0;
275
    }
276
    if (d < 3.0) {
109,090,960✔
277
        return sinc(d) * sinc(d / 3.0);
77,355,688✔
278
    }
279
    return 0.0;
280
}
281

282
/* function Lanczos-4
283
 * Lanczos(x) = sinc(x) * sinc(x / 4) , |x| <= 4
284
 *            = 0, |x| > 4
285
 */
286
static double
287
lanczos4(double const d)
11,631,432✔
288
{
289
    if (d == 0.0) {
11,631,432!
290
        return 1.0;
291
    }
292
    if (d < 4.0) {
11,631,432✔
293
        return sinc(d) * sinc(d / 4.0);
2,670,104✔
294
    }
295
    return 0.0;
296
}
297
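/*
 * Illustrative sketch, not part of scale.c: lanczos2(), lanczos3() and
 * lanczos4() above differ only in their support radius, so they can be
 * read as one windowed-sinc family.  The helper below (the name
 * lanczos_n_example is invented here) reuses sinc() defined above and
 * reproduces lanczos2/3/4 for n = 2.0, 3.0 and 4.0, e.g.
 * lanczos_n_example(1.5, 3.0) == lanczos3(1.5).
 */
static double
lanczos_n_example(double const d, double const n)
{
    if (d == 0.0) {
        return 1.0;                    /* limit of sin(pi x) / (pi x) */
    }
    if (d < n) {
        return sinc(d) * sinc(d / n);  /* sinc windowed by sinc(x / n) */
    }
    return 0.0;                        /* outside the support radius */
}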

298

299
static double
300
gaussian(double const d)
2,714,600✔
301
{
302
    return exp(-2.0 * d * d) * sqrt(2.0 / M_PI);
2,714,600✔
303
}
304

305

306
static double
307
hanning(double const d)
2,923,184✔
308
{
309
    return 0.5 + 0.5 * cos(d * M_PI);
2,923,184✔
310
}
311

312

313
static double
314
hamming(const double d)
3,349,600✔
315
{
316
    return 0.54 + 0.46 * cos(d * M_PI);
3,349,600✔
317
}
318

319

320
static unsigned char
321
normalize(double x, double total)
12,031,785✔
322
{
323
    int result;
12,031,785✔
324

325
    result = floor(x / total);
12,031,785✔
326
    if (result > 255) {
12,031,785!
327
        return 0xff;
328
    }
329
    if (result < 0) {
12,030,115!
330
        return 0x00;
331
    }
332
    return (unsigned char)result;
12,029,144✔
333
}
334
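/*
 * Illustrative sketch, not part of scale.c: normalize() above divides an
 * accumulated, weight-scaled channel value by the total weight, floors
 * it, and clamps the result to the 0..255 byte range.  The hypothetical
 * helper below (an invented name) just restates that contract with
 * concrete numbers.
 */
static void
normalize_contract_example(void)
{
    unsigned char mid = normalize(510.0, 2.0);   /* 255, representable  */
    unsigned char hi = normalize(600.0, 2.0);    /* 300 clipped to 0xff */
    unsigned char lo = normalize(-12.0, 2.0);    /* -6 clipped to 0x00  */

    (void)mid;
    (void)hi;
    (void)lo;
}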

335
static int
336
sixel_scale_simd_level(void)
129✔
337
{
338
    static int simd_level = -2;
129✔
339

340
    if (simd_level == -2) {
129!
341
        simd_level = sixel_cpu_simd_level();
129✔
342
#if defined(__i386__)
343
        /*
344
         * AVX and later widen the alignment requirement for stack spills to
345
         * 32 bytes. i386 stack realignment from force_align_arg_pointer only
346
         * guarantees 16-byte boundaries, so keep the runtime level capped at
347
         * SSE2 to avoid vmovaps faults when YMM locals spill.
348
         */
349
        if (simd_level > SIXEL_SIMD_LEVEL_SSE2) {
350
            simd_level = SIXEL_SIMD_LEVEL_SSE2;
351
        }
352
#endif
353
    }
354

355
    return simd_level;
129✔
356
}
357
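/*
 * Illustrative sketch, not part of scale.c: the row scalers further down
 * combine the compile-time SIXEL_USE_* guards with the runtime level
 * returned by sixel_scale_simd_level().  The hypothetical helper below
 * (example_sse2_path_enabled is an invented name) restates that gate in
 * isolation: a SIMD branch runs only when it was both compiled in and
 * reported as available by the CPU probe.
 */
static int
example_sse2_path_enabled(void)
{
#if defined(SIXEL_USE_SSE2)
    return sixel_scale_simd_level() >= SIXEL_SIMD_LEVEL_SSE2;
#else
    return 0;
#endif
}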

358
static float
359
sixel_clamp_unit_f32(float value)
712,608✔
360
{
361
    /*
362
     * Resampling kernels with negative lobes can push linear RGB values
363
     * outside the unit interval. Clamp here so downstream conversions do
364
     * not collapse to black.
365
     */
366
    if (value < 0.0f) {
712,608!
367
        return 0.0f;
368
    }
369
    if (value > 1.0f) {
712,541!
370
        return 1.0f;
371
    }
372

373
    return value;
374
}
375
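/*
 * Illustrative sketch, not part of scale.c: a concrete case of the
 * overshoot mentioned in the comment above.  bicubic() has a negative
 * lobe for 1 < d <= 2, so a weighted average of samples that all lie in
 * [0, 1] can leave that interval; sixel_clamp_unit_f32() pulls it back.
 * The function name below is invented for illustration.
 */
static float
example_clamped_bicubic_sum(void)
{
    /* four unit-spaced samples around an edge: 0, 1, 1, 0 */
    double samples[4] = { 0.0, 1.0, 1.0, 0.0 };
    double offsets[4] = { 1.5, 0.5, 0.5, 1.5 };  /* |distance| to center */
    double weight;
    double acc;
    double total;
    int i;

    acc = 0.0;
    total = 0.0;
    for (i = 0; i < 4; i++) {
        weight = bicubic(offsets[i]);  /* -0.125 at d = 1.5, 0.625 at 0.5 */
        acc += samples[i] * weight;
        total += weight;
    }
    /* acc / total evaluates to 1.25 here; the clamp brings it back to 1 */
    return sixel_clamp_unit_f32((float)(acc / total));
}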

376
#if defined(HAVE_IMMINTRIN_H)
377
#if defined(SIXEL_USE_AVX)
378
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX __m256
379
sixel_avx_load_rgb_ps(unsigned char const *psrc)
380
{
381
    __m128i pixi128;
382
    __m128 pixf128;
383
    __m256 pixf256;
384

385
    /*
386
     * Build the byte vector explicitly so the AVX path never accumulates
387
     * garbage data when widening to 32-bit lanes.
388
     */
389
    pixi128 = _mm_setr_epi8((char)psrc[0],
390
                            (char)psrc[1],
391
                            (char)psrc[2],
392
                            0,
393
                            0, 0, 0, 0,
394
                            0, 0, 0, 0,
395
                            0, 0, 0, 0);
396
    pixf128 = _mm_cvtepi32_ps(pixi128);
397
    pixf256 = _mm256_castps128_ps256(pixf128);
398
    pixf256 = _mm256_insertf128_ps(pixf256, _mm_setzero_ps(), 1);
399
    return pixf256;
400
}
401

402
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX void
403
sixel_avx_store_rgb_u8(__m256 acc, double total, unsigned char *dst)
404
{
405
    __m256 scalev;
406
    __m256 minv;
407
    __m256 maxv;
408
    __m256i acci;
409
    int out[8];
410

411
    scalev = _mm256_set1_ps((float)(1.0 / total));
412
    acc = _mm256_mul_ps(acc, scalev);
413
    minv = _mm256_set1_ps(0.0f);
414
    maxv = _mm256_set1_ps(255.0f);
415
    acc = _mm256_max_ps(minv, _mm256_min_ps(acc, maxv));
416
    acci = _mm256_cvtps_epi32(acc);
417
    _mm256_storeu_si256((__m256i *)out, acci);
418
    dst[0] = (unsigned char)out[0];
419
    dst[1] = (unsigned char)out[1];
420
    dst[2] = (unsigned char)out[2];
421
}
422

423
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX __m256
424
sixel_avx_zero_ps(void)
425
{
426
    return _mm256_setzero_ps();
427
}
428

429
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX __m256
430
sixel_avx_muladd_ps(__m256 acc, __m256 pix, float weight)
431
{
432
    __m256 wv;
433

434
    wv = _mm256_set1_ps(weight);
435
    return _mm256_add_ps(acc, _mm256_mul_ps(pix, wv));
436
}
437

438
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX __m256
439
sixel_avx_load_rgb_f32(float const *psrc)
440
{
441
    __m256 pixf;
442

443
    pixf = _mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f,
444
                         psrc[2], psrc[1], psrc[0], 0.0f);
445
    return pixf;
446
}
447

448
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX void
449
sixel_avx_store_rgb_f32(__m256 acc, double total, float *dst)
450
{
451
    __m256 scalev;
452
    __m256 minv;
453
    __m256 maxv;
454
    float out[8];
455

456
    scalev = _mm256_set1_ps((float)(1.0 / total));
457
    acc = _mm256_mul_ps(acc, scalev);
458
    minv = _mm256_set1_ps(0.0f);
459
    maxv = _mm256_set1_ps(1.0f);
460
    acc = _mm256_max_ps(minv, _mm256_min_ps(acc, maxv));
461
    _mm256_storeu_ps(out, acc);
462
    dst[0] = out[0];
463
    dst[1] = out[1];
464
    dst[2] = out[2];
465
}
466
#endif  /* SIXEL_USE_AVX */
467

468
#if defined(SIXEL_USE_AVX2)
469
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 __m256
470
sixel_avx2_load_rgb_ps(unsigned char const *psrc)
471
{
472
    __m128i pixi128;
473
    __m256i pixi256;
474

475
    /*
476
     * Keep the unused bytes zeroed so widening to epi32 does not pull in
477
     * stack junk and bias every output channel toward white.
478
     */
479
    pixi128 = _mm_setr_epi8((char)psrc[0],
480
                            (char)psrc[1],
481
                            (char)psrc[2],
482
                            0,
483
                            0, 0, 0, 0,
484
                            0, 0, 0, 0,
485
                            0, 0, 0, 0);
486
    pixi256 = _mm256_cvtepu8_epi32(pixi128);
487
    return _mm256_cvtepi32_ps(pixi256);
488
}
489

490
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 void
491
sixel_avx2_store_rgb_u8(__m256 acc, double total, unsigned char *dst)
492
{
493
    __m256 scalev;
494
    __m256 minv;
495
    __m256 maxv;
496
    __m256i acci;
497
    int out[8];
498

499
    scalev = _mm256_set1_ps((float)(1.0 / total));
500
    acc = _mm256_mul_ps(acc, scalev);
501
    minv = _mm256_set1_ps(0.0f);
502
    maxv = _mm256_set1_ps(255.0f);
503
    acc = _mm256_max_ps(minv, _mm256_min_ps(acc, maxv));
504
    acci = _mm256_cvtps_epi32(acc);
505
    _mm256_storeu_si256((__m256i *)out, acci);
506
    dst[0] = (unsigned char)out[0];
507
    dst[1] = (unsigned char)out[1];
508
    dst[2] = (unsigned char)out[2];
509
}
510

511
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 __m256
512
sixel_avx2_zero_ps(void)
513
{
514
    return _mm256_setzero_ps();
515
}
516

517
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 __m256
518
sixel_avx2_muladd_ps(__m256 acc, __m256 pix, float weight)
519
{
520
    __m256 wv;
521

522
    wv = _mm256_set1_ps(weight);
523
    return _mm256_add_ps(acc, _mm256_mul_ps(pix, wv));
524
}
525

526
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 __m256
527
sixel_avx2_load_rgb_f32(float const *psrc)
528
{
529
    __m256 pixf;
530

531
    pixf = _mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f,
532
                         psrc[2], psrc[1], psrc[0], 0.0f);
533
    return pixf;
534
}
535

536
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 void
537
sixel_avx2_store_rgb_f32(__m256 acc, double total, float *dst)
538
{
539
    __m256 scalev;
540
    __m256 minv;
541
    __m256 maxv;
542
    float out[8];
543

544
    scalev = _mm256_set1_ps((float)(1.0 / total));
545
    acc = _mm256_mul_ps(acc, scalev);
546
    minv = _mm256_set1_ps(0.0f);
547
    maxv = _mm256_set1_ps(1.0f);
548
    acc = _mm256_max_ps(minv, _mm256_min_ps(acc, maxv));
549
    _mm256_storeu_ps(out, acc);
550
    dst[0] = out[0];
551
    dst[1] = out[1];
552
    dst[2] = out[2];
553
}
554
#endif  /* SIXEL_USE_AVX2 */
555

556
#if defined(SIXEL_USE_AVX512)
557
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 __m512
558
sixel_avx512_load_rgb_ps(unsigned char const *psrc)
559
{
560
    __m128i pixi128;
561
    __m512i pixi512;
562

563
    pixi128 = _mm_setr_epi8((char)psrc[0],
564
                            (char)psrc[1],
565
                            (char)psrc[2],
566
                            0,
567
                            0, 0, 0, 0,
568
                            0, 0, 0, 0,
569
                            0, 0, 0, 0);
570
    pixi512 = _mm512_cvtepu8_epi32(pixi128);
571
    return _mm512_cvtepi32_ps(pixi512);
572
}
573

574
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 void
575
sixel_avx512_store_rgb_u8(__m512 const *acc,
576
                          double total,
577
                          unsigned char *dst)
578
{
579
    __m512 scalev;
580
    __m512 minv;
581
    __m512 maxv;
582
    __m512 accv;
583
    __m512i acci;
584
    int out[16];
585

586
    scalev = _mm512_set1_ps((float)(1.0 / total));
587
    accv = _mm512_mul_ps(*acc, scalev);
588
    minv = _mm512_set1_ps(0.0f);
589
    maxv = _mm512_set1_ps(255.0f);
590
    accv = _mm512_max_ps(minv, _mm512_min_ps(accv, maxv));
591
    acci = _mm512_cvtps_epi32(accv);
592
    _mm512_storeu_si512((void *)out, acci);
593
    dst[0] = (unsigned char)out[0];
594
    dst[1] = (unsigned char)out[1];
595
    dst[2] = (unsigned char)out[2];
596
}
597

598
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 __m512
599
sixel_avx512_zero_ps(void)
600
{
601
    return _mm512_setzero_ps();
602
}
603

604
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 __m512
605
sixel_avx512_muladd_ps(__m512 acc, __m512 pix, float weight)
606
{
607
    __m512 wv;
608

609
    wv = _mm512_set1_ps(weight);
610
    return _mm512_add_ps(acc, _mm512_mul_ps(pix, wv));
611
}
612

613
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 __m512
614
sixel_avx512_load_rgb_f32(float const *psrc)
615
{
616
    __m512 pixf;
617

618
    pixf = _mm512_set_ps(0.0f, 0.0f, 0.0f, 0.0f,
619
                         0.0f, 0.0f, 0.0f, 0.0f,
620
                         0.0f, 0.0f, 0.0f, 0.0f,
621
                         psrc[2], psrc[1], psrc[0], 0.0f);
622
    return pixf;
623
}
624

625
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 void
626
sixel_avx512_store_rgb_f32(__m512 const *acc,
627
                           double total,
628
                           float *dst)
629
{
630
    __m512 scalev;
631
    __m512 minv;
632
    __m512 maxv;
633
    __m512 accv;
634
    float out[16];
635

636
    scalev = _mm512_set1_ps((float)(1.0 / total));
637
    accv = _mm512_mul_ps(*acc, scalev);
638
    minv = _mm512_set1_ps(0.0f);
639
    maxv = _mm512_set1_ps(1.0f);
640
    accv = _mm512_max_ps(minv, _mm512_min_ps(accv, maxv));
641
    _mm512_storeu_ps(out, accv);
642
    dst[0] = out[0];
643
    dst[1] = out[1];
644
    dst[2] = out[2];
645
}
646
#endif  /* SIXEL_USE_AVX512 */
647
#endif /* HAVE_IMMINTRIN_H */
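/*
 * Illustrative sketch, not part of scale.c: on the unsigned-char path the
 * AVX/AVX2/AVX-512 helpers above all implement the same small contract,
 * just on vector registers: load one RGB sample as floats, accumulate it
 * scaled by a kernel weight, then divide by the summed weight, clamp to
 * 0..255, and store three channels.  The scalar restatement below uses
 * invented names (example_*) and a plain array instead of an __m256 or
 * __m512 accumulator.
 */
static void
example_rgb_muladd(double *acc, unsigned char const *psrc, double weight)
{
    acc[0] += (double)psrc[0] * weight;
    acc[1] += (double)psrc[1] * weight;
    acc[2] += (double)psrc[2] * weight;
}

static void
example_rgb_store_u8(double const *acc, double total, unsigned char *dst)
{
    int i;
    double v;

    for (i = 0; i < 3; i++) {
        v = acc[i] / total;
        if (v < 0.0) {
            v = 0.0;    /* same effect as the _mm*_max_ps clamp above */
        }
        if (v > 255.0) {
            v = 255.0;  /* same effect as the _mm*_min_ps clamp above */
        }
        /* the intrinsics convert with round-to-nearest; a cast truncates */
        dst[i] = (unsigned char)v;
    }
}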
648

649

650
static void
651
scale_without_resampling(
9✔
652
    unsigned char *dst,
653
    unsigned char const *src,
654
    int const srcw,
655
    int const srch,
656
    int const dstw,
657
    int const dsth,
658
    int const depth)
659
{
660
    int w;
9✔
661
    int h;
9✔
662
    int x;
9✔
663
    int y;
9✔
664
    int i;
9✔
665
    int pos;
9✔
666

667
    for (h = 0; h < dsth; h++) {
419✔
668
        for (w = 0; w < dstw; w++) {
252,450✔
669
            x = (long)w * srcw / dstw;
252,040✔
670
            y = (long)h * srch / dsth;
252,040✔
671
            for (i = 0; i < depth; i++) {
1,008,160✔
672
                pos = (y * srcw + x) * depth + i;
756,120✔
673
                dst[(h * dstw + w) * depth + i] = src[pos];
756,120✔
674
            }
675
        }
676
    }
677
}
9✔
678
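/*
 * Illustrative sketch, not part of scale.c: scale_without_resampling()
 * above picks, for every destination pixel, the source pixel whose index
 * is w * srcw / dstw (and likewise for rows), i.e. plain nearest-index
 * point sampling with integer truncation.  Worked through for srcw = 4
 * and dstw = 10, destination columns 0..9 read source columns
 * 0,0,0,1,1,2,2,2,3,3.  The helper name below is invented.
 */
static int
example_nearest_source_index(int const w, int const srcw, int const dstw)
{
    /* same integer arithmetic as the loops above, widened to long to
     * avoid overflow on large images */
    return (int)((long)w * srcw / dstw);
}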

679
static void
680
scale_without_resampling_float32(
20✔
681
    float *dst,
682
    float const *src,
683
    int const srcw,
684
    int const srch,
685
    int const dstw,
686
    int const dsth,
687
    int const depth)
688
{
689
    int w;
20✔
690
    int h;
20✔
691
    int x;
20✔
692
    int y;
20✔
693
    int i;
20✔
694
    int pos;
20✔
695

696
    for (h = 0; h < dsth; h++) {
260✔
697
        for (w = 0; w < dstw; w++) {
3,120✔
698
            x = (long)w * srcw / dstw;
2,880✔
699
            y = (long)h * srch / dsth;
2,880✔
700
            for (i = 0; i < depth; i++) {
11,520✔
701
                pos = (y * srcw + x) * depth + i;
8,640✔
702
                dst[(h * dstw + w) * depth + i] = src[pos];
8,640✔
703
            }
704
        }
705
    }
706
}
20✔
707

708

709
typedef double (*resample_fn_t)(double const d);
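/*
 * Illustrative sketch, not part of scale.c: each kernel above pairs with
 * a support radius, the largest |d| that still yields a non-zero weight,
 * and that radius is what the row scalers below receive as the parameter
 * n when they size the [x_first, x_last] window.  The table and entry
 * type here are invented for illustration; the real method selection
 * lives in the callers of these routines and is not shown in this file.
 */
struct example_kernel_entry {
    resample_fn_t fn;
    double support;   /* largest |d| with a non-zero weight */
};

static struct example_kernel_entry const example_kernel_table[] = {
    { bilinear, 1.0 },
    { welsh,    1.0 },
    { bicubic,  2.0 },
    { lanczos2, 2.0 },
    { lanczos3, 3.0 },
    { lanczos4, 4.0 },
    /* gaussian, hanning and hamming taper but have no hard zero, so the
     * window they get is whatever n the caller decides to pass. */
};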
710

711
#if defined(__GNUC__) && !defined(__clang__) && !defined(__PCC__)
712
#pragma GCC diagnostic push
713
/*
714
 * GCC emits a -Wpsabi note for __m512 parameters because the calling
715
 * convention changed in GCC 4.6. The functions only pass vectors between
716
 * helpers compiled with the same compiler, so suppress the noise locally.
717
 */
718
#pragma GCC diagnostic ignored "-Wpsabi"
719
#endif
720

721
/*
722
 * Two-pass separable filter helpers. Each function processes a single row so
723
 * the caller may invoke them serially or from a threadpool worker. On i386 we
724
 * also mark the functions noinline to ensure the stack-realigning prologue
725
 * from SIXEL_ALIGN_STACK is preserved under optimization.
726
 */
727
static SIXEL_ALIGN_STACK SIXEL_NO_INLINE void
728
scale_horizontal_row(
42,888✔
729
    unsigned char *tmp,
730
    unsigned char const *src,
731
    int const srcw,
732
    int const dstw,
733
    int const depth,
734
    int const y,
735
    resample_fn_t const f_resample,
736
    double const n,
737
    int const simd_level)
738
{
739
    int w;
42,888✔
740
    int x;
42,888✔
741
    int i;
42,888✔
742
    int pos;
42,888✔
743
    int x_first;
42,888✔
744
    int x_last;
42,888✔
745
    double center_x;
42,888✔
746
    double diff_x;
42,888✔
747
    double weight;
42,888✔
748
    double total;
42,888✔
749
    double offsets[8];
42,888✔
750
#if !defined(SIXEL_USE_AVX512) && !defined(SIXEL_USE_AVX2) && \
751
    !defined(SIXEL_USE_AVX) && !defined(SIXEL_USE_SSE2) && \
752
    !defined(SIXEL_USE_NEON)
753
    /*
754
     * No SIMD backends are compiled for this target, so the SIMD level gate
755
     * becomes a dead parameter. Silence -Wunused-parameter on 32-bit GCC
756
     * builds while keeping the signature identical across configurations.
757
     */
758
    (void)simd_level;
11,454✔
759
#endif
760
#if defined(SIXEL_USE_AVX512)
761
    __m512 acc512;
762
    __m512 pix512;
763
#endif
764
#if defined(SIXEL_USE_AVX2) || defined(SIXEL_USE_AVX)
765
    __m256 acc256;
766
#endif
767
#if defined(SIXEL_USE_SSE2)
768
    /*
769
     * __m128 locals remain on the stack. On i386 callers may arrive with
770
     * only 4- or 8-byte alignment, so movaps spills can fault when SSE2 is
771
     * forced. SIXEL_ALIGN_STACK realigns the frame on entry to keep the
772
     * SSE2 path consistent with the 16-byte guarantee on x86_64.
773
     */
774
    __m128 acc128;
22,908✔
775
    __m128 minv128;
22,908✔
776
    __m128 maxv128;
22,908✔
777
    __m128 scalev128;
22,908✔
778
    __m128 wv128;
22,908✔
779
    __m128 pixf128;
22,908✔
780
    __m128i pixi128;
22,908✔
781
    __m128i acci128;
22,908✔
782
    __m128i acc16_128;
22,908✔
783
    unsigned int pixel128;
22,908✔
784
#endif
785
#if defined(SIXEL_USE_NEON)
786
    float32x4_t acc_neon;
8,526✔
787
    float32x4_t minv_neon;
8,526✔
788
    float32x4_t maxv_neon;
8,526✔
789
    float32x4_t scalev_neon;
8,526✔
790
    float32x4_t wv_neon;
8,526✔
791
    float32x4_t pixf_neon;
8,526✔
792
    uint32x4_t pix32_neon;
8,526✔
793
    uint32x4_t acci_neon;
8,526✔
794
    uint16x4_t acc16_neon;
8,526✔
795
    uint8x8_t acc8_neon;
8,526✔
796
    uint8_t outb_neon[8];
8,526✔
797
#endif
798

799
    for (w = 0; w < dstw; w++) {
10,419,336✔
800
        total = 0.0;
41,505,792✔
801
        for (i = 0; i < depth; i++) {
41,505,792✔
802
            offsets[i] = 0;
31,129,344✔
803
        }
804

805
        if (dstw >= srcw) {
10,376,448!
806
            center_x = (w + 0.5) * srcw / dstw;
768,000✔
807
            x_first = MAX((int)(center_x - n), 0);
768,000!
808
            x_last = MIN((int)(center_x + n), srcw - 1);
768,000!
809
        } else {
810
            center_x = w + 0.5;
9,608,448✔
811
            x_first = MAX((int)floor((center_x - n) * srcw / dstw), 0);
9,608,448✔
812
            x_last = MIN((int)floor((center_x + n) * srcw / dstw),
9,608,448✔
813
                         srcw - 1);
814
        }
815

816
#if defined(SIXEL_USE_AVX512)
817
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX512) {
818
#if defined(__GNUC__) && !defined(__clang__)
819
#pragma GCC diagnostic push
820
#pragma GCC diagnostic ignored "-Wpsabi"
821
#endif
822
            acc512 = sixel_avx512_zero_ps();
823

824
            for (x = x_first; x <= x_last; x++) {
825
                diff_x = (dstw >= srcw)
826
                             ? (x + 0.5) - center_x
827
                             : (x + 0.5) * dstw / srcw - center_x;
828
                weight = f_resample(fabs(diff_x));
829
                pos = (y * srcw + x) * depth;
830
                pix512 = sixel_avx512_load_rgb_ps(src + pos);
831
                acc512 = sixel_avx512_muladd_ps(
832
                    acc512,
833
                    pix512,
834
                    (float)weight);
835
                total += weight;
836
            }
837
            if (total > 0.0) {
838
                pos = (y * dstw + w) * depth;
839
                sixel_avx512_store_rgb_u8(&acc512, total, tmp + pos);
840
            }
841
#if defined(__GNUC__) && !defined(__clang__)
842
#pragma GCC diagnostic pop
843
#endif
844
            continue;
845
        }
846
#endif
847
#if defined(SIXEL_USE_AVX2)
848
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX2) {
849
            acc256 = sixel_avx2_zero_ps();
850

851
            for (x = x_first; x <= x_last; x++) {
852
                diff_x = (dstw >= srcw)
853
                             ? (x + 0.5) - center_x
854
                             : (x + 0.5) * dstw / srcw - center_x;
855
                weight = f_resample(fabs(diff_x));
856
                pos = (y * srcw + x) * depth;
857
                acc256 = sixel_avx2_muladd_ps(
858
                    acc256,
859
                    sixel_avx2_load_rgb_ps(src + pos),
860
                    (float)weight);
861
                total += weight;
862
            }
863
            if (total > 0.0) {
864
                pos = (y * dstw + w) * depth;
865
                sixel_avx2_store_rgb_u8(acc256, total, tmp + pos);
866
            }
867
            continue;
868
        }
869
#endif
870
#if defined(SIXEL_USE_AVX)
871
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX) {
872
            acc256 = sixel_avx_zero_ps();
873

874
            for (x = x_first; x <= x_last; x++) {
875
                diff_x = (dstw >= srcw)
876
                             ? (x + 0.5) - center_x
877
                             : (x + 0.5) * dstw / srcw - center_x;
878
                weight = f_resample(fabs(diff_x));
879
                pos = (y * srcw + x) * depth;
880
                acc256 = sixel_avx_muladd_ps(
881
                    acc256,
882
                    sixel_avx_load_rgb_ps(src + pos),
883
                    (float)weight);
884
                total += weight;
885
            }
886
            if (total > 0.0) {
887
                pos = (y * dstw + w) * depth;
888
                sixel_avx_store_rgb_u8(acc256, total, tmp + pos);
889
            }
890
            continue;
891
        }
892
#endif
893
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
894
        if (depth == 3
15,482,688!
895
# if defined(SIXEL_USE_SSE2)
896
            && simd_level >= SIXEL_SIMD_LEVEL_SSE2
5,270,208!
897
# elif defined(SIXEL_USE_NEON)
898
            && simd_level >= SIXEL_SIMD_LEVEL_NEON
2,471,136!
899
# endif
900
            ) {
901
#if defined(SIXEL_USE_SSE2)
902
            acc128 = _mm_setzero_ps();
903
#elif defined(SIXEL_USE_NEON)
904
            acc_neon = vdupq_n_f32(0.0f);
905
#endif
906
            for (x = x_first; x <= x_last; x++) {
134,448,972✔
907
                diff_x = (dstw >= srcw)
253,415,256!
908
                             ? (x + 0.5) - center_x
1,724,400✔
909
                             : (x + 0.5) * dstw / srcw - center_x;
126,707,628✔
910
                weight = f_resample(fabs(diff_x));
126,707,628✔
911
                pos = (y * srcw + x) * depth;
126,707,628✔
912
                const unsigned char *psrc = src + pos;
126,707,628✔
913
#if defined(SIXEL_USE_SSE2)
914
                pixel128 = psrc[0] | (psrc[1] << 8) | (psrc[2] << 16);
106,737,240✔
915
                pixi128 = _mm_cvtsi32_si128((int)pixel128);
106,737,240✔
916
                pixi128 = _mm_unpacklo_epi8(pixi128, _mm_setzero_si128());
106,737,240✔
917
                pixi128 = _mm_unpacklo_epi16(pixi128, _mm_setzero_si128());
106,737,240✔
918
                pixf128 = _mm_cvtepi32_ps(pixi128);
106,737,240✔
919
                wv128 = _mm_set1_ps((float)weight);
106,737,240✔
920
                acc128 = _mm_add_ps(acc128, _mm_mul_ps(pixf128, wv128));
106,737,240✔
921
#else /* NEON */
922
                pix32_neon = (uint32x4_t){psrc[0], psrc[1], psrc[2], 0};
19,970,388✔
923
                pixf_neon = vcvtq_f32_u32(pix32_neon);
19,970,388✔
924
                wv_neon = vdupq_n_f32((float)weight);
19,970,388✔
925
                acc_neon = vmlaq_f32(acc_neon, pixf_neon, wv_neon);
19,970,388✔
926
#endif
927
                total += weight;
126,707,628✔
928
            }
929
            if (total > 0.0) {
7,741,344!
930
#if defined(SIXEL_USE_SSE2)
931
                scalev128 = _mm_set1_ps((float)(1.0 / total));
5,270,208✔
932
                acc128 = _mm_mul_ps(acc128, scalev128);
5,270,208✔
933
                minv128 = _mm_set1_ps(0.0f);
5,270,208✔
934
                maxv128 = _mm_set1_ps(255.0f);
5,270,208✔
935
                acc128 = _mm_max_ps(minv128, _mm_min_ps(acc128, maxv128));
5,270,208✔
936
                acci128 = _mm_cvtps_epi32(acc128);
5,270,208✔
937
                acc16_128 = _mm_packs_epi32(acci128, _mm_setzero_si128());
5,270,208✔
938
                acc16_128 = _mm_packus_epi16(acc16_128, _mm_setzero_si128());
5,270,208✔
939
                pos = (y * dstw + w) * depth;
5,270,208✔
940
                pixel128 = (unsigned int)_mm_cvtsi128_si32(acc16_128);
5,270,208✔
941
                tmp[pos + 0] = (unsigned char)pixel128;
5,270,208✔
942
                tmp[pos + 1] = (unsigned char)(pixel128 >> 8);
5,270,208✔
943
                tmp[pos + 2] = (unsigned char)(pixel128 >> 16);
5,270,208✔
944
#else /* NEON */
945
                scalev_neon = vdupq_n_f32((float)(1.0 / total));
2,471,136✔
946
                acc_neon = vmulq_f32(acc_neon, scalev_neon);
2,471,136✔
947
                minv_neon = vdupq_n_f32(0.0f);
2,471,136✔
948
                maxv_neon = vdupq_n_f32(255.0f);
2,471,136✔
949
                acc_neon = vmaxq_f32(minv_neon,
2,471,136✔
950
                                     vminq_f32(acc_neon, maxv_neon));
951
                acci_neon = vcvtq_u32_f32(acc_neon);
2,471,136✔
952
                acc16_neon = vmovn_u32(acci_neon);
2,471,136✔
953
                acc8_neon = vmovn_u16(vcombine_u16(acc16_neon, acc16_neon));
2,471,136✔
954

955
                vst1_u8(outb_neon, acc8_neon);
2,471,136✔
956
                pos = (y * dstw + w) * depth;
2,471,136✔
957
                tmp[pos + 0] = outb_neon[0];
2,471,136✔
958
                tmp[pos + 1] = outb_neon[1];
2,471,136✔
959
                tmp[pos + 2] = outb_neon[2];
2,471,136✔
960
#endif
961
            }
962
            continue;
7,741,344✔
963
        }
964
#endif /* SIMD paths */
965

966
        for (x = x_first; x <= x_last; x++) {
56,003,724!
967
            diff_x = (dstw >= srcw)
106,737,240!
968
                         ? (x + 0.5) - center_x
574,800✔
969
                         : (x + 0.5) * dstw / srcw - center_x;
53,368,620!
970
            weight = f_resample(fabs(diff_x));
53,368,620✔
971
            for (i = 0; i < depth; i++) {
266,843,100!
972
                pos = (y * srcw + x) * depth + i;
160,105,860✔
973
                offsets[i] += src[pos] * weight;
160,105,860✔
974
            }
975
            total += weight;
53,368,620✔
976
        }
977

978
        if (total > 0.0) {
2,635,104!
979
            for (i = 0; i < depth; i++) {
10,540,416!
980
                pos = (y * dstw + w) * depth + i;
7,905,312✔
981
                tmp[pos] = normalize(offsets[i], total);
7,905,312✔
982
            }
983
        }
984
    }
985
}
42,888✔
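/*
 * Illustrative sketch, not part of scale.c: how scale_horizontal_row()
 * above chooses its sampling window.  When enlarging (dstw >= srcw) the
 * window is centered on the source-space position of the destination
 * pixel and spans +/- n source pixels; when shrinking, the destination
 * pixel center is mapped back through srcw/dstw so the window widens in
 * proportion to the downscale factor.  The helper below restates that
 * arithmetic with invented names; for srcw = 100, dstw = 50 and n = 3
 * (lanczos3), destination column 10 samples source columns 15 through 27.
 */
static void
example_horizontal_window(
    int const w,
    int const srcw,
    int const dstw,
    double const n,
    int *x_first,
    int *x_last)
{
    double center_x;

    if (dstw >= srcw) {
        center_x = (w + 0.5) * srcw / dstw;
        *x_first = MAX((int)(center_x - n), 0);
        *x_last = MIN((int)(center_x + n), srcw - 1);
    } else {
        center_x = w + 0.5;
        *x_first = MAX((int)floor((center_x - n) * srcw / dstw), 0);
        *x_last = MIN((int)floor((center_x + n) * srcw / dstw), srcw - 1);
    }
}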
986

987
static SIXEL_ALIGN_STACK SIXEL_NO_INLINE void
988
scale_vertical_row(
14,603✔
989
    unsigned char *dst,
990
    unsigned char const *tmp,
991
    int const dstw,
992
    int const dsth,
993
    int const depth,
994
    int const srch,
995
    int const h,
996
    resample_fn_t const f_resample,
997
    double const n,
998
    int const simd_level)
999
{
1000
    int w;
14,603✔
1001
    int y;
14,603✔
1002
    int i;
14,603✔
1003
    int pos;
14,603✔
1004
    int y_first;
14,603✔
1005
    int y_last;
14,603✔
1006
    double center_y;
14,603✔
1007
    double diff_y;
14,603✔
1008
    double weight;
14,603✔
1009
    double total;
14,603✔
1010
    double offsets[8];
14,603✔
1011
#if !defined(SIXEL_USE_AVX512) && !defined(SIXEL_USE_AVX2) && \
1012
    !defined(SIXEL_USE_AVX) && !defined(SIXEL_USE_SSE2) && \
1013
    !defined(SIXEL_USE_NEON)
1014
    /*
1015
     * When no SIMD implementations are present the runtime SIMD level does
1016
     * not influence the algorithm. Mark it unused to keep 32-bit GCC quiet
1017
     * without altering the interface shared with SIMD-enabled builds.
1018
     */
1019
    (void)simd_level;
3,665✔
1020
#endif
1021
#if defined(SIXEL_USE_AVX512)
1022
    __m512 acc512;
1023
    __m512 pix512;
1024
#endif
1025
#if defined(SIXEL_USE_AVX2) || defined(SIXEL_USE_AVX)
1026
    __m256 acc256;
1027
#endif
1028
#if defined(SIXEL_USE_SSE2)
1029
    __m128 acc128;
7,330✔
1030
    __m128 minv128;
7,330✔
1031
    __m128 maxv128;
7,330✔
1032
    __m128 scalev128;
7,330✔
1033
    __m128 wv128;
7,330✔
1034
    __m128 pixf128;
7,330✔
1035
    __m128i pixi128;
7,330✔
1036
    __m128i acci128;
7,330✔
1037
    __m128i acc16_128;
7,330✔
1038
    unsigned int pixel128;
7,330✔
1039
#endif
1040
#if defined(SIXEL_USE_NEON)
1041
    float32x4_t acc_neon;
3,608✔
1042
    float32x4_t minv_neon;
3,608✔
1043
    float32x4_t maxv_neon;
3,608✔
1044
    float32x4_t scalev_neon;
3,608✔
1045
    float32x4_t wv_neon;
3,608✔
1046
    float32x4_t pixf_neon;
3,608✔
1047
    uint32x4_t pix32_neon;
3,608✔
1048
    uint32x4_t acci_neon;
3,608✔
1049
    uint16x4_t acc16_neon;
3,608✔
1050
    uint8x8_t acc8_neon;
3,608✔
1051
    uint8_t outb_neon[8];
3,608✔
1052
#endif
1053

1054
    for (w = 0; w < dstw; w++) {
5,513,319✔
1055
        total = 0.0;
21,994,864✔
1056
        for (i = 0; i < depth; i++) {
21,994,864✔
1057
            offsets[i] = 0;
16,496,148✔
1058
        }
1059

1060
        if (dsth >= srch) {
5,498,716!
1061
            center_y = (h + 0.5) * srch / dsth;
2,346,000✔
1062
            y_first = MAX((int)(center_y - n), 0);
2,346,000!
1063
            y_last = MIN((int)(center_y + n), srch - 1);
2,346,000!
1064
        } else {
1065
            center_y = h + 0.5;
3,152,716✔
1066
            y_first = MAX((int)floor((center_y - n) * srch / dsth), 0);
3,152,716✔
1067
            y_last = MIN((int)floor((center_y + n) * srch / dsth),
3,152,716✔
1068
                         srch - 1);
1069
        }
1070

1071
#if defined(SIXEL_USE_AVX512)
1072
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX512) {
1073
#if defined(__GNUC__) && !defined(__clang__)
1074
#pragma GCC diagnostic push
1075
#pragma GCC diagnostic ignored "-Wpsabi"
1076
#endif
1077
            acc512 = sixel_avx512_zero_ps();
1078

1079
            for (y = y_first; y <= y_last; y++) {
1080
                diff_y = (dsth >= srch)
1081
                             ? (y + 0.5) - center_y
1082
                             : (y + 0.5) * dsth / srch - center_y;
1083
                weight = f_resample(fabs(diff_y));
1084
                pos = (y * dstw + w) * depth;
1085
                pix512 = sixel_avx512_load_rgb_ps(tmp + pos);
1086
                acc512 = sixel_avx512_muladd_ps(
1087
                    acc512,
1088
                    pix512,
1089
                    (float)weight);
1090
                total += weight;
1091
            }
1092
            if (total > 0.0) {
1093
                pos = (h * dstw + w) * depth;
1094
                sixel_avx512_store_rgb_u8(&acc512, total, dst + pos);
1095
            }
1096
#if defined(__GNUC__) && !defined(__clang__)
1097
#pragma GCC diagnostic pop
1098
#endif
1099
            continue;
1100
        }
1101
#endif
1102
#if defined(SIXEL_USE_AVX2)
1103
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX2) {
1104
            acc256 = sixel_avx2_zero_ps();
1105

1106
            for (y = y_first; y <= y_last; y++) {
1107
                diff_y = (dsth >= srch)
1108
                             ? (y + 0.5) - center_y
1109
                             : (y + 0.5) * dsth / srch - center_y;
1110
                weight = f_resample(fabs(diff_y));
1111
                pos = (y * dstw + w) * depth;
1112
                acc256 = sixel_avx2_muladd_ps(
1113
                    acc256,
1114
                    sixel_avx2_load_rgb_ps(tmp + pos),
1115
                    (float)weight);
1116
                total += weight;
1117
            }
1118
            if (total > 0.0) {
1119
                pos = (h * dstw + w) * depth;
1120
                sixel_avx2_store_rgb_u8(acc256, total, dst + pos);
1121
            }
1122
            continue;
1123
        }
1124
#endif
1125
#if defined(SIXEL_USE_AVX)
1126
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX) {
1127
            acc256 = sixel_avx_zero_ps();
1128

1129
            for (y = y_first; y <= y_last; y++) {
1130
                diff_y = (dsth >= srch)
1131
                             ? (y + 0.5) - center_y
1132
                             : (y + 0.5) * dsth / srch - center_y;
1133
                weight = f_resample(fabs(diff_y));
1134
                pos = (y * dstw + w) * depth;
1135
                acc256 = sixel_avx_muladd_ps(
1136
                    acc256,
1137
                    sixel_avx_load_rgb_ps(tmp + pos),
1138
                    (float)weight);
1139
                total += weight;
1140
            }
1141
            if (total > 0.0) {
1142
                pos = (h * dstw + w) * depth;
1143
                sixel_avx_store_rgb_u8(acc256, total, dst + pos);
1144
            }
1145
            continue;
1146
        }
1147
#endif
1148
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
1149
        if (depth == 3
8,246,450!
1150
# if defined(SIXEL_USE_SSE2)
1151
            && simd_level >= SIXEL_SIMD_LEVEL_SSE2
2,750,982!
1152
# elif defined(SIXEL_USE_NEON)
1153
            && simd_level >= SIXEL_SIMD_LEVEL_NEON
1,372,243!
1154
# endif
1155
            ) {
1156
#if defined(SIXEL_USE_SSE2)
1157
            acc128 = _mm_setzero_ps();
1158
#elif defined(SIXEL_USE_NEON)
1159
            acc_neon = vdupq_n_f32(0.0f);
1160
#endif
1161
            for (y = y_first; y <= y_last; y++) {
25,993,020✔
1162
                diff_y = (dsth >= srch)
43,739,590!
1163
                             ? (y + 0.5) - center_y
5,257,560✔
1164
                             : (y + 0.5) * dsth / srch - center_y;
21,869,795✔
1165
                weight = f_resample(fabs(diff_y));
21,869,795✔
1166
                pos = (y * dstw + w) * depth;
21,869,795✔
1167
                const unsigned char *psrc = tmp + pos;
21,869,795✔
1168
#if defined(SIXEL_USE_SSE2)
1169
                pixel128 = psrc[0] | (psrc[1] << 8) | (psrc[2] << 16);
14,969,122✔
1170
                pixi128 = _mm_cvtsi32_si128((int)pixel128);
14,969,122✔
1171
                pixi128 = _mm_unpacklo_epi8(pixi128, _mm_setzero_si128());
14,969,122✔
1172
                pixi128 = _mm_unpacklo_epi16(pixi128, _mm_setzero_si128());
14,969,122✔
1173
                pixf128 = _mm_cvtepi32_ps(pixi128);
14,969,122✔
1174
                wv128 = _mm_set1_ps((float)weight);
14,969,122✔
1175
                acc128 = _mm_add_ps(acc128, _mm_mul_ps(pixf128, wv128));
14,969,122✔
1176
#else /* NEON */
1177
                pix32_neon = (uint32x4_t){psrc[0], psrc[1], psrc[2], 0};
6,900,673✔
1178
                pixf_neon = vcvtq_f32_u32(pix32_neon);
6,900,673✔
1179
                wv_neon = vdupq_n_f32((float)weight);
6,900,673✔
1180
                acc_neon = vmlaq_f32(acc_neon, pixf_neon, wv_neon);
6,900,673✔
1181
#endif
1182
                total += weight;
21,869,795✔
1183
            }
1184
            if (total > 0.0) {
4,123,225!
1185
#if defined(SIXEL_USE_SSE2)
1186
                scalev128 = _mm_set1_ps((float)(1.0 / total));
2,750,982✔
1187
                acc128 = _mm_mul_ps(acc128, scalev128);
2,750,982✔
1188
                minv128 = _mm_set1_ps(0.0f);
2,750,982✔
1189
                maxv128 = _mm_set1_ps(255.0f);
2,750,982✔
1190
                acc128 = _mm_max_ps(minv128, _mm_min_ps(acc128, maxv128));
2,750,982✔
1191
                acci128 = _mm_cvtps_epi32(acc128);
2,750,982✔
1192
                acc16_128 = _mm_packs_epi32(acci128, _mm_setzero_si128());
2,750,982✔
1193
                acc16_128 = _mm_packus_epi16(acc16_128, _mm_setzero_si128());
2,750,982✔
1194
                pos = (h * dstw + w) * depth;
2,750,982✔
1195
                pixel128 = (unsigned int)_mm_cvtsi128_si32(acc16_128);
2,750,982✔
1196
                dst[pos + 0] = (unsigned char)pixel128;
2,750,982✔
1197
                dst[pos + 1] = (unsigned char)(pixel128 >> 8);
2,750,982✔
1198
                dst[pos + 2] = (unsigned char)(pixel128 >> 16);
2,750,982✔
1199
#else /* NEON */
1200
                scalev_neon = vdupq_n_f32((float)(1.0 / total));
1,372,243✔
1201
                acc_neon = vmulq_f32(acc_neon, scalev_neon);
1,372,243✔
1202
                minv_neon = vdupq_n_f32(0.0f);
1,372,243✔
1203
                maxv_neon = vdupq_n_f32(255.0f);
1,372,243✔
1204
                acc_neon = vmaxq_f32(minv_neon,
1,372,243✔
1205
                                     vminq_f32(acc_neon, maxv_neon));
1206
                acci_neon = vcvtq_u32_f32(acc_neon);
1,372,243✔
1207
                acc16_neon = vmovn_u32(acci_neon);
1,372,243✔
1208
                acc8_neon = vmovn_u16(vcombine_u16(acc16_neon, acc16_neon));
1,372,243✔
1209

1210
                vst1_u8(outb_neon, acc8_neon);
1,372,243✔
1211
                pos = (h * dstw + w) * depth;
1,372,243✔
1212
                dst[pos + 0] = outb_neon[0];
1,372,243✔
1213
                dst[pos + 1] = outb_neon[1];
1,372,243✔
1214
                dst[pos + 2] = outb_neon[2];
1,372,243✔
1215
#endif
1216
            }
1217
            continue;
4,123,225✔
1218
        }
1219
#endif /* SIMD paths */
1220
        for (y = y_first; y <= y_last; y++) {
8,860,052!
1221
            diff_y = (dsth >= srch)
14,969,122!
1222
                         ? (y + 0.5) - center_y
1,752,520✔
1223
                         : (y + 0.5) * dsth / srch - center_y;
7,484,561!
1224
            weight = f_resample(fabs(diff_y));
7,484,561✔
1225
            for (i = 0; i < depth; i++) {
37,422,805!
1226
                pos = (y * dstw + w) * depth + i;
22,453,683✔
1227
                offsets[i] += tmp[pos] * weight;
22,453,683✔
1228
            }
1229
            total += weight;
7,484,561✔
1230
        }
1231

1232
        if (total > 0.0) {
1,375,491!
1233
            for (i = 0; i < depth; i++) {
5,501,964!
1234
                pos = (h * dstw + w) * depth + i;
4,126,473✔
1235
                dst[pos] = normalize(offsets[i], total);
4,126,473✔
1236
            }
1237
        }
1238
    }
1239
}
14,603✔
1240

1241
#if defined(__GNUC__) && !defined(__clang__) && !defined(__PCC__)
1242
# pragma GCC diagnostic pop
1243
#endif
1244

1245
static void
1246
scale_with_resampling_serial(
83✔
1247
    unsigned char *dst,
1248
    unsigned char const *src,
1249
    int const srcw,
1250
    int const srch,
1251
    int const dstw,
1252
    int const dsth,
1253
    int const depth,
1254
    resample_fn_t const f_resample,
1255
    double const n,
1256
    unsigned char *tmp)
1257
{
1258
    int y;
83✔
1259
    int h;
83✔
1260
    int simd_level;
83✔
1261

1262
    simd_level = sixel_scale_simd_level();
83✔
1263
#if !defined(SIXEL_USE_AVX512) && !defined(SIXEL_USE_AVX2) && \
1264
    !defined(SIXEL_USE_AVX) && !defined(SIXEL_USE_SSE2) && \
1265
    !defined(SIXEL_USE_NEON)
1266
    /*
1267
     * GCC i686 builds can compile this function without any SIMD backends
1268
     * enabled; consume the detection result to keep the signature stable
1269
     * while avoiding an unused-but-set-variable warning.
1270
     */
1271
    (void)simd_level;
20✔
1272
#endif
1273

1274
    for (y = 0; y < srch; y++) {
32,806✔
1275
        scale_horizontal_row(tmp,
32,640✔
1276
                             src,
1277
                             srcw,
1278
                             dstw,
1279
                             depth,
1280
                             y,
1281
                             f_resample,
1282
                             n,
1283
                             simd_level);
1284
    }
1285

1286
    for (h = 0; h < dsth; h++) {
12,772✔
1287
        scale_vertical_row(dst,
12,689✔
1288
                           tmp,
1289
                           dstw,
1290
                           dsth,
1291
                           depth,
1292
                           srch,
1293
                           h,
1294
                           f_resample,
1295
                           n,
1296
                           simd_level);
1297
    }
1298
}
83✔
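/*
 * Illustrative sketch, not part of scale.c: a minimal serial invocation
 * of the two-pass path above.  The horizontal pass writes one row of
 * dstw pixels for every one of the srch source rows, so the scratch
 * buffer must hold dstw * srch * depth bytes before the vertical pass
 * resamples those columns into the dsth destination rows.  The wrapper
 * name is invented; lanczos3 with a support of 3.0 is just one possible
 * kernel choice.
 */
static int
example_resize_rgb_serial(
    unsigned char *dst,
    unsigned char const *src,
    int const srcw,
    int const srch,
    int const dstw,
    int const dsth)
{
    unsigned char *tmp;

    /* intermediate image: destination width, source height */
    tmp = (unsigned char *)malloc((size_t)dstw * (size_t)srch * 3);
    if (tmp == NULL) {
        return (-1);
    }
    scale_with_resampling_serial(dst, src, srcw, srch, dstw, dsth,
                                 3, lanczos3, 3.0, tmp);
    free(tmp);
    return 0;
}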
1299

1300
#if SIXEL_ENABLE_THREADS
1301
typedef enum scale_parallel_pass {
1302
    SCALE_PASS_HORIZONTAL = 0,
1303
    SCALE_PASS_VERTICAL = 1
1304
} scale_parallel_pass_t;
1305

1306
typedef struct scale_parallel_context {
1307
    unsigned char *dst;
1308
    unsigned char const *src;
1309
    unsigned char *tmp;
1310
    int srcw;
1311
    int srch;
1312
    int dstw;
1313
    int dsth;
1314
    int depth;
1315
    resample_fn_t f_resample;
1316
    double n;
1317
    scale_parallel_pass_t pass;
1318
    int simd_level;
1319
    int band_span;
1320
    sixel_logger_t *logger;
1321
} scale_parallel_context_t;
1322

1323
/*
1324
 * Emit timeline entries for every band so downstream aggregation can compute
1325
 * first/last activity windows per thread without losing information.
1326
 */
1327
static int
1328
scale_parallel_should_log(scale_parallel_context_t const *ctx, int index)
116✔
1329
{
1330
    int span;
116✔
1331

1332
    if (ctx == NULL || ctx->logger == NULL || !ctx->logger->active) {
116!
1333
        return 0;
1334
    }
1335

1336
    if (index < 0) {
×
1337
        return 0;
1338
    }
1339

1340
    if (ctx->pass == SCALE_PASS_HORIZONTAL) {
×
1341
        span = ctx->srch;
1342
    } else {
1343
        span = ctx->dsth;
1344
    }
1345

1346
    if (span <= 0 || index >= span) {
×
1347
        return 0;
1348
    }
1349

1350
    return 1;
1351
}
1352

1353
/*
1354
 * Allow callers to raise the floor for parallel execution using
1355
 * SIXEL_SCALE_PARALLEL_MIN_BYTES. The default of zero preserves the previous
1356
 * eager behavior while permitting deployments to defer threading on tiny
1357
 * inputs.
1358
 */
1359
static size_t
1360
scale_parallel_min_bytes(void)
67✔
1361
{
1362
    static int initialized = 0;
67✔
1363
    static size_t threshold = 0;
67✔
1364
    char const *text;
67✔
1365
    char *endptr;
67✔
1366
    unsigned long long parsed;
67✔
1367

1368
    if (initialized) {
67!
1369
        return threshold;
1370
    }
1371

1372
    initialized = 1;
67✔
1373
    text = sixel_compat_getenv("SIXEL_SCALE_PARALLEL_MIN_BYTES");
67✔
1374
    if (text == NULL || text[0] == '\0') {
67!
1375
        return threshold;
67✔
1376
    }
1377

1378
    errno = 0;
1379
    parsed = strtoull(text, &endptr, 10);
1380
    if (endptr == text || *endptr != '\0' || errno == ERANGE) {
×
1381
        return threshold;
1382
    }
1383

1384
    if (parsed > (unsigned long long)SIZE_MAX) {
×
1385
        threshold = SIZE_MAX;
1386
    } else {
1387
        threshold = (size_t)parsed;
1388
    }
1389

1390
    return threshold;
1391
}
1392

1393
/*
1394
 * Choose the number of rows handled per threadpool job. We prefer an
1395
 * environment override via SIXEL_PARALLEL_FACTOR so deployments can tune
1396
 * queueing overhead. Otherwise derive a span from rows/threads and clamp to
1397
 * [1, rows]. The value is cached after the first lookup.
1398
 */
1399
static int
1400
scale_parallel_band_span(int rows, int threads)
14✔
1401
{
1402
    static int initialized = 0;
14✔
1403
    static int env_span = 0;
14✔
1404
    char const *text;
14✔
1405
    char *endptr;
14✔
1406
    long parsed;
14✔
1407
    int span;
14✔
1408

1409
    if (rows <= 0) {
14!
1410
        return 1;
1411
    }
1412

1413
    if (!initialized) {
14!
1414
        initialized = 1;
7✔
1415
        text = sixel_compat_getenv("SIXEL_PARALLEL_FACTOR");
7✔
1416
        if (text != NULL && text[0] != '\0') {
7!
1417
            errno = 0;
1418
            parsed = strtol(text, &endptr, 10);
1419
            if (endptr != text && *endptr == '\0' && errno != ERANGE &&
×
1420
                parsed > 0 && parsed <= INT_MAX) {
×
1421
                env_span = (int)parsed;
1422
            }
1423
        }
1424
    }
1425

1426
    if (env_span > 0) {
14!
1427
        span = env_span;
1428
    } else {
1429
        span = rows / threads;
14✔
1430
    }
1431

1432
    if (span < 1) {
14!
1433
        span = 1;
1434
    }
1435
    if (span > rows) {
14!
1436
        span = rows;
1437
    }
1438

1439
    return span;
1440
}
1441
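/*
 * Illustrative sketch, not part of scale.c: how the span returned above
 * turns into threadpool jobs.  The submit loop further down enqueues one
 * job per band starting row (0, span, 2*span, ...), and the worker turns
 * a band_index back into the half-open row range [index, index + span)
 * clamped to the row count.  With rows = 100 and threads = 4 (and no
 * SIXEL_PARALLEL_FACTOR override) the span is 25, producing bands
 * [0,25), [25,50), [50,75) and [75,100).  Names below are invented.
 */
static void
example_band_bounds(
    int const band_index,
    int const band_span,
    int const rows,
    int *y0,
    int *y1)
{
    *y0 = band_index;
    *y1 = band_index + band_span;
    if (*y1 > rows) {
        *y1 = rows;     /* the final band may cover fewer rows */
    }
}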

1442
static int
1443
scale_parallel_worker(tp_job_t job, void *userdata, void *workspace)
58✔
1444
{
1445
    scale_parallel_context_t *ctx;
58✔
1446
    int index;
58✔
1447
    char const *role;
58✔
1448
    int y0;
58✔
1449
    int y1;
58✔
1450
    int in0;
58✔
1451
    int in1;
58✔
1452
    int limit;
58✔
1453
    int y;
58✔
1454

1455
    (void)workspace;
58✔
1456
    ctx = (scale_parallel_context_t *)userdata;
58✔
1457
    if (ctx == NULL) {
58!
1458
        return SIXEL_BAD_ARGUMENT;
1459
    }
1460

1461
    role = "horizontal";
58✔
1462
    y0 = 0;
58✔
1463
    y1 = 0;
58✔
1464
    in0 = 0;
58✔
1465
    in1 = 0;
58✔
1466
    index = job.band_index;
58✔
1467
    limit = ctx->srch;
58✔
1468
    if (ctx->pass == SCALE_PASS_HORIZONTAL) {
58!
1469
        limit = ctx->srch;
28✔
1470
    } else {
1471
        limit = ctx->dsth;
30✔
1472
    }
1473

1474
    if (index < 0 || index >= limit) {
58!
1475
        return SIXEL_BAD_ARGUMENT;
1476
    }
1477

1478
    y0 = index;
58✔
1479
    y1 = index + ctx->band_span;
58✔
1480
    if (y1 > limit) {
58!
1481
        y1 = limit;
1482
    }
1483

1484
    if (ctx->pass == SCALE_PASS_HORIZONTAL) {
58!
1485
        in1 = ctx->dstw;
28✔
1486
        if (scale_parallel_should_log(ctx, index)) {
28!
1487
            sixel_logger_logf(ctx->logger,
1488
                              role,
1489
                              "scale",
1490
                              "start",
1491
                              index,
1492
                              y1 - 1,
1493
                              y0,
1494
                              y1,
1495
                              in0,
1496
                              in1,
1497
                              "horizontal pass");
1498
        }
1499
        for (y = y0; y < y1; y++) {
10,276!
1500
            scale_horizontal_row(ctx->tmp,
10,248✔
1501
                                 ctx->src,
1502
                                 ctx->srcw,
1503
                                 ctx->dstw,
1504
                                 ctx->depth,
1505
                                 y,
1506
                                 ctx->f_resample,
1507
                                 ctx->n,
1508
                                 ctx->simd_level);
1509
        }
1510
    } else {
1511
        role = "vertical";
30✔
1512
        in1 = ctx->srch;
30✔
1513
        if (scale_parallel_should_log(ctx, index)) {
30!
1514
            sixel_logger_logf(ctx->logger,
1515
                              role,
1516
                              "scale",
1517
                              "start",
1518
                              index,
1519
                              y1 - 1,
1520
                              y0,
1521
                              y1,
1522
                              in0,
1523
                              in1,
                              "vertical pass");
        }
        for (y = y0; y < y1; y++) {
            scale_vertical_row(ctx->dst,
                               ctx->tmp,
                               ctx->dstw,
                               ctx->dsth,
                               ctx->depth,
                               ctx->srch,
                               y,
                               ctx->f_resample,
                               ctx->n,
                               ctx->simd_level);
        }
    }

    if (scale_parallel_should_log(ctx, index)) {
        sixel_logger_logf(ctx->logger,
                          role,
                          "scale",
                          "finish",
                          index,
                          y1 - 1,
                          y0,
                          y1,
                          in0,
                          in1,
                          "pass complete");
    }

    return SIXEL_OK;
}

/*
 * The parallel path mirrors the encoder and dither thread selection through
 * sixel_threads_resolve(). Rows are batched into jobs for both passes so the
 * threadpool stays saturated and queue overhead stays low without altering
 * the filtering math.
 */
static int
scale_with_resampling_parallel(
    unsigned char *dst,
    unsigned char const *src,
    int const srcw,
    int const srch,
    int const dstw,
    int const dsth,
    int const depth,
    resample_fn_t const f_resample,
    double const n,
    unsigned char *tmp,
    sixel_logger_t *logger)
{
    scale_parallel_context_t ctx;
    threadpool_t *pool;
    tp_job_t job;
    size_t image_bytes;
    int threads;
    int queue_depth;
    int y;
    int rc;
    int logger_ready;
    int horizontal_span;
    int vertical_span;

    image_bytes = (size_t)srcw * (size_t)srch * (size_t)depth;
    if (image_bytes < scale_parallel_min_bytes()) {
        if (logger != NULL) {
            sixel_logger_logf(logger,
                              "controller",
                              "scale",
                              "skip",
                              -1,
                              -1,
                              0,
                              0,
                              0,
                              0,
                              "below threshold bytes=%zu",
                              image_bytes);
        }
        return SIXEL_BAD_ARGUMENT;
    }

    threads = sixel_threads_resolve();
    if (threads < 2) {
        if (logger != NULL) {
            sixel_logger_logf(logger,
                              "controller",
                              "scale",
                              "skip",
                              -1,
                              -1,
                              0,
                              0,
                              0,
                              0,
                              "threads=%d",
                              threads);
        }
        return SIXEL_BAD_ARGUMENT;
    }

    logger_ready = logger != NULL && logger->active;
    if (logger_ready) {
        sixel_logger_logf(logger,
                          "controller",
                          "scale",
                          "start",
                          -1,
                          -1,
                          0,
                          srch,
                          0,
                          dsth,
                          "parallel scale src=%dx%d dst=%dx%d",
                          srcw,
                          srch,
                          dstw,
                          dsth);
    }

    ctx.dst = dst;
    ctx.src = src;
    ctx.tmp = tmp;
    ctx.srcw = srcw;
    ctx.srch = srch;
    ctx.dstw = dstw;
    ctx.dsth = dsth;
    ctx.depth = depth;
    ctx.f_resample = f_resample;
    ctx.n = n;
    ctx.simd_level = sixel_scale_simd_level();
    ctx.logger = logger_ready ? logger : NULL;

    /*
     * Batch rows to reduce queue churn. Prefer the environment override so
     * deployments can pin a consistent span; otherwise derive a default from
     * rows per thread.
     */
    horizontal_span = scale_parallel_band_span(srch, threads);
    vertical_span = scale_parallel_band_span(dsth, threads);

    queue_depth = threads * 3;
    if (queue_depth > srch) {
        queue_depth = srch;
    }
    if (queue_depth < 1) {
        queue_depth = 1;
    }

    ctx.pass = SCALE_PASS_HORIZONTAL;
    ctx.band_span = horizontal_span;
    if (logger_ready) {
        sixel_logger_logf(logger,
                          "controller",
                          "scale",
                          "pass_start",
                          -1,
                          0,
                          0,
                          srch,
                          0,
                          ctx.dstw,
                          "horizontal queue=%d threads=%d",
                          queue_depth,
                          threads);
    }
    pool = threadpool_create(threads,
                             queue_depth,
                             0,
                             scale_parallel_worker,
                             &ctx,
                             NULL);
    if (pool == NULL) {
        return SIXEL_BAD_ALLOCATION;
    }

    for (y = 0; y < srch; y += horizontal_span) {
        job.band_index = y;
        threadpool_push(pool, job);
    }
    threadpool_finish(pool);
    rc = threadpool_get_error(pool);
    threadpool_destroy(pool);
    if (rc != SIXEL_OK) {
        return rc;
    }

    if (logger_ready) {
        sixel_logger_logf(logger,
                          "controller",
                          "scale",
                          "pass_finish",
                          -1,
                          srch - 1,
                          0,
                          srch,
                          0,
                          ctx.dstw,
                          "horizontal complete");
    }

    queue_depth = threads * 3;
    if (queue_depth > dsth) {
        queue_depth = dsth;
    }
    if (queue_depth < 1) {
        queue_depth = 1;
    }

    ctx.pass = SCALE_PASS_VERTICAL;
    ctx.band_span = vertical_span;
    if (logger_ready) {
        sixel_logger_logf(logger,
                          "controller",
                          "scale",
                          "pass_start",
                          -1,
                          0,
                          0,
                          dsth,
                          0,
                          ctx.srch,
                          "vertical queue=%d threads=%d",
                          queue_depth,
                          threads);
    }
    pool = threadpool_create(threads,
                             queue_depth,
                             0,
                             scale_parallel_worker,
                             &ctx,
                             NULL);
    if (pool == NULL) {
        return SIXEL_BAD_ALLOCATION;
    }

    for (y = 0; y < dsth; y += vertical_span) {
        job.band_index = y;
        threadpool_push(pool, job);
    }
    threadpool_finish(pool);
    rc = threadpool_get_error(pool);
    threadpool_destroy(pool);

    if (logger_ready) {
        sixel_logger_logf(logger,
                          "controller",
                          "scale",
                          "pass_finish",
                          -1,
                          dsth - 1,
                          0,
                          dsth,
                          0,
                          ctx.srch,
                          "vertical complete rc=%d",
                          rc);
        sixel_logger_logf(logger,
                          "controller",
                          "scale",
                          "finish",
                          -1,
                          dsth - 1,
                          0,
                          dsth,
                          0,
                          ctx.srch,
                          "parallel scale status=%d",
                          rc);
    }

    return rc;
}
#endif /* SIXEL_ENABLE_THREADS */
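
/*
 * Job-to-band mapping used by both passes above: each tp_job_t carries only
 * band_index, the first row of its band, and the worker covers band_span
 * rows from there.  A minimal sketch of that mapping, assuming the clamping
 * behaviour of scale_parallel_worker earlier in this file (the limit is
 * srch for the horizontal pass and dsth for the vertical pass):
 *
 *     y0 = job.band_index;
 *     y1 = y0 + ctx->band_span;
 *     if (y1 > limit) {
 *         y1 = limit;
 *     }
 *
 * so the final band simply comes out shorter when the row count is not a
 * multiple of the span.
 */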

/*
 * Allocate shared scratch storage and attempt the parallel pipeline first so
 * larger inputs benefit from threading while smaller ones retain the serial
 * behavior.
 */
static void
scale_with_resampling(
    unsigned char *dst,
    unsigned char const *src,
    int const srcw,
    int const srch,
    int const dstw,
    int const dsth,
    int const depth,
    resample_fn_t const f_resample,
    double n,
    sixel_allocator_t *allocator)
{
    unsigned char *tmp;
    size_t tmp_size;
#if SIXEL_ENABLE_THREADS
    int rc;
    sixel_logger_t logger;
    int logger_prepared;
#endif

#if SIXEL_ENABLE_THREADS
    sixel_logger_init(&logger);
    logger_prepared = 0;
    (void)sixel_logger_prepare_env(&logger);
    logger_prepared = logger.active;
#endif

    tmp_size = (size_t)dstw * (size_t)srch * (size_t)depth;
    tmp = (unsigned char *)sixel_allocator_malloc(allocator, tmp_size);
    if (tmp == NULL) {
#if SIXEL_ENABLE_THREADS
        if (logger_prepared) {
            sixel_logger_close(&logger);
        }
#endif
        return;
    }

#if SIXEL_ENABLE_THREADS
    rc = scale_with_resampling_parallel(dst,
                                        src,
                                        srcw,
                                        srch,
                                        dstw,
                                        dsth,
                                        depth,
                                        f_resample,
                                        n,
                                        tmp,
                                        logger_prepared
                                            ? &logger
                                            : NULL);
    if (rc == SIXEL_OK) {
        sixel_allocator_free(allocator, tmp);
        if (logger_prepared) {
            sixel_logger_close(&logger);
        }
        return;
    }

    if (logger_prepared) {
        sixel_logger_logf(&logger,
                          "controller",
                          "scale",
                          "fallback",
                          -1,
                          -1,
                          0,
                          dsth,
                          0,
                          srch,
                          "parallel rc=%d",
                          rc);
    }
#endif

    scale_with_resampling_serial(dst,
                                 src,
                                 srcw,
                                 srch,
                                 dstw,
                                 dsth,
                                 depth,
                                 f_resample,
                                 n,
                                 tmp);

    sixel_allocator_free(allocator, tmp);
#if SIXEL_ENABLE_THREADS
    if (logger_prepared) {
        sixel_logger_close(&logger);
    }
#endif
}
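
/*
 * Note on the fallback above: SIXEL_BAD_ARGUMENT from
 * scale_with_resampling_parallel() is the expected "not worth threading"
 * signal (image below the byte threshold, or fewer than two worker threads
 * resolved), while SIXEL_BAD_ALLOCATION or a threadpool error indicates a
 * real failure; either way the function falls back to
 * scale_with_resampling_serial() so the caller still gets a scaled image.
 */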

/*
 * The floating-point scaler mirrors the byte-path SSE2 usage. Keep it
 * noinline on i386 so the SIXEL_ALIGN_STACK prologue stays in place when
 * SSE2 locals need to spill to the stack.
 */
static SIXEL_ALIGN_STACK SIXEL_NO_INLINE void
scale_with_resampling_float32(
    float *dst,
    float const *src,
    int const srcw,
    int const srch,
    int const dstw,
    int const dsth,
    int const depth,
    resample_fn_t const f_resample,
    double n,
    sixel_allocator_t *allocator)
{
    int w;
    int h;
    int x;
    int y;
    int i;
    int pos;
    int x_first;
    int x_last;
    int y_first;
    int y_last;
    double center_x;
    double center_y;
    double diff_x;
    double diff_y;
    double weight;
    double total;
    double offsets[8];
    float *tmp;
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
    float vecbuf[4];
#endif
    int simd_level;
#if defined(SIXEL_USE_AVX512)
    __m512 acc512;
    __m512 pix512;
#endif
#if defined(SIXEL_USE_AVX2) || defined(SIXEL_USE_AVX)
    __m256 acc256;
#endif
#if defined(SIXEL_USE_SSE2)
    __m128 acc128;
    __m128 pixf128;
    __m128 wv128;
    __m128 scalev128;
    __m128 minv128;
    __m128 maxv128;
#elif defined(SIXEL_USE_NEON)
    float32x4_t acc_neon;
    float32x4_t pixf_neon;
    float32x4_t wv_neon;
    float32x4_t scalev_neon;
    float32x4_t minv_neon;
    float32x4_t maxv_neon;
#endif

    tmp = (float *)sixel_allocator_malloc(
        allocator,
        (size_t)(dstw * srch * depth * (int)sizeof(float)));
    if (tmp == NULL) {
        return;
    }

    simd_level = sixel_scale_simd_level();
#if !defined(SIXEL_USE_AVX512) && !defined(SIXEL_USE_AVX2) && \
    !defined(SIXEL_USE_AVX) && !defined(SIXEL_USE_SSE2) && \
    !defined(SIXEL_USE_NEON)
    /*
     * GCC i686 builds can reach this function with every SIMD backend
     * compiled out; acknowledge the detection result so the compiler does
     * not flag the value as set but unused while keeping the signature
     * intact.
     */
    (void)simd_level;
#endif

    /* horizontal pass: resample each source row into the dstw-wide scratch */
    for (y = 0; y < srch; y++) {
        for (w = 0; w < dstw; w++) {
            total = 0.0;
            for (i = 0; i < depth; i++) {
                offsets[i] = 0.0;
            }

            if (dstw >= srcw) {
                center_x = (w + 0.5) * srcw / dstw;
                x_first = MAX((int)(center_x - n), 0);
                x_last = MIN((int)(center_x + n), srcw - 1);
            } else {
                center_x = w + 0.5;
                x_first = MAX((int)floor((center_x - n) * srcw / dstw), 0);
                x_last = MIN((int)floor((center_x + n) * srcw / dstw),
                             srcw - 1);
            }

#if defined(SIXEL_USE_AVX512)
            if (depth == 3 &&
                simd_level >= SIXEL_SIMD_LEVEL_AVX512) {
                acc512 = sixel_avx512_zero_ps();

                for (x = x_first; x <= x_last; x++) {
                    diff_x = (dstw >= srcw)
                                 ? (x + 0.5) - center_x
                                 : (x + 0.5) * srcw / dstw - center_x;
                    weight = f_resample(fabs(diff_x));
                    pos = (y * srcw + x) * depth;
                    pix512 = sixel_avx512_load_rgb_f32(src + pos);
                    acc512 = sixel_avx512_muladd_ps(
                        acc512,
                        pix512,
                        (float)weight);
                    total += weight;
                }
                if (total > 0.0) {
                    pos = (y * dstw + w) * depth;
                    sixel_avx512_store_rgb_f32(&acc512, total, tmp + pos);
                }
            } else
#endif
#if defined(SIXEL_USE_AVX2)
            if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX2) {
                acc256 = sixel_avx2_zero_ps();

                for (x = x_first; x <= x_last; x++) {
                    diff_x = (dstw >= srcw)
                                 ? (x + 0.5) - center_x
                                 : (x + 0.5) * srcw / dstw - center_x;
                    weight = f_resample(fabs(diff_x));
                    pos = (y * srcw + x) * depth;
                    acc256 = sixel_avx2_muladd_ps(
                        acc256,
                        sixel_avx2_load_rgb_f32(src + pos),
                        (float)weight);
                    total += weight;
                }
                if (total > 0.0) {
                    pos = (y * dstw + w) * depth;
                    sixel_avx2_store_rgb_f32(acc256, total, tmp + pos);
                }
            } else
#endif
#if defined(SIXEL_USE_AVX)
            if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX) {
                acc256 = sixel_avx_zero_ps();

                for (x = x_first; x <= x_last; x++) {
                    diff_x = (dstw >= srcw)
                                 ? (x + 0.5) - center_x
                                 : (x + 0.5) * srcw / dstw - center_x;
                    weight = f_resample(fabs(diff_x));
                    pos = (y * srcw + x) * depth;
                    acc256 = sixel_avx_muladd_ps(
                        acc256,
                        sixel_avx_load_rgb_f32(src + pos),
                        (float)weight);
                    total += weight;
                }
                if (total > 0.0) {
                    pos = (y * dstw + w) * depth;
                    sixel_avx_store_rgb_f32(acc256, total, tmp + pos);
                }
            } else
#endif
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
            if (depth == 3
# if defined(SIXEL_USE_SSE2)
                && simd_level >= SIXEL_SIMD_LEVEL_SSE2
# elif defined(SIXEL_USE_NEON)
                && simd_level >= SIXEL_SIMD_LEVEL_NEON
# endif
                ) {
#if defined(SIXEL_USE_SSE2)
                acc128 = _mm_setzero_ps();
                minv128 = _mm_set1_ps(0.0f);
                maxv128 = _mm_set1_ps(1.0f);
#elif defined(SIXEL_USE_NEON)
                acc_neon = vdupq_n_f32(0.0f);
                minv_neon = vdupq_n_f32(0.0f);
                maxv_neon = vdupq_n_f32(1.0f);
#endif
                for (x = x_first; x <= x_last; x++) {
                    diff_x = (dstw >= srcw)
                                 ? (x + 0.5) - center_x
                                 : (x + 0.5) * srcw / dstw - center_x;
                    weight = f_resample(fabs(diff_x));
                    pos = (y * srcw + x) * depth;
                    const float *psrc = src + pos;
#if defined(SIXEL_USE_SSE2)
                    pixf128 = _mm_set_ps(
                        0.0f, psrc[2], psrc[1], psrc[0]);
                    wv128 = _mm_set1_ps((float)weight);
                    acc128 = _mm_add_ps(acc128,
                                        _mm_mul_ps(pixf128, wv128));
#else /* NEON */
                    /*
                     * Expand the RGB triple into a NEON vector without
                     * brace initialization to keep older toolchains
                     * happy.
                     */
                    pixf_neon = vdupq_n_f32(0.0f);
                    pixf_neon = vsetq_lane_f32(psrc[0], pixf_neon, 0);
                    pixf_neon = vsetq_lane_f32(psrc[1], pixf_neon, 1);
                    pixf_neon = vsetq_lane_f32(psrc[2], pixf_neon, 2);
                    wv_neon = vdupq_n_f32((float)weight);
                    acc_neon = vmlaq_f32(acc_neon, pixf_neon, wv_neon);
#endif
                    total += weight;
                }
                if (total > 0.0) {
#if defined(SIXEL_USE_SSE2)
                    scalev128 = _mm_set1_ps((float)(1.0 / total));
                    acc128 = _mm_mul_ps(acc128, scalev128);
                    acc128 = _mm_max_ps(minv128,
                                        _mm_min_ps(acc128, maxv128));
                    _mm_storeu_ps(vecbuf, acc128);
#else /* NEON */
                    scalev_neon = vdupq_n_f32(
                        (float)(1.0 / total));
                    acc_neon = vmulq_f32(acc_neon, scalev_neon);
                    acc_neon = vmaxq_f32(minv_neon,
                                         vminq_f32(acc_neon, maxv_neon));
                    vst1q_f32(vecbuf, acc_neon);
#endif
                    pos = (y * dstw + w) * depth;
                    tmp[pos + 0] = vecbuf[0];
                    tmp[pos + 1] = vecbuf[1];
                    tmp[pos + 2] = vecbuf[2];
                }
            } else
#endif
            {
                for (x = x_first; x <= x_last; x++) {
                    diff_x = (dstw >= srcw)
                                 ? (x + 0.5) - center_x
                                 : (x + 0.5) * srcw / dstw - center_x;
                    weight = f_resample(fabs(diff_x));
                    for (i = 0; i < depth; i++) {
                        pos = (y * srcw + x) * depth + i;
                        offsets[i] += src[pos] * weight;
                    }
                    total += weight;
                }

                if (total > 0.0) {
                    for (i = 0; i < depth; i++) {
                        pos = (y * dstw + w) * depth + i;
                        tmp[pos] = sixel_clamp_unit_f32(
                            (float)(offsets[i] / total));
                    }
                }
            }
        }
    }

    /* vertical pass: resample the scratch columns into the destination */
    for (h = 0; h < dsth; h++) {
        for (w = 0; w < dstw; w++) {
            total = 0.0;
            for (i = 0; i < depth; i++) {
                offsets[i] = 0.0;
            }

            if (dsth >= srch) {
                center_y = (h + 0.5) * srch / dsth;
                y_first = MAX((int)(center_y - n), 0);
                y_last = MIN((int)(center_y + n), srch - 1);
            } else {
                center_y = h + 0.5;
                y_first = MAX((int)floor((center_y - n) * srch / dsth), 0);
                y_last = MIN((int)floor((center_y + n) * srch / dsth),
                             srch - 1);
            }

#if defined(SIXEL_USE_AVX512)
            if (depth == 3 &&
                simd_level >= SIXEL_SIMD_LEVEL_AVX512) {
                acc512 = sixel_avx512_zero_ps();

                for (y = y_first; y <= y_last; y++) {
                    diff_y = (dsth >= srch)
                                 ? (y + 0.5) - center_y
                                 : (y + 0.5) * dsth / srch - center_y;
                    weight = f_resample(fabs(diff_y));
                    pos = (y * dstw + w) * depth;
                    pix512 = sixel_avx512_load_rgb_f32(tmp + pos);
                    acc512 = sixel_avx512_muladd_ps(
                        acc512,
                        pix512,
                        (float)weight);
                    total += weight;
                }
                if (total > 0.0) {
                    pos = (h * dstw + w) * depth;
                    sixel_avx512_store_rgb_f32(&acc512, total, dst + pos);
                }
            } else
#endif
#if defined(SIXEL_USE_AVX2)
            if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX2) {
                acc256 = sixel_avx2_zero_ps();

                for (y = y_first; y <= y_last; y++) {
                    diff_y = (dsth >= srch)
                                 ? (y + 0.5) - center_y
                                 : (y + 0.5) * dsth / srch - center_y;
                    weight = f_resample(fabs(diff_y));
                    pos = (y * dstw + w) * depth;
                    acc256 = sixel_avx2_muladd_ps(
                        acc256,
                        sixel_avx2_load_rgb_f32(tmp + pos),
                        (float)weight);
                    total += weight;
                }
                if (total > 0.0) {
                    pos = (h * dstw + w) * depth;
                    sixel_avx2_store_rgb_f32(acc256, total, dst + pos);
                }
            } else
#endif
#if defined(SIXEL_USE_AVX)
            if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX) {
                acc256 = sixel_avx_zero_ps();

                for (y = y_first; y <= y_last; y++) {
                    diff_y = (dsth >= srch)
                                 ? (y + 0.5) - center_y
                                 : (y + 0.5) * dsth / srch - center_y;
                    weight = f_resample(fabs(diff_y));
                    pos = (y * dstw + w) * depth;
                    acc256 = sixel_avx_muladd_ps(
                        acc256,
                        sixel_avx_load_rgb_f32(tmp + pos),
                        (float)weight);
                    total += weight;
                }
                if (total > 0.0) {
                    pos = (h * dstw + w) * depth;
                    sixel_avx_store_rgb_f32(acc256, total, dst + pos);
                }
            } else
#endif
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
            if (depth == 3
# if defined(SIXEL_USE_SSE2)
                && simd_level >= SIXEL_SIMD_LEVEL_SSE2
# elif defined(SIXEL_USE_NEON)
                && simd_level >= SIXEL_SIMD_LEVEL_NEON
# endif
                ) {
#if defined(SIXEL_USE_SSE2)
                acc128 = _mm_setzero_ps();
                minv128 = _mm_set1_ps(0.0f);
                maxv128 = _mm_set1_ps(1.0f);
#elif defined(SIXEL_USE_NEON)
                acc_neon = vdupq_n_f32(0.0f);
                minv_neon = vdupq_n_f32(0.0f);
                maxv_neon = vdupq_n_f32(1.0f);
#endif
                for (y = y_first; y <= y_last; y++) {
                    diff_y = (dsth >= srch)
                                 ? (y + 0.5) - center_y
                                 : (y + 0.5) * dsth / srch - center_y;
                    weight = f_resample(fabs(diff_y));
                    pos = (y * dstw + w) * depth;
                    const float *psrc = tmp + pos;
#if defined(SIXEL_USE_SSE2)
                    pixf128 = _mm_set_ps(
                        0.0f, psrc[2], psrc[1], psrc[0]);
                    wv128 = _mm_set1_ps((float)weight);
                    acc128 = _mm_add_ps(acc128,
                                        _mm_mul_ps(pixf128, wv128));
#else /* NEON */
                    /*
                     * Expand the RGB triple into a NEON vector without
                     * brace initialization to keep older toolchains
                     * happy.
                     */
                    pixf_neon = vdupq_n_f32(0.0f);
                    pixf_neon = vsetq_lane_f32(psrc[0], pixf_neon, 0);
                    pixf_neon = vsetq_lane_f32(psrc[1], pixf_neon, 1);
                    pixf_neon = vsetq_lane_f32(psrc[2], pixf_neon, 2);
                    wv_neon = vdupq_n_f32((float)weight);
                    acc_neon = vmlaq_f32(acc_neon, pixf_neon, wv_neon);
#endif
                    total += weight;
                }
                if (total > 0.0) {
#if defined(SIXEL_USE_SSE2)
                    scalev128 = _mm_set1_ps((float)(1.0 / total));
                    acc128 = _mm_mul_ps(acc128, scalev128);
                    acc128 = _mm_max_ps(minv128,
                                        _mm_min_ps(acc128, maxv128));
                    _mm_storeu_ps(vecbuf, acc128);
#else /* NEON */
                    scalev_neon = vdupq_n_f32(
                        (float)(1.0 / total));
                    acc_neon = vmulq_f32(acc_neon, scalev_neon);
                    acc_neon = vmaxq_f32(minv_neon,
                                         vminq_f32(acc_neon, maxv_neon));
                    vst1q_f32(vecbuf, acc_neon);
#endif
                    pos = (h * dstw + w) * depth;
                    dst[pos + 0] = vecbuf[0];
                    dst[pos + 1] = vecbuf[1];
                    dst[pos + 2] = vecbuf[2];
                }
            } else
#endif
            {
                for (y = y_first; y <= y_last; y++) {
                    diff_y = (dsth >= srch)
                                 ? (y + 0.5) - center_y
                                 : (y + 0.5) * dsth / srch - center_y;
                    weight = f_resample(fabs(diff_y));
                    for (i = 0; i < depth; i++) {
                        pos = (y * dstw + w) * depth + i;
                        offsets[i] += tmp[pos] * weight;
                    }
                    total += weight;
                }

                if (total > 0.0) {
                    for (i = 0; i < depth; i++) {
                        pos = (h * dstw + w) * depth + i;
                        dst[pos] = sixel_clamp_unit_f32(
                            (float)(offsets[i] / total));
                    }
                }
            }
        }
    }

    sixel_allocator_free(allocator, tmp);
}
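
/*
 * Both passes above normalize each channel by the accumulated weight and
 * then clamp it to [0, 1]: the SIMD branches do so with their min/max pairs
 * against 0.0f/1.0f, the scalar branch through sixel_clamp_unit_f32().
 * This matters for kernels with negative lobes (bicubic, lanczos2..4),
 * where a ringing sum such as -0.02 is stored as 0.0f instead of leaking a
 * negative sample into later stages.
 */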


SIXELAPI int
sixel_helper_scale_image(
    unsigned char       /* out */ *dst,
    unsigned char const /* in */  *src,                   /* source image data */
    int                 /* in */  srcw,                   /* source image width */
    int                 /* in */  srch,                   /* source image height */
    int                 /* in */  pixelformat,            /* one of enum pixelFormat */
    int                 /* in */  dstw,                   /* destination image width */
    int                 /* in */  dsth,                   /* destination image height */
    int                 /* in */  method_for_resampling,  /* one of methodForResampling */
    sixel_allocator_t   /* in */  *allocator)             /* allocator object */
{
    /*
     * Convert the source image to RGB24 if necessary and scale it to the
     * requested destination size.  The caller supplies an allocator used
     * for any temporary buffers required during conversion or filtering.
     */
    int const depth = sixel_helper_compute_depth(pixelformat);
    unsigned char *new_src = NULL;  /* optional converted source buffer */
    int nret;
    int new_pixelformat;

    /* ensure the scaler operates on RGB triples */
    if (depth != 3) {
        new_src = (unsigned char *)sixel_allocator_malloc(allocator,
                                                          (size_t)(srcw * srch * 3));
        if (new_src == NULL) {
            return (-1);
        }
        nret = sixel_helper_normalize_pixelformat(new_src,
                                                  &new_pixelformat,
                                                  src, pixelformat,
                                                  srcw, srch);
        if (nret != 0) {
            sixel_allocator_free(allocator, new_src);
            return (-1);
        }

        src = new_src;  /* use converted buffer from here on */
    } else {
        new_pixelformat = pixelformat;
    }

    /* choose re-sampling strategy */
    switch (method_for_resampling) {
    case SIXEL_RES_NEAREST:
        scale_without_resampling(dst, src, srcw, srch, dstw, dsth, depth);
        break;
    case SIXEL_RES_GAUSSIAN:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              gaussian, 1.0, allocator);
        break;
    case SIXEL_RES_HANNING:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              hanning, 1.0, allocator);
        break;
    case SIXEL_RES_HAMMING:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              hamming, 1.0, allocator);
        break;
    case SIXEL_RES_WELSH:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              welsh, 1.0, allocator);
        break;
    case SIXEL_RES_BICUBIC:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              bicubic, 2.0, allocator);
        break;
    case SIXEL_RES_LANCZOS2:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              lanczos2, 2.0, allocator);
        break;
    case SIXEL_RES_LANCZOS3:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              lanczos3, 3.0, allocator);
        break;
    case SIXEL_RES_LANCZOS4:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              lanczos4, 4.0, allocator);
        break;
    case SIXEL_RES_BILINEAR:
    default:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              bilinear, 1.0, allocator);
        break;
    }

    /* release temporary copy created for pixel-format normalization */
    sixel_allocator_free(allocator, new_src);
    return 0;
}
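
/*
 * Usage sketch for the helper above (kept out of the build with #if 0; the
 * buffer sizes and the 64x64 -> 32x32 geometry are illustrative, and the
 * allocator setup assumes sixel_allocator_new() with NULL hooks selects the
 * default malloc/calloc/realloc/free implementations, as declared in the
 * public sixel.h).
 */
#if 0
static int
example_scale_rgb888_down(unsigned char const *src_rgb888_64x64)
{
    /* the destination buffer must hold dstw * dsth * 3 bytes for RGB888 */
    unsigned char dst_rgb888_32x32[32 * 32 * 3];
    sixel_allocator_t *allocator = NULL;
    int status;

    if (SIXEL_FAILED(sixel_allocator_new(&allocator,
                                         NULL, NULL, NULL, NULL))) {
        return (-1);
    }

    status = sixel_helper_scale_image(dst_rgb888_32x32,
                                      src_rgb888_64x64,
                                      64, 64,
                                      SIXEL_PIXELFORMAT_RGB888,
                                      32, 32,
                                      SIXEL_RES_BILINEAR,
                                      allocator);

    sixel_allocator_unref(allocator);
    return status;
}
#endif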

SIXELAPI int
sixel_helper_scale_image_float32(
    float             /* out */ *dst,
    float const       /* in */  *src,
    int               /* in */  srcw,
    int               /* in */  srch,
    int               /* in */  pixelformat,
    int               /* in */  dstw,
    int               /* in */  dsth,
    int               /* in */  method_for_resampling,
    sixel_allocator_t /* in */  *allocator)
{
    int depth;
    int depth_bytes;

    depth_bytes = sixel_helper_compute_depth(pixelformat);
    if (depth_bytes <= 0) {
        return (-1);
    }

    /*
     * sixel_helper_compute_depth() reports bytes per pixel, so a
     * three-channel float32 format yields depth_bytes == 12 and
     * depth == 3; formats whose byte size is not a multiple of
     * sizeof(float) are rejected.
     */
    depth = depth_bytes / (int)sizeof(float);
    if (depth * (int)sizeof(float) != depth_bytes) {
        return (-1);
    }

    switch (method_for_resampling) {
    case SIXEL_RES_NEAREST:
        scale_without_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth);
        break;
    case SIXEL_RES_GAUSSIAN:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            gaussian, 1.0, allocator);
        break;
    case SIXEL_RES_HANNING:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            hanning, 1.0, allocator);
        break;
    case SIXEL_RES_HAMMING:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            hamming, 1.0, allocator);
        break;
    case SIXEL_RES_WELSH:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            welsh, 1.0, allocator);
        break;
    case SIXEL_RES_BICUBIC:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            bicubic, 2.0, allocator);
        break;
    case SIXEL_RES_LANCZOS2:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            lanczos2, 2.0, allocator);
        break;
    case SIXEL_RES_LANCZOS3:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            lanczos3, 3.0, allocator);
        break;
    case SIXEL_RES_LANCZOS4:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            lanczos4, 4.0, allocator);
        break;
    case SIXEL_RES_BILINEAR:
    default:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            bilinear, 1.0, allocator);
        break;
    }

    return 0;
}

#if HAVE_TESTS

/*
 * Nearest-neighbor reference used by the self tests: destination pixel
 * (w, h) copies source pixel (w * srcw / dstw, h * srch / dsth); for
 * example, with srcw == 8 and dstw == 3 the destination columns sample
 * source columns 0, 2 and 5.
 */
static void
reference_scale(
    unsigned char *dst,
    unsigned char const *src,
    int const srcw,
    int const srch,
    int const dstw,
    int const dsth,
    int const depth)
{
    int w;
    int h;
    int x;
    int y;
    int i;
    int pos;

    for (h = 0; h < dsth; h++) {
        for (w = 0; w < dstw; w++) {
            x = (long)w * srcw / dstw;
            y = (long)h * srch / dsth;
            for (i = 0; i < depth; i++) {
                pos = (y * srcw + x) * depth + i;
                dst[(h * dstw + w) * depth + i] = src[pos];
            }
        }
    }
}

static int
test_without_resampling_case(
    int srcw,
    int srch,
    int dstw,
    int dsth,
    int depth)
{
    int nret = EXIT_FAILURE;
    size_t srcsize = (size_t)srcw * srch * depth;
    size_t dstsize = (size_t)dstw * dsth * depth;
    unsigned char *src = NULL;
    unsigned char *ref = NULL;
    unsigned char *out = NULL;
    size_t i;

    src = (unsigned char *)malloc(srcsize);
    ref = (unsigned char *)malloc(dstsize);
    out = (unsigned char *)malloc(dstsize);
    if (src == NULL || ref == NULL || out == NULL) {
        goto end;
    }

    for (i = 0; i < srcsize; ++i) {
        src[i] = (unsigned char)(i & 0xff);
    }

    reference_scale(ref, src, srcw, srch, dstw, dsth, depth);
    scale_without_resampling(out, src, srcw, srch, dstw, dsth, depth);

    if (memcmp(ref, out, dstsize) != 0) {
        goto end;
    }

    nret = EXIT_SUCCESS;

end:
    free(src);
    free(ref);
    free(out);
    return nret;
}

SIXELAPI int
sixel_scale_tests_main(void)
{
    int nret = EXIT_FAILURE;
    size_t i;
    struct {
        int srcw;
        int srch;
        int dstw;
        int dsth;
        int depth;
    } cases[] = {
        {8, 4, 3, 7, 3},
        {13, 9, 17, 6, 4}
    };

    for (i = 0; i < sizeof(cases) / sizeof(cases[0]); ++i) {
        nret = test_without_resampling_case(cases[i].srcw,
                                            cases[i].srch,
                                            cases[i].dstw,
                                            cases[i].dsth,
                                            cases[i].depth);
        if (nret != EXIT_SUCCESS) {
            goto end;
        }
    }

    nret = EXIT_SUCCESS;

end:
    return nret;
}

#endif /* HAVE_TESTS */

#if defined(__GNUC__) && !defined(__clang__) && !defined(__PCC__)
# pragma GCC diagnostic pop
#endif

/* emacs Local Variables:      */
/* emacs mode: c               */
/* emacs tab-width: 4          */
/* emacs indent-tabs-mode: nil */
/* emacs c-basic-offset: 4     */
/* emacs End:                  */
/* vim: set expandtab ts=4 sts=4 sw=4 : */
/* EOF */