saitoha / libsixel / 20466639304

23 Dec 2025 04:53PM UTC coverage: 51.46% (-6.3%) from 57.773%
push

github

saitoha
build: fix windows find path in images meson build

14511 of 44933 branches covered (32.29%)

21089 of 40981 relevant lines covered (51.46%)

3,915,123.44 hits per line

Source File: /src/scale.c (89.93%)

1
/*
2
 * SPDX-License-Identifier: MIT
3
 *
4
 * Copyright (c) 2021-2025 libsixel developers. See `AUTHORS`.
5
 * Copyright (c) 2014-2016 Hayaki Saito
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
8
 * this software and associated documentation files (the "Software"), to deal in
9
 * the Software without restriction, including without limitation the rights to
10
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
11
 * the Software, and to permit persons to whom the Software is furnished to do so,
12
 * subject to the following conditions:
13
 *
14
 * The above copyright notice and this permission notice shall be included in all
15
 * copies or substantial portions of the Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
19
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
20
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
21
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
 */
24

25
#include "config.h"
26

27
/* STDC_HEADERS */
28
#include <stdlib.h>
29

30
#if HAVE_ERRNO_H
31
# include <errno.h>
32
#endif  /* HAVE_ERRNO_H */
33
#if HAVE_LIMITS_H
34
# include <limits.h>
35
#endif  /* HAVE_LIMITS_H */
36
#if HAVE_STRING_H
37
# include <string.h>
38
#endif  /* HAVE_STRING_H */
39
#if HAVE_STDINT_H
40
# include <stdint.h>
41
#endif  /* HAVE_STDINT_H */
42

43
#if HAVE_MATH_H
44
# define _USE_MATH_DEFINES  /* for MSVC */
45
# include <math.h>
46
#endif  /* HAVE_MATH_H */
47
#ifndef M_PI
48
# define M_PI 3.14159265358979323846
49
#endif
50

51
#include <sixel.h>
52

53
#include "cpu.h"
54
#include "logger.h"
55
#include "compat_stub.h"
56
#include "threading.h"
57

58
#if SIXEL_ENABLE_THREADS
59
# include "threadpool.h"
60
#endif
61

62
#if defined(__GNUC__) && defined(__i386__)
63
/*
64
 * i386 callers may enter with only 4- or 8-byte stack alignment. Force
65
 * realignment for SSE2-heavy routines to avoid movaps spills to unaligned
66
 * stack slots when SIMD is enabled via SIXEL_SIMD_LEVEL. Mark affected
67
 * functions noinline so the prologue that performs realignment is not
68
 * dropped by inlining.
69
 */
70
# define SIXEL_ALIGN_STACK __attribute__((force_align_arg_pointer))
71
# define SIXEL_NO_INLINE __attribute__((noinline))
72
#else
73
# define SIXEL_ALIGN_STACK
74
# define SIXEL_NO_INLINE
75
#endif
76

77
#if defined(HAVE_IMMINTRIN_H) && \
78
    (defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \
79
     defined(_M_IX86))
80
# define SIXEL_HAS_X86_INTRIN 1
81
# include <immintrin.h>
82
#endif
83

84
#if defined(__GNUC__) && !defined(__clang__)
85
/*
86
 * GCC reports a -Wpsabi note when __m512 parameters are present because the
87
 * calling convention changed in GCC 4.6. All callers and callees in this
88
 * translation unit share the same compiler, so suppress the note globally to
89
 * keep the output clean on AVX-512 builds.
90
 */
91
#pragma GCC diagnostic ignored "-Wpsabi"
92
#endif
93

94
#if defined(HAVE_SSE2)
95
/*
96
 * MSVC does not define __SSE2__ on x86/x64.  Instead, rely on the
97
 * architecture macros it provides so SIMD paths stay enabled after the
98
 * configure probe has validated SSE2 support.
99
 */
100
# if defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
101
    (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
102
#  if defined(HAVE_EMMINTRIN_H)
103
#   include <emmintrin.h>
104
#   define SIXEL_USE_SSE2 1
105
#  endif
106
# endif
107
#endif
108

109
#if defined(SIXEL_HAS_X86_INTRIN)
110
/* Reset ISA target macros in case another compilation unit defined them */
111
/* earlier in a unity or amalgamation build. */
112
# if defined(SIXEL_TARGET_AVX)
113
#  undef SIXEL_TARGET_AVX
114
# endif
115
# if defined(SIXEL_TARGET_AVX2)
116
#  undef SIXEL_TARGET_AVX2
117
# endif
118
# if defined(SIXEL_TARGET_AVX512)
119
#  undef SIXEL_TARGET_AVX512
120
# endif
121
# if defined(__GNUC__)
122
#  if !defined(__clang__)
123
#   define SIXEL_TARGET_AVX __attribute__((target("avx")))
124
#   define SIXEL_TARGET_AVX2 __attribute__((target("avx2")))
125
#   define SIXEL_TARGET_AVX512 __attribute__((target("avx512f")))
126
#   define SIXEL_USE_AVX 1
127
#  else
128
/*
129
 * clang rejects returning AVX vectors when the translation unit target
130
 * does not already include the corresponding ISA.  Guard runtime AVX
131
 * helpers with compile-time ISA availability to keep non-AVX builds
132
 * warning-free while still using AVX when the compiler enables it.
133
 */
134
#   define SIXEL_TARGET_AVX
135
#   define SIXEL_TARGET_AVX2
136
#   define SIXEL_TARGET_AVX512
137
#   if defined(__AVX__)
138
#    define SIXEL_USE_AVX 1
139
#   endif
140
#   if defined(__AVX2__)
141
#    define SIXEL_USE_AVX2 1
142
#   endif
143
#   if defined(__AVX512F__)
144
#    define SIXEL_USE_AVX512 1
145
#   endif
146
#  endif
147
# else
148
#  define SIXEL_TARGET_AVX
149
#  define SIXEL_TARGET_AVX2
150
#  define SIXEL_TARGET_AVX512
151
#  if defined(__AVX__)
152
#   define SIXEL_USE_AVX 1
153
#  endif
154
#  if defined(__AVX2__)
155
#   define SIXEL_USE_AVX2 1
156
#  endif
157
#  if defined(__AVX512F__)
158
#   define SIXEL_USE_AVX512 1
159
#  endif
160
# endif
161
#endif
162

163
#if defined(__GNUC__) && !defined(__clang__)
164
# pragma GCC diagnostic push
165
# pragma GCC diagnostic ignored "-Wpsabi"
166
# undef SIXEL_USE_AVX
167
# undef SIXEL_USE_AVX2
168
# undef SIXEL_USE_AVX512
169
#endif
170

171
#if defined(HAVE_NEON)
172
# if (defined(__ARM_NEON) || defined(__ARM_NEON__))
173
#  if defined(HAVE_ARM_NEON_H)
174
#   include <arm_neon.h>
175
#   define SIXEL_USE_NEON 1
176
#  endif
177
# endif
178
#endif
179

180
#if !defined(MAX)
181
# define MAX(l, r) ((l) > (r) ? (l) : (r))
182
#endif
183
#if !defined(MIN)
184
#define MIN(l, r) ((l) < (r) ? (l) : (r))
185
#endif
186

187

188
#if 0
189
/* function Nearest Neighbor */
190
static double
191
nearest_neighbor(double const d)
192
{
193
    if (d <= 0.5) {
194
        return 1.0;
195
    }
196
    return 0.0;
197
}
198
#endif
199

200

201
/* function Bi-linear */
202
static double
203
bilinear(double const d)
136,831,660✔
204
{
205
    if (d < 1.0) {
136,831,660✔
206
        return 1.0 - d;
115,432,072✔
207
    }
208
    return 0.0;
209
}
210

211

212
/* function Welsh */
213
static double
214
welsh(double const d)
3,349,600✔
215
{
216
    if (d < 1.0) {
3,349,600✔
217
        return 1.0 - d * d;
598,400✔
218
    }
219
    return 0.0;
220
}
221

222

223
/* function Bi-cubic */
224
static double
225
bicubic(double const d)
5,192,830✔
226
{
227
    if (d <= 1.0) {
5,192,830✔
228
        return 1.0 + (d - 2.0) * d * d;
2,506,830✔
229
    }
230
    if (d <= 2.0) {
2,686,000✔
231
        return 4.0 + d * (-8.0 + d * (5.0 - d));
2,480,400✔
232
    }
233
    return 0.0;
234
}
235

236

237
/* function sinc
238
 * sinc(x) = sin(PI * x) / (PI * x)
239
 */
240
static double
241
sinc(double const x)
107,685,264✔
242
{
243
    return sin(M_PI * x) / (M_PI * x);
107,685,264✔
244
}
245

246

247
/* function Lanczos-2
248
 * Lanczos(x) = sinc(x) * sinc(x / 2) , |x| <= 2
249
 *            = 0, |x| > 2
250
 */
251
static double
252
lanczos2(double const d)
12,435,064✔
253
{
254
    if (d == 0.0) {
12,435,064!
255
        return 1.0;
256
    }
257
    if (d < 2.0) {
12,435,064✔
258
        return sinc(d) * sinc(d / 2.0);
2,879,808✔
259
    }
260
    return 0.0;
261
}
262

263

264
/* function Lanczos-3
265
 * Lanczos(x) = sinc(x) * sinc(x / 3) , |x| <= 3
266
 *            = 0, |x| > 3
267
 */
268
static double
269
lanczos3(double const d)
109,090,960✔
270
{
271
    if (d == 0.0) {
109,090,960!
272
        return 1.0;
273
    }
274
    if (d < 3.0) {
109,090,960✔
275
        return sinc(d) * sinc(d / 3.0);
102,135,352✔
276
    }
277
    return 0.0;
278
}
279

280
/* function Lanczos-4
281
 * Lanczos(x) = sinc(x) * sinc(x / 4) , |x| <= 4
282
 *            = 0, |x| > 4
283
 */
284
static double
285
lanczos4(double const d)
11,631,432✔
286
{
287
    if (d == 0.0) {
11,631,432!
288
        return 1.0;
289
    }
290
    if (d < 4.0) {
11,631,432✔
291
        return sinc(d) * sinc(d / 4.0);
2,670,104✔
292
    }
293
    return 0.0;
294
}
295

296

297
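/* function Gaussian
 * gaussian(d) = sqrt(2 / PI) * exp(-2 * d^2)
 */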
static double
298
gaussian(double const d)
2,714,600✔
299
{
300
    return exp(-2.0 * d * d) * sqrt(2.0 / M_PI);
2,714,600✔
301
}
302

303

304
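/* function Hanning
 * hanning(d) = 0.5 + 0.5 * cos(PI * d)
 */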
static double
305
hanning(double const d)
2,923,184✔
306
{
307
    return 0.5 + 0.5 * cos(d * M_PI);
2,923,184✔
308
}
309

310

311
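/* function Hamming
 * hamming(d) = 0.54 + 0.46 * cos(PI * d)
 */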
static double
312
hamming(const double d)
3,349,600✔
313
{
314
    return 0.54 + 0.46 * cos(d * M_PI);
3,349,600✔
315
}
316

317

318
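/*
 * Divide an accumulated, weight-scaled sample by the total filter weight
 * and clamp the result to the 0-255 range of an 8-bit channel.
 */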
static unsigned char
319
normalize(double x, double total)
12,031,659✔
320
{
321
    int result;
12,031,659✔
322

323
    result = floor(x / total);
12,031,659✔
324
    if (result > 255) {
12,031,659!
325
        return 0xff;
326
    }
327
    if (result < 0) {
12,029,989!
328
        return 0x00;
329
    }
330
    return (unsigned char)result;
12,029,018✔
331
}
332

333
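/*
 * Resolve the runtime SIMD level once and cache it; -2 marks the
 * not-yet-probed state so later calls skip CPU detection.
 */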
static int
334
sixel_scale_simd_level(void)
121✔
335
{
336
    static int simd_level = -2;
121✔
337

338
    if (simd_level == -2) {
121!
339
        simd_level = sixel_cpu_simd_level();
121✔
340
#if defined(__i386__)
341
        /*
342
         * AVX and later widen the alignment requirement for stack spills to
343
         * 32 bytes. i386 stack realignment from force_align_arg_pointer only
344
         * guarantees 16-byte boundaries, so keep the runtime level capped at
345
         * SSE2 to avoid vmovaps faults when YMM locals spill.
346
         */
347
        if (simd_level > SIXEL_SIMD_LEVEL_SSE2) {
348
            simd_level = SIXEL_SIMD_LEVEL_SSE2;
349
        }
350
#endif
351
    }
352

353
    return simd_level;
121✔
354
}
355

356
static float
357
sixel_clamp_unit_f32(float value)
712,608✔
358
{
359
    /*
360
     * Resampling kernels with negative lobes can push linear RGB values
361
     * outside the unit interval. Clamp here so downstream conversions do
362
     * not collapse to black.
363
     */
364
    if (value < 0.0f) {
712,608!
365
        return 0.0f;
366
    }
367
    if (value > 1.0f) {
712,541!
368
        return 1.0f;
369
    }
370

371
    return value;
372
}
373

374
#if defined(HAVE_IMMINTRIN_H)
375
#if defined(SIXEL_USE_AVX)
376
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX __m256
377
sixel_avx_load_rgb_ps(unsigned char const *psrc)
378
{
379
    __m128i pixi128;
380
    __m128 pixf128;
381
    __m256 pixf256;
382

383
    /*
384
     * Build the byte vector explicitly so the AVX path never accumulates
385
     * garbage data when widening to 32-bit lanes.
386
     */
387
    pixi128 = _mm_setr_epi8((char)psrc[0],
388
                            (char)psrc[1],
389
                            (char)psrc[2],
390
                            0,
391
                            0, 0, 0, 0,
392
                            0, 0, 0, 0,
393
                            0, 0, 0, 0);
394
    pixf128 = _mm_cvtepi32_ps(pixi128);
395
    pixf256 = _mm256_castps128_ps256(pixf128);
396
    pixf256 = _mm256_insertf128_ps(pixf256, _mm_setzero_ps(), 1);
397
    return pixf256;
398
}
399

400
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX void
401
sixel_avx_store_rgb_u8(__m256 acc, double total, unsigned char *dst)
402
{
403
    __m256 scalev;
404
    __m256 minv;
405
    __m256 maxv;
406
    __m256i acci;
407
    int out[8];
408

409
    scalev = _mm256_set1_ps((float)(1.0 / total));
410
    acc = _mm256_mul_ps(acc, scalev);
411
    minv = _mm256_set1_ps(0.0f);
412
    maxv = _mm256_set1_ps(255.0f);
413
    acc = _mm256_max_ps(minv, _mm256_min_ps(acc, maxv));
414
    acci = _mm256_cvtps_epi32(acc);
415
    _mm256_storeu_si256((__m256i *)out, acci);
416
    dst[0] = (unsigned char)out[0];
417
    dst[1] = (unsigned char)out[1];
418
    dst[2] = (unsigned char)out[2];
419
}
420

421
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX __m256
422
sixel_avx_zero_ps(void)
423
{
424
    return _mm256_setzero_ps();
425
}
426

427
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX __m256
428
sixel_avx_muladd_ps(__m256 acc, __m256 pix, float weight)
429
{
430
    __m256 wv;
431

432
    wv = _mm256_set1_ps(weight);
433
    return _mm256_add_ps(acc, _mm256_mul_ps(pix, wv));
434
}
435

436
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX __m256
437
sixel_avx_load_rgb_f32(float const *psrc)
438
{
439
    __m256 pixf;
440

441
    pixf = _mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f,
442
                         psrc[2], psrc[1], psrc[0], 0.0f);
443
    return pixf;
444
}
445

446
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX void
447
sixel_avx_store_rgb_f32(__m256 acc, double total, float *dst)
448
{
449
    __m256 scalev;
450
    __m256 minv;
451
    __m256 maxv;
452
    float out[8];
453

454
    scalev = _mm256_set1_ps((float)(1.0 / total));
455
    acc = _mm256_mul_ps(acc, scalev);
456
    minv = _mm256_set1_ps(0.0f);
457
    maxv = _mm256_set1_ps(1.0f);
458
    acc = _mm256_max_ps(minv, _mm256_min_ps(acc, maxv));
459
    _mm256_storeu_ps(out, acc);
460
    dst[0] = out[0];
461
    dst[1] = out[1];
462
    dst[2] = out[2];
463
}
464
#endif  /* SIXEL_USE_AVX */
465

466
#if defined(SIXEL_USE_AVX2)
467
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 __m256
468
sixel_avx2_load_rgb_ps(unsigned char const *psrc)
469
{
470
    __m128i pixi128;
471
    __m256i pixi256;
472

473
    /*
474
     * Keep the unused bytes zeroed so widening to epi32 does not pull in
475
     * stack junk and bias every output channel toward white.
476
     */
477
    pixi128 = _mm_setr_epi8((char)psrc[0],
478
                            (char)psrc[1],
479
                            (char)psrc[2],
480
                            0,
481
                            0, 0, 0, 0,
482
                            0, 0, 0, 0,
483
                            0, 0, 0, 0);
484
    pixi256 = _mm256_cvtepu8_epi32(pixi128);
485
    return _mm256_cvtepi32_ps(pixi256);
486
}
487

488
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 void
489
sixel_avx2_store_rgb_u8(__m256 acc, double total, unsigned char *dst)
490
{
491
    __m256 scalev;
492
    __m256 minv;
493
    __m256 maxv;
494
    __m256i acci;
495
    int out[8];
496

497
    scalev = _mm256_set1_ps((float)(1.0 / total));
498
    acc = _mm256_mul_ps(acc, scalev);
499
    minv = _mm256_set1_ps(0.0f);
500
    maxv = _mm256_set1_ps(255.0f);
501
    acc = _mm256_max_ps(minv, _mm256_min_ps(acc, maxv));
502
    acci = _mm256_cvtps_epi32(acc);
503
    _mm256_storeu_si256((__m256i *)out, acci);
504
    dst[0] = (unsigned char)out[0];
505
    dst[1] = (unsigned char)out[1];
506
    dst[2] = (unsigned char)out[2];
507
}
508

509
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 __m256
510
sixel_avx2_zero_ps(void)
511
{
512
    return _mm256_setzero_ps();
513
}
514

515
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 __m256
516
sixel_avx2_muladd_ps(__m256 acc, __m256 pix, float weight)
517
{
518
    __m256 wv;
519

520
    wv = _mm256_set1_ps(weight);
521
    return _mm256_add_ps(acc, _mm256_mul_ps(pix, wv));
522
}
523

524
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 __m256
525
sixel_avx2_load_rgb_f32(float const *psrc)
526
{
527
    __m256 pixf;
528

529
    pixf = _mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f,
530
                         psrc[2], psrc[1], psrc[0], 0.0f);
531
    return pixf;
532
}
533

534
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 void
535
sixel_avx2_store_rgb_f32(__m256 acc, double total, float *dst)
536
{
537
    __m256 scalev;
538
    __m256 minv;
539
    __m256 maxv;
540
    float out[8];
541

542
    scalev = _mm256_set1_ps((float)(1.0 / total));
543
    acc = _mm256_mul_ps(acc, scalev);
544
    minv = _mm256_set1_ps(0.0f);
545
    maxv = _mm256_set1_ps(1.0f);
546
    acc = _mm256_max_ps(minv, _mm256_min_ps(acc, maxv));
547
    _mm256_storeu_ps(out, acc);
548
    dst[0] = out[0];
549
    dst[1] = out[1];
550
    dst[2] = out[2];
551
}
552
#endif  /* SIXEL_USE_AVX2 */
553

554
#if defined(SIXEL_USE_AVX512)
555
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 __m512
556
sixel_avx512_load_rgb_ps(unsigned char const *psrc)
557
{
558
    __m128i pixi128;
559
    __m512i pixi512;
560

561
    pixi128 = _mm_setr_epi8((char)psrc[0],
562
                            (char)psrc[1],
563
                            (char)psrc[2],
564
                            0,
565
                            0, 0, 0, 0,
566
                            0, 0, 0, 0,
567
                            0, 0, 0, 0);
568
    pixi512 = _mm512_cvtepu8_epi32(pixi128);
569
    return _mm512_cvtepi32_ps(pixi512);
570
}
571

572
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 void
573
sixel_avx512_store_rgb_u8(__m512 const *acc,
574
                          double total,
575
                          unsigned char *dst)
576
{
577
    __m512 scalev;
578
    __m512 minv;
579
    __m512 maxv;
580
    __m512 accv;
581
    __m512i acci;
582
    int out[16];
583

584
    scalev = _mm512_set1_ps((float)(1.0 / total));
585
    accv = _mm512_mul_ps(*acc, scalev);
586
    minv = _mm512_set1_ps(0.0f);
587
    maxv = _mm512_set1_ps(255.0f);
588
    accv = _mm512_max_ps(minv, _mm512_min_ps(accv, maxv));
589
    acci = _mm512_cvtps_epi32(accv);
590
    _mm512_storeu_si512((void *)out, acci);
591
    dst[0] = (unsigned char)out[0];
592
    dst[1] = (unsigned char)out[1];
593
    dst[2] = (unsigned char)out[2];
594
}
595

596
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 __m512
597
sixel_avx512_zero_ps(void)
598
{
599
    return _mm512_setzero_ps();
600
}
601

602
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 __m512
603
sixel_avx512_muladd_ps(__m512 acc, __m512 pix, float weight)
604
{
605
    __m512 wv;
606

607
    wv = _mm512_set1_ps(weight);
608
    return _mm512_add_ps(acc, _mm512_mul_ps(pix, wv));
609
}
610

611
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 __m512
612
sixel_avx512_load_rgb_f32(float const *psrc)
613
{
614
    __m512 pixf;
615

616
    pixf = _mm512_set_ps(0.0f, 0.0f, 0.0f, 0.0f,
617
                         0.0f, 0.0f, 0.0f, 0.0f,
618
                         0.0f, 0.0f, 0.0f, 0.0f,
619
                         psrc[2], psrc[1], psrc[0], 0.0f);
620
    return pixf;
621
}
622

623
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 void
624
sixel_avx512_store_rgb_f32(__m512 const *acc,
625
                           double total,
626
                           float *dst)
627
{
628
    __m512 scalev;
629
    __m512 minv;
630
    __m512 maxv;
631
    __m512 accv;
632
    float out[16];
633

634
    scalev = _mm512_set1_ps((float)(1.0 / total));
635
    accv = _mm512_mul_ps(*acc, scalev);
636
    minv = _mm512_set1_ps(0.0f);
637
    maxv = _mm512_set1_ps(1.0f);
638
    accv = _mm512_max_ps(minv, _mm512_min_ps(accv, maxv));
639
    _mm512_storeu_ps(out, accv);
640
    dst[0] = out[0];
641
    dst[1] = out[1];
642
    dst[2] = out[2];
643
}
644
#endif  /* SIXEL_USE_AVX512 */
645
#endif /* HAVE_IMMINTRIN_H */
646

647

648
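/*
 * Nearest-neighbor scaling: each destination pixel copies the source pixel
 * selected by the truncated proportional mapping x = w * srcw / dstw,
 * y = h * srch / dsth.
 */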
static void
649
scale_without_resampling(
9✔
650
    unsigned char *dst,
651
    unsigned char const *src,
652
    int const srcw,
653
    int const srch,
654
    int const dstw,
655
    int const dsth,
656
    int const depth)
657
{
658
    int w;
9✔
659
    int h;
9✔
660
    int x;
9✔
661
    int y;
9✔
662
    int i;
9✔
663
    int pos;
9✔
664

665
    for (h = 0; h < dsth; h++) {
419✔
666
        for (w = 0; w < dstw; w++) {
252,450✔
667
            x = (long)w * srcw / dstw;
252,040✔
668
            y = (long)h * srch / dsth;
252,040✔
669
            for (i = 0; i < depth; i++) {
1,008,160✔
670
                pos = (y * srcw + x) * depth + i;
756,120✔
671
                dst[(h * dstw + w) * depth + i] = src[pos];
756,120✔
672
            }
673
        }
674
    }
675
}
9✔
676

677
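/*
 * Same nearest-neighbor mapping as scale_without_resampling(), operating
 * on float samples instead of 8-bit channels.
 */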
static void
678
scale_without_resampling_float32(
20✔
679
    float *dst,
680
    float const *src,
681
    int const srcw,
682
    int const srch,
683
    int const dstw,
684
    int const dsth,
685
    int const depth)
686
{
687
    int w;
20✔
688
    int h;
20✔
689
    int x;
20✔
690
    int y;
20✔
691
    int i;
20✔
692
    int pos;
20✔
693

694
    for (h = 0; h < dsth; h++) {
260✔
695
        for (w = 0; w < dstw; w++) {
3,120✔
696
            x = (long)w * srcw / dstw;
2,880✔
697
            y = (long)h * srch / dsth;
2,880✔
698
            for (i = 0; i < depth; i++) {
11,520✔
699
                pos = (y * srcw + x) * depth + i;
8,640✔
700
                dst[(h * dstw + w) * depth + i] = src[pos];
8,640✔
701
            }
702
        }
703
    }
704
}
20✔
705

706

707
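/*
 * Resampling kernel signature shared by the filters above: maps the
 * absolute distance from a sample center to a filter weight.
 */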
typedef double (*resample_fn_t)(double const d);
708

709
#if defined(__GNUC__) && !defined(__clang__)
710
#pragma GCC diagnostic push
711
/*
712
 * GCC emits a -Wpsabi note for __m512 parameters because the calling
713
 * convention changed in GCC 4.6. The functions only pass vectors between
714
 * helpers compiled with the same compiler, so suppress the noise locally.
715
 */
716
#pragma GCC diagnostic ignored "-Wpsabi"
717
#endif
718

719
/*
720
 * Two-pass separable filter helpers. Each function processes a single row so
721
 * the caller may invoke them serially or from a threadpool worker. On i386 we
722
 * also mark the functions noinline to ensure the stack-realigning prologue
723
 * from SIXEL_ALIGN_STACK is preserved under optimization.
724
 */
725
static SIXEL_ALIGN_STACK SIXEL_NO_INLINE void
726
scale_horizontal_row(
45,784✔
727
    unsigned char *tmp,
728
    unsigned char const *src,
729
    int const srcw,
730
    int const dstw,
731
    int const depth,
732
    int const y,
733
    resample_fn_t const f_resample,
734
    double const n,
735
    int const simd_level)
736
{
737
    int w;
45,784✔
738
    int x;
45,784✔
739
    int i;
45,784✔
740
    int pos;
45,784✔
741
    int x_first;
45,784✔
742
    int x_last;
45,784✔
743
    double center_x;
45,784✔
744
    double diff_x;
45,784✔
745
    double weight;
45,784✔
746
    double total;
45,784✔
747
    double offsets[8];
45,784✔
748
#if !defined(SIXEL_USE_AVX512) && !defined(SIXEL_USE_AVX2) && \
749
    !defined(SIXEL_USE_AVX) && !defined(SIXEL_USE_SSE2) && \
750
    !defined(SIXEL_USE_NEON)
751
    /*
752
     * No SIMD backends are compiled for this target, so the SIMD level gate
753
     * becomes a dead parameter. Silence -Wunused-parameter on 32-bit GCC
754
     * builds while keeping the signature identical across configurations.
755
     */
756
    (void)simd_level;
11,446✔
757
#endif
758
#if defined(SIXEL_USE_AVX512)
759
    __m512 acc512;
760
    __m512 pix512;
761
#endif
762
#if defined(SIXEL_USE_AVX2) || defined(SIXEL_USE_AVX)
763
    __m256 acc256;
764
#endif
765
#if defined(SIXEL_USE_SSE2)
766
    /*
767
     * __m128 locals remain on the stack. On i386 callers may arrive with
768
     * only 4- or 8-byte alignment, so movaps spills can fault when SSE2 is
769
     * forced. SIXEL_ALIGN_STACK realigns the frame on entry to keep the
770
     * SSE2 path consistent with the 16-byte guarantee on x86_64.
771
     */
772
    __m128 acc128;
22,892✔
773
    __m128 minv128;
22,892✔
774
    __m128 maxv128;
22,892✔
775
    __m128 scalev128;
22,892✔
776
    __m128 wv128;
22,892✔
777
    __m128 pixf128;
22,892✔
778
    __m128i pixi128;
22,892✔
779
    __m128i acci128;
22,892✔
780
    __m128i acc16_128;
22,892✔
781
    unsigned int pixel128;
22,892✔
782
#endif
783
#if defined(SIXEL_USE_NEON)
784
    float32x4_t acc_neon;
11,446✔
785
    float32x4_t minv_neon;
11,446✔
786
    float32x4_t maxv_neon;
11,446✔
787
    float32x4_t scalev_neon;
11,446✔
788
    float32x4_t wv_neon;
11,446✔
789
    float32x4_t pixf_neon;
11,446✔
790
    uint32x4_t pix32_neon;
11,446✔
791
    uint32x4_t acci_neon;
11,446✔
792
    uint16x4_t acc16_neon;
11,446✔
793
    uint8x8_t acc8_neon;
11,446✔
794
    uint8_t outb_neon[8];
11,446✔
795
#endif
796

797
    for (w = 0; w < dstw; w++) {
10,586,104✔
798
        total = 0.0;
42,161,280✔
799
        for (i = 0; i < depth; i++) {
42,161,280✔
800
            offsets[i] = 0;
31,620,960✔
801
        }
802

803
        if (dstw >= srcw) {
10,540,320!
804
            center_x = (w + 0.5) * srcw / dstw;
768,000✔
805
            x_first = MAX((int)(center_x - n), 0);
768,000!
806
            x_last = MIN((int)(center_x + n), srcw - 1);
768,000!
807
        } else {
808
            center_x = w + 0.5;
9,772,320✔
809
            x_first = MAX((int)floor((center_x - n) * srcw / dstw), 0);
9,772,320✔
810
            x_last = MIN((int)floor((center_x + n) * srcw / dstw),
9,772,320✔
811
                         srcw - 1);
812
        }
813

814
#if defined(SIXEL_USE_AVX512)
815
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX512) {
816
#if defined(__GNUC__) && !defined(__clang__)
817
#pragma GCC diagnostic push
818
#pragma GCC diagnostic ignored "-Wpsabi"
819
#endif
820
            acc512 = sixel_avx512_zero_ps();
821

822
            for (x = x_first; x <= x_last; x++) {
823
                diff_x = (dstw >= srcw)
824
                             ? (x + 0.5) - center_x
825
                             : (x + 0.5) * dstw / srcw - center_x;
826
                weight = f_resample(fabs(diff_x));
827
                pos = (y * srcw + x) * depth;
828
                pix512 = sixel_avx512_load_rgb_ps(src + pos);
829
                acc512 = sixel_avx512_muladd_ps(
830
                    acc512,
831
                    pix512,
832
                    (float)weight);
833
                total += weight;
834
            }
835
            if (total > 0.0) {
836
                pos = (y * dstw + w) * depth;
837
                sixel_avx512_store_rgb_u8(&acc512, total, tmp + pos);
838
            }
839
#if defined(__GNUC__) && !defined(__clang__)
840
#pragma GCC diagnostic pop
841
#endif
842
            continue;
843
        }
844
#endif
845
#if defined(SIXEL_USE_AVX2)
846
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX2) {
847
            acc256 = sixel_avx2_zero_ps();
848

849
            for (x = x_first; x <= x_last; x++) {
850
                diff_x = (dstw >= srcw)
851
                             ? (x + 0.5) - center_x
852
                             : (x + 0.5) * dstw / srcw - center_x;
853
                weight = f_resample(fabs(diff_x));
854
                pos = (y * srcw + x) * depth;
855
                acc256 = sixel_avx2_muladd_ps(
856
                    acc256,
857
                    sixel_avx2_load_rgb_ps(src + pos),
858
                    (float)weight);
859
                total += weight;
860
            }
861
            if (total > 0.0) {
862
                pos = (y * dstw + w) * depth;
863
                sixel_avx2_store_rgb_u8(acc256, total, tmp + pos);
864
            }
865
            continue;
866
        }
867
#endif
868
#if defined(SIXEL_USE_AVX)
869
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX) {
870
            acc256 = sixel_avx_zero_ps();
871

872
            for (x = x_first; x <= x_last; x++) {
873
                diff_x = (dstw >= srcw)
874
                             ? (x + 0.5) - center_x
875
                             : (x + 0.5) * dstw / srcw - center_x;
876
                weight = f_resample(fabs(diff_x));
877
                pos = (y * srcw + x) * depth;
878
                acc256 = sixel_avx_muladd_ps(
879
                    acc256,
880
                    sixel_avx_load_rgb_ps(src + pos),
881
                    (float)weight);
882
                total += weight;
883
            }
884
            if (total > 0.0) {
885
                pos = (y * dstw + w) * depth;
886
                sixel_avx_store_rgb_u8(acc256, total, tmp + pos);
887
            }
888
            continue;
889
        }
890
#endif
891
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
892
        if (depth == 3
15,810,480!
893
# if defined(SIXEL_USE_SSE2)
894
            && simd_level >= SIXEL_SIMD_LEVEL_SSE2
5,270,160!
895
# elif defined(SIXEL_USE_NEON)
896
            && simd_level >= SIXEL_SIMD_LEVEL_NEON
2,635,080!
897
# endif
898
            ) {
899
#if defined(SIXEL_USE_SSE2)
900
            acc128 = _mm_setzero_ps();
901
#elif defined(SIXEL_USE_NEON)
902
            acc_neon = vdupq_n_f32(0.0f);
903
#endif
904
            for (x = x_first; x <= x_last; x++) {
168,010,884✔
905
                diff_x = (dstw >= srcw)
320,211,288!
906
                             ? (x + 0.5) - center_x
1,724,400✔
907
                             : (x + 0.5) * dstw / srcw - center_x;
160,105,644✔
908
                weight = f_resample(fabs(diff_x));
160,105,644✔
909
                pos = (y * srcw + x) * depth;
160,105,644✔
910
                const unsigned char *psrc = src + pos;
160,105,644✔
911
#if defined(SIXEL_USE_SSE2)
912
                pixel128 = psrc[0] | (psrc[1] << 8) | (psrc[2] << 16);
106,737,096✔
913
                pixi128 = _mm_cvtsi32_si128((int)pixel128);
106,737,096✔
914
                pixi128 = _mm_unpacklo_epi8(pixi128, _mm_setzero_si128());
106,737,096✔
915
                pixi128 = _mm_unpacklo_epi16(pixi128, _mm_setzero_si128());
106,737,096✔
916
                pixf128 = _mm_cvtepi32_ps(pixi128);
106,737,096✔
917
                wv128 = _mm_set1_ps((float)weight);
106,737,096✔
918
                acc128 = _mm_add_ps(acc128, _mm_mul_ps(pixf128, wv128));
106,737,096✔
919
#else /* NEON */
920
                pix32_neon = (uint32x4_t){psrc[0], psrc[1], psrc[2], 0};
53,368,548✔
921
                pixf_neon = vcvtq_f32_u32(pix32_neon);
53,368,548✔
922
                wv_neon = vdupq_n_f32((float)weight);
53,368,548✔
923
                acc_neon = vmlaq_f32(acc_neon, pixf_neon, wv_neon);
53,368,548✔
924
#endif
925
                total += weight;
160,105,644✔
926
            }
927
            if (total > 0.0) {
7,905,240!
928
#if defined(SIXEL_USE_SSE2)
929
                scalev128 = _mm_set1_ps((float)(1.0 / total));
5,270,160✔
930
                acc128 = _mm_mul_ps(acc128, scalev128);
5,270,160✔
931
                minv128 = _mm_set1_ps(0.0f);
5,270,160✔
932
                maxv128 = _mm_set1_ps(255.0f);
5,270,160✔
933
                acc128 = _mm_max_ps(minv128, _mm_min_ps(acc128, maxv128));
5,270,160✔
934
                acci128 = _mm_cvtps_epi32(acc128);
5,270,160✔
935
                acc16_128 = _mm_packs_epi32(acci128, _mm_setzero_si128());
5,270,160✔
936
                acc16_128 = _mm_packus_epi16(acc16_128, _mm_setzero_si128());
5,270,160✔
937
                pos = (y * dstw + w) * depth;
5,270,160✔
938
                pixel128 = (unsigned int)_mm_cvtsi128_si32(acc16_128);
5,270,160✔
939
                tmp[pos + 0] = (unsigned char)pixel128;
5,270,160✔
940
                tmp[pos + 1] = (unsigned char)(pixel128 >> 8);
5,270,160✔
941
                tmp[pos + 2] = (unsigned char)(pixel128 >> 16);
5,270,160✔
942
#else /* NEON */
943
                scalev_neon = vdupq_n_f32((float)(1.0 / total));
2,635,080✔
944
                acc_neon = vmulq_f32(acc_neon, scalev_neon);
2,635,080✔
945
                minv_neon = vdupq_n_f32(0.0f);
2,635,080✔
946
                maxv_neon = vdupq_n_f32(255.0f);
2,635,080✔
947
                acc_neon = vmaxq_f32(minv_neon,
2,635,080✔
948
                                     vminq_f32(acc_neon, maxv_neon));
949
                acci_neon = vcvtq_u32_f32(acc_neon);
2,635,080✔
950
                acc16_neon = vmovn_u32(acci_neon);
2,635,080✔
951
                acc8_neon = vmovn_u16(vcombine_u16(acc16_neon, acc16_neon));
2,635,080✔
952

953
                vst1_u8(outb_neon, acc8_neon);
2,635,080✔
954
                pos = (y * dstw + w) * depth;
2,635,080✔
955
                tmp[pos + 0] = outb_neon[0];
2,635,080✔
956
                tmp[pos + 1] = outb_neon[1];
2,635,080✔
957
                tmp[pos + 2] = outb_neon[2];
2,635,080✔
958
#endif
959
            }
960
            continue;
7,905,240✔
961
        }
962
#endif /* SIMD paths */
963

964
        for (x = x_first; x <= x_last; x++) {
56,003,628!
965
            diff_x = (dstw >= srcw)
106,737,096!
966
                         ? (x + 0.5) - center_x
574,800✔
967
                         : (x + 0.5) * dstw / srcw - center_x;
53,368,548!
968
            weight = f_resample(fabs(diff_x));
53,368,548✔
969
            for (i = 0; i < depth; i++) {
266,842,740!
970
                pos = (y * srcw + x) * depth + i;
160,105,644✔
971
                offsets[i] += src[pos] * weight;
160,105,644✔
972
            }
973
            total += weight;
53,368,548✔
974
        }
975

976
        if (total > 0.0) {
2,635,080!
977
            for (i = 0; i < depth; i++) {
10,540,320!
978
                pos = (y * dstw + w) * depth + i;
7,905,240✔
979
                tmp[pos] = normalize(offsets[i], total);
7,905,240✔
980
            }
981
        }
982
    }
983
}
45,784✔
984

985
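/*
 * Second pass: filter one destination row vertically from the horizontally
 * scaled intermediate buffer produced by scale_horizontal_row().
 */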
static SIXEL_ALIGN_STACK SIXEL_NO_INLINE void
986
scale_vertical_row(
14,636✔
987
    unsigned char *dst,
988
    unsigned char const *tmp,
989
    int const dstw,
990
    int const dsth,
991
    int const depth,
992
    int const srch,
993
    int const h,
994
    resample_fn_t const f_resample,
995
    double const n,
996
    int const simd_level)
997
{
998
    int w;
14,636✔
999
    int y;
14,636✔
1000
    int i;
14,636✔
1001
    int pos;
14,636✔
1002
    int y_first;
14,636✔
1003
    int y_last;
14,636✔
1004
    double center_y;
14,636✔
1005
    double diff_y;
14,636✔
1006
    double weight;
14,636✔
1007
    double total;
14,636✔
1008
    double offsets[8];
14,636✔
1009
#if !defined(SIXEL_USE_AVX512) && !defined(SIXEL_USE_AVX2) && \
1010
    !defined(SIXEL_USE_AVX) && !defined(SIXEL_USE_SSE2) && \
1011
    !defined(SIXEL_USE_NEON)
1012
    /*
1013
     * When no SIMD implementations are present the runtime SIMD level does
1014
     * not influence the algorithm. Mark it unused to keep 32-bit GCC quiet
1015
     * without altering the interface shared with SIMD-enabled builds.
1016
     */
1017
    (void)simd_level;
3,659✔
1018
#endif
1019
#if defined(SIXEL_USE_AVX512)
1020
    __m512 acc512;
1021
    __m512 pix512;
1022
#endif
1023
#if defined(SIXEL_USE_AVX2) || defined(SIXEL_USE_AVX)
1024
    __m256 acc256;
1025
#endif
1026
#if defined(SIXEL_USE_SSE2)
1027
    __m128 acc128;
7,318✔
1028
    __m128 minv128;
7,318✔
1029
    __m128 maxv128;
7,318✔
1030
    __m128 scalev128;
7,318✔
1031
    __m128 wv128;
7,318✔
1032
    __m128 pixf128;
7,318✔
1033
    __m128i pixi128;
7,318✔
1034
    __m128i acci128;
7,318✔
1035
    __m128i acc16_128;
7,318✔
1036
    unsigned int pixel128;
7,318✔
1037
#endif
1038
#if defined(SIXEL_USE_NEON)
1039
    float32x4_t acc_neon;
3,659✔
1040
    float32x4_t minv_neon;
3,659✔
1041
    float32x4_t maxv_neon;
3,659✔
1042
    float32x4_t scalev_neon;
3,659✔
1043
    float32x4_t wv_neon;
3,659✔
1044
    float32x4_t pixf_neon;
3,659✔
1045
    uint32x4_t pix32_neon;
3,659✔
1046
    uint32x4_t acci_neon;
3,659✔
1047
    uint16x4_t acc16_neon;
3,659✔
1048
    uint8x8_t acc8_neon;
3,659✔
1049
    uint8_t outb_neon[8];
3,659✔
1050
#endif
1051

1052
    for (w = 0; w < dstw; w++) {
5,516,528✔
1053
        total = 0.0;
22,007,568✔
1054
        for (i = 0; i < depth; i++) {
22,007,568✔
1055
            offsets[i] = 0;
16,505,676✔
1056
        }
1057

1058
        if (dsth >= srch) {
5,501,892!
1059
            center_y = (h + 0.5) * srch / dsth;
2,346,000✔
1060
            y_first = MAX((int)(center_y - n), 0);
2,346,000!
1061
            y_last = MIN((int)(center_y + n), srch - 1);
2,346,000!
1062
        } else {
1063
            center_y = h + 0.5;
3,155,892✔
1064
            y_first = MAX((int)floor((center_y - n) * srch / dsth), 0);
3,155,892✔
1065
            y_last = MIN((int)floor((center_y + n) * srch / dsth),
3,155,892✔
1066
                         srch - 1);
1067
        }
1068

1069
#if defined(SIXEL_USE_AVX512)
1070
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX512) {
1071
#if defined(__GNUC__) && !defined(__clang__)
1072
#pragma GCC diagnostic push
1073
#pragma GCC diagnostic ignored "-Wpsabi"
1074
#endif
1075
            acc512 = sixel_avx512_zero_ps();
1076

1077
            for (y = y_first; y <= y_last; y++) {
1078
                diff_y = (dsth >= srch)
1079
                             ? (y + 0.5) - center_y
1080
                             : (y + 0.5) * dsth / srch - center_y;
1081
                weight = f_resample(fabs(diff_y));
1082
                pos = (y * dstw + w) * depth;
1083
                pix512 = sixel_avx512_load_rgb_ps(tmp + pos);
1084
                acc512 = sixel_avx512_muladd_ps(
1085
                    acc512,
1086
                    pix512,
1087
                    (float)weight);
1088
                total += weight;
1089
            }
1090
            if (total > 0.0) {
1091
                pos = (h * dstw + w) * depth;
1092
                sixel_avx512_store_rgb_u8(&acc512, total, dst + pos);
1093
            }
1094
#if defined(__GNUC__) && !defined(__clang__)
1095
#pragma GCC diagnostic pop
1096
#endif
1097
            continue;
1098
        }
1099
#endif
1100
#if defined(SIXEL_USE_AVX2)
1101
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX2) {
1102
            acc256 = sixel_avx2_zero_ps();
1103

1104
            for (y = y_first; y <= y_last; y++) {
1105
                diff_y = (dsth >= srch)
1106
                             ? (y + 0.5) - center_y
1107
                             : (y + 0.5) * dsth / srch - center_y;
1108
                weight = f_resample(fabs(diff_y));
1109
                pos = (y * dstw + w) * depth;
1110
                acc256 = sixel_avx2_muladd_ps(
1111
                    acc256,
1112
                    sixel_avx2_load_rgb_ps(tmp + pos),
1113
                    (float)weight);
1114
                total += weight;
1115
            }
1116
            if (total > 0.0) {
1117
                pos = (h * dstw + w) * depth;
1118
                sixel_avx2_store_rgb_u8(acc256, total, dst + pos);
1119
            }
1120
            continue;
1121
        }
1122
#endif
1123
#if defined(SIXEL_USE_AVX)
1124
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX) {
1125
            acc256 = sixel_avx_zero_ps();
1126

1127
            for (y = y_first; y <= y_last; y++) {
1128
                diff_y = (dsth >= srch)
1129
                             ? (y + 0.5) - center_y
1130
                             : (y + 0.5) * dsth / srch - center_y;
1131
                weight = f_resample(fabs(diff_y));
1132
                pos = (y * dstw + w) * depth;
1133
                acc256 = sixel_avx_muladd_ps(
1134
                    acc256,
1135
                    sixel_avx_load_rgb_ps(tmp + pos),
1136
                    (float)weight);
1137
                total += weight;
1138
            }
1139
            if (total > 0.0) {
1140
                pos = (h * dstw + w) * depth;
1141
                sixel_avx_store_rgb_u8(acc256, total, dst + pos);
1142
            }
1143
            continue;
1144
        }
1145
#endif
1146
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
1147
        if (depth == 3
8,252,838!
1148
# if defined(SIXEL_USE_SSE2)
1149
            && simd_level >= SIXEL_SIMD_LEVEL_SSE2
2,750,946!
1150
# elif defined(SIXEL_USE_NEON)
1151
            && simd_level >= SIXEL_SIMD_LEVEL_NEON
1,375,473!
1152
# endif
1153
            ) {
1154
#if defined(SIXEL_USE_SSE2)
1155
            acc128 = _mm_setzero_ps();
1156
#elif defined(SIXEL_USE_NEON)
1157
            acc_neon = vdupq_n_f32(0.0f);
1158
#endif
1159
            for (y = y_first; y <= y_last; y++) {
26,579,940✔
1160
                diff_y = (dsth >= srch)
44,907,042!
1161
                             ? (y + 0.5) - center_y
5,257,560✔
1162
                             : (y + 0.5) * dsth / srch - center_y;
22,453,521✔
1163
                weight = f_resample(fabs(diff_y));
22,453,521✔
1164
                pos = (y * dstw + w) * depth;
22,453,521✔
1165
                const unsigned char *psrc = tmp + pos;
22,453,521✔
1166
#if defined(SIXEL_USE_SSE2)
1167
                pixel128 = psrc[0] | (psrc[1] << 8) | (psrc[2] << 16);
14,969,014✔
1168
                pixi128 = _mm_cvtsi32_si128((int)pixel128);
14,969,014✔
1169
                pixi128 = _mm_unpacklo_epi8(pixi128, _mm_setzero_si128());
14,969,014✔
1170
                pixi128 = _mm_unpacklo_epi16(pixi128, _mm_setzero_si128());
14,969,014✔
1171
                pixf128 = _mm_cvtepi32_ps(pixi128);
14,969,014✔
1172
                wv128 = _mm_set1_ps((float)weight);
14,969,014✔
1173
                acc128 = _mm_add_ps(acc128, _mm_mul_ps(pixf128, wv128));
14,969,014✔
1174
#else /* NEON */
1175
                pix32_neon = (uint32x4_t){psrc[0], psrc[1], psrc[2], 0};
7,484,507✔
1176
                pixf_neon = vcvtq_f32_u32(pix32_neon);
7,484,507✔
1177
                wv_neon = vdupq_n_f32((float)weight);
7,484,507✔
1178
                acc_neon = vmlaq_f32(acc_neon, pixf_neon, wv_neon);
7,484,507✔
1179
#endif
1180
                total += weight;
22,453,521✔
1181
            }
1182
            if (total > 0.0) {
4,126,419!
1183
#if defined(SIXEL_USE_SSE2)
1184
                scalev128 = _mm_set1_ps((float)(1.0 / total));
2,750,946✔
1185
                acc128 = _mm_mul_ps(acc128, scalev128);
2,750,946✔
1186
                minv128 = _mm_set1_ps(0.0f);
2,750,946✔
1187
                maxv128 = _mm_set1_ps(255.0f);
2,750,946✔
1188
                acc128 = _mm_max_ps(minv128, _mm_min_ps(acc128, maxv128));
2,750,946✔
1189
                acci128 = _mm_cvtps_epi32(acc128);
2,750,946✔
1190
                acc16_128 = _mm_packs_epi32(acci128, _mm_setzero_si128());
2,750,946✔
1191
                acc16_128 = _mm_packus_epi16(acc16_128, _mm_setzero_si128());
2,750,946✔
1192
                pos = (h * dstw + w) * depth;
2,750,946✔
1193
                pixel128 = (unsigned int)_mm_cvtsi128_si32(acc16_128);
2,750,946✔
1194
                dst[pos + 0] = (unsigned char)pixel128;
2,750,946✔
1195
                dst[pos + 1] = (unsigned char)(pixel128 >> 8);
2,750,946✔
1196
                dst[pos + 2] = (unsigned char)(pixel128 >> 16);
2,750,946✔
1197
#else /* NEON */
1198
                scalev_neon = vdupq_n_f32((float)(1.0 / total));
1,375,473✔
1199
                acc_neon = vmulq_f32(acc_neon, scalev_neon);
1,375,473✔
1200
                minv_neon = vdupq_n_f32(0.0f);
1,375,473✔
1201
                maxv_neon = vdupq_n_f32(255.0f);
1,375,473✔
1202
                acc_neon = vmaxq_f32(minv_neon,
1,375,473✔
1203
                                     vminq_f32(acc_neon, maxv_neon));
1204
                acci_neon = vcvtq_u32_f32(acc_neon);
1,375,473✔
1205
                acc16_neon = vmovn_u32(acci_neon);
1,375,473✔
1206
                acc8_neon = vmovn_u16(vcombine_u16(acc16_neon, acc16_neon));
1,375,473✔
1207

1208
                vst1_u8(outb_neon, acc8_neon);
1,375,473✔
1209
                pos = (h * dstw + w) * depth;
1,375,473✔
1210
                dst[pos + 0] = outb_neon[0];
1,375,473✔
1211
                dst[pos + 1] = outb_neon[1];
1,375,473✔
1212
                dst[pos + 2] = outb_neon[2];
1,375,473✔
1213
#endif
1214
            }
1215
            continue;
4,126,419✔
1216
        }
1217
#endif /* SIMD paths */
1218
        for (y = y_first; y <= y_last; y++) {
8,859,980!
1219
            diff_y = (dsth >= srch)
14,969,014!
1220
                         ? (y + 0.5) - center_y
1,752,520✔
1221
                         : (y + 0.5) * dsth / srch - center_y;
7,484,507!
1222
            weight = f_resample(fabs(diff_y));
7,484,507✔
1223
            for (i = 0; i < depth; i++) {
37,422,535!
1224
                pos = (y * dstw + w) * depth + i;
22,453,521✔
1225
                offsets[i] += tmp[pos] * weight;
22,453,521✔
1226
            }
1227
            total += weight;
7,484,507✔
1228
        }
1229

1230
        if (total > 0.0) {
1,375,473!
1231
            for (i = 0; i < depth; i++) {
5,501,892!
1232
                pos = (h * dstw + w) * depth + i;
4,126,419✔
1233
                dst[pos] = normalize(offsets[i], total);
4,126,419✔
1234
            }
1235
        }
1236
    }
1237
}
14,636✔
1238

1239
#if defined(__GNUC__) && !defined(__clang__)
1240
#pragma GCC diagnostic pop
1241
#endif
1242

1243
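/*
 * Serial two-pass path: run the horizontal filter over every source row
 * into the intermediate buffer, then the vertical filter over every
 * destination row.
 */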
static void
1244
scale_with_resampling_serial(
75✔
1245
    unsigned char *dst,
1246
    unsigned char const *src,
1247
    int const srcw,
1248
    int const srch,
1249
    int const dstw,
1250
    int const dsth,
1251
    int const depth,
1252
    resample_fn_t const f_resample,
1253
    double const n,
1254
    unsigned char *tmp)
1255
{
1256
    int y;
75✔
1257
    int h;
75✔
1258
    int simd_level;
75✔
1259

1260
    simd_level = sixel_scale_simd_level();
75✔
1261
#if !defined(SIXEL_USE_AVX512) && !defined(SIXEL_USE_AVX2) && \
1262
    !defined(SIXEL_USE_AVX) && !defined(SIXEL_USE_SSE2) && \
1263
    !defined(SIXEL_USE_NEON)
1264
    /*
1265
     * GCC i686 builds can compile this function without any SIMD backends
1266
     * enabled; consume the detection result to keep the signature stable
1267
     * while avoiding an unused-but-set-variable warning.
1268
     */
1269
    (void)simd_level;
18✔
1270
#endif
1271

1272
    for (y = 0; y < srch; y++) {
32,758✔
1273
        scale_horizontal_row(tmp,
32,608✔
1274
                             src,
1275
                             srcw,
1276
                             dstw,
1277
                             depth,
1278
                             y,
1279
                             f_resample,
1280
                             n,
1281
                             simd_level);
1282
    }
1283

1284
    for (h = 0; h < dsth; h++) {
12,740✔
1285
        scale_vertical_row(dst,
12,665✔
1286
                           tmp,
1287
                           dstw,
1288
                           dsth,
1289
                           depth,
1290
                           srch,
1291
                           h,
1292
                           f_resample,
1293
                           n,
1294
                           simd_level);
1295
    }
1296
}
75✔
1297

1298
#if SIXEL_ENABLE_THREADS
1299
typedef enum scale_parallel_pass {
1300
    SCALE_PASS_HORIZONTAL = 0,
1301
    SCALE_PASS_VERTICAL = 1
1302
} scale_parallel_pass_t;
1303

1304
typedef struct scale_parallel_context {
1305
    unsigned char *dst;
1306
    unsigned char const *src;
1307
    unsigned char *tmp;
1308
    int srcw;
1309
    int srch;
1310
    int dstw;
1311
    int dsth;
1312
    int depth;
1313
    resample_fn_t f_resample;
1314
    double n;
1315
    scale_parallel_pass_t pass;
1316
    int simd_level;
1317
    int band_span;
1318
    sixel_logger_t *logger;
1319
} scale_parallel_context_t;
1320

1321
/*
1322
 * Emit timeline entries for every band so downstream aggregation can compute
1323
 * first/last activity windows per thread without losing information.
1324
 */
1325
static int
1326
scale_parallel_should_log(scale_parallel_context_t const *ctx, int index)
150✔
1327
{
1328
    int span;
150✔
1329

1330
    if (ctx == NULL || ctx->logger == NULL || !ctx->logger->active) {
150!
1331
        return 0;
1332
    }
1333

1334
    if (index < 0) {
×
1335
        return 0;
1336
    }
1337

1338
    if (ctx->pass == SCALE_PASS_HORIZONTAL) {
×
1339
        span = ctx->srch;
1340
    } else {
1341
        span = ctx->dsth;
1342
    }
1343

1344
    if (span <= 0 || index >= span) {
×
1345
        return 0;
1346
    }
1347

1348
    return 1;
1349
}
1350

1351
/*
1352
 * Allow callers to raise the floor for parallel execution using
1353
 * SIXEL_SCALE_PARALLEL_MIN_BYTES. The default of zero preserves the previous
1354
 * eager behavior while permitting deployments to defer threading on tiny
1355
 * inputs.
1356
 */
1357
static size_t
1358
scale_parallel_min_bytes(void)
63✔
1359
{
1360
    static int initialized = 0;
63✔
1361
    static size_t threshold = 0;
63✔
1362
    char const *text;
63✔
1363
    char *endptr;
63✔
1364
    unsigned long long parsed;
63✔
1365

1366
    if (initialized) {
63!
1367
        return threshold;
1368
    }
1369

1370
    initialized = 1;
63✔
1371
    text = sixel_compat_getenv("SIXEL_SCALE_PARALLEL_MIN_BYTES");
63✔
1372
    if (text == NULL || text[0] == '\0') {
63!
1373
        return threshold;
63✔
1374
    }
1375

1376
    errno = 0;
1377
    parsed = strtoull(text, &endptr, 10);
1378
    if (endptr == text || *endptr != '\0' || errno == ERANGE) {
×
1379
        return threshold;
1380
    }
1381

1382
    if (parsed > (unsigned long long)SIZE_MAX) {
×
1383
        threshold = SIZE_MAX;
1384
    } else {
1385
        threshold = (size_t)parsed;
1386
    }
1387

1388
    return threshold;
1389
}
1390

1391
/*
1392
 * Choose the number of rows handled per threadpool job. We prefer an
1393
 * environment override via SIXEL_PARALLEL_FACTOR so deployments can tune
1394
 * queueing overhead. Otherwise derive a span from rows/threads and clamp to
1395
 * [1, rows]. The value is cached after the first lookup.
1396
 */
1397
static int
1398
scale_parallel_band_span(int rows, int threads)
18✔
1399
{
1400
    static int initialized = 0;
18✔
1401
    static int env_span = 0;
18✔
1402
    char const *text;
18✔
1403
    char *endptr;
18✔
1404
    long parsed;
18✔
1405
    int span;
18✔
1406

1407
    if (rows <= 0) {
18!
1408
        return 1;
1409
    }
1410

1411
    if (!initialized) {
18!
1412
        initialized = 1;
9✔
1413
        text = sixel_compat_getenv("SIXEL_PARALLEL_FACTOR");
9✔
1414
        if (text != NULL && text[0] != '\0') {
9!
1415
            errno = 0;
1416
            parsed = strtol(text, &endptr, 10);
1417
            if (endptr != text && *endptr == '\0' && errno != ERANGE &&
×
1418
                parsed > 0 && parsed <= INT_MAX) {
×
1419
                env_span = (int)parsed;
1420
            }
1421
        }
1422
    }
1423

1424
    if (env_span > 0) {
18!
1425
        span = env_span;
1426
    } else {
1427
        span = rows / threads;
18✔
1428
    }
1429

1430
    if (span < 1) {
18!
1431
        span = 1;
1432
    }
1433
    if (span > rows) {
18!
1434
        span = rows;
1435
    }
1436

1437
    return span;
1438
}
1439

1440
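/*
 * Threadpool job body: treat job.band_index as the first row of a band and
 * process up to band_span rows with the horizontal or vertical filter,
 * depending on ctx->pass.
 */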
static int
1441
scale_parallel_worker(tp_job_t job, void *userdata, void *workspace)
75✔
1442
{
1443
    scale_parallel_context_t *ctx;
75✔
1444
    int index;
75✔
1445
    char const *role;
75✔
1446
    int y0;
75✔
1447
    int y1;
75✔
1448
    int in0;
75✔
1449
    int in1;
75✔
1450
    int limit;
75✔
1451
    int y;
75✔
1452

1453
    (void)workspace;
75✔
1454
    ctx = (scale_parallel_context_t *)userdata;
75✔
1455
    if (ctx == NULL) {
75!
1456
        return SIXEL_BAD_ARGUMENT;
1457
    }
1458

1459
    role = "horizontal";
75✔
1460
    y0 = 0;
75✔
1461
    y1 = 0;
75✔
1462
    in0 = 0;
75✔
1463
    in1 = 0;
75✔
1464
    index = job.band_index;
75✔
1465
    limit = ctx->srch;
75✔
1466
    if (ctx->pass == SCALE_PASS_HORIZONTAL) {
75!
1467
        limit = ctx->srch;
36✔
1468
    } else {
1469
        limit = ctx->dsth;
39✔
1470
    }
1471

1472
    if (index < 0 || index >= limit) {
75!
1473
        return SIXEL_BAD_ARGUMENT;
1474
    }
1475

1476
    y0 = index;
75✔
1477
    y1 = index + ctx->band_span;
75✔
1478
    if (y1 > limit) {
75!
1479
        y1 = limit;
1480
    }
1481

1482
    if (ctx->pass == SCALE_PASS_HORIZONTAL) {
75!
1483
        in1 = ctx->dstw;
36✔
1484
        if (scale_parallel_should_log(ctx, index)) {
36!
1485
            sixel_logger_logf(ctx->logger,
1486
                              role,
1487
                              "scale",
1488
                              "start",
1489
                              index,
1490
                              y1 - 1,
1491
                              y0,
1492
                              y1,
1493
                              in0,
1494
                              in1,
1495
                              "horizontal pass");
1496
        }
1497
        for (y = y0; y < y1; y++) {
13,212!
1498
            scale_horizontal_row(ctx->tmp,
13,176✔
1499
                                 ctx->src,
1500
                                 ctx->srcw,
1501
                                 ctx->dstw,
1502
                                 ctx->depth,
1503
                                 y,
1504
                                 ctx->f_resample,
1505
                                 ctx->n,
1506
                                 ctx->simd_level);
1507
        }
1508
    } else {
1509
        role = "vertical";
39✔
1510
        in1 = ctx->srch;
39✔
1511
        if (scale_parallel_should_log(ctx, index)) {
39!
1512
            sixel_logger_logf(ctx->logger,
1513
                              role,
1514
                              "scale",
1515
                              "start",
1516
                              index,
1517
                              y1 - 1,
1518
                              y0,
1519
                              y1,
1520
                              in0,
1521
                              in1,
1522
                              "vertical pass");
1523
        }
1524
        for (y = y0; y < y1; y++) {
2,010!
1525
            scale_vertical_row(ctx->dst,
1,971✔
1526
                               ctx->tmp,
1,971✔
1527
                               ctx->dstw,
1528
                               ctx->dsth,
1529
                               ctx->depth,
1530
                               ctx->srch,
1531
                               y,
1532
                               ctx->f_resample,
1533
                               ctx->n,
1534
                               ctx->simd_level);
1535
        }
1536
    }
1537

1538
    if (scale_parallel_should_log(ctx, index)) {
75!
1539
        sixel_logger_logf(ctx->logger,
1540
                          role,
1541
                          "scale",
1542
                          "finish",
1543
                          index,
1544
                          y1 - 1,
1545
                          y0,
1546
                          y1,
1547
                          in0,
1548
                          in1,
1549
                          "pass complete");
1550
    }
1551

1552
    return SIXEL_OK;
1553
}
1554

1555
/*
1556
 * Parallel path mirrors the encoder and dither thread selection through
1557
 * sixel_threads_resolve(). Rows are batched into jobs for both passes so the
1558
 * caller can saturate the threadpool and reduce queue overhead without
1559
 * altering the filtering math.
1560
 */
1561
static int
1562
scale_with_resampling_parallel(
63✔
1563
    unsigned char *dst,
1564
    unsigned char const *src,
1565
    int const srcw,
1566
    int const srch,
1567
    int const dstw,
1568
    int const dsth,
1569
    int const depth,
1570
    resample_fn_t const f_resample,
1571
    double const n,
1572
    unsigned char *tmp,
1573
    sixel_logger_t *logger)
1574
{
1575
    scale_parallel_context_t ctx;
63✔
1576
    threadpool_t *pool;
63✔
1577
    tp_job_t job;
63✔
1578
    size_t image_bytes;
63✔
1579
    int threads;
63✔
1580
    int queue_depth;
63✔
1581
    int y;
63✔
1582
    int rc;
63✔
1583
    int logger_ready;
63✔
1584
    int horizontal_span;
63✔
1585
    int vertical_span;
63✔
1586

1587
    image_bytes = (size_t)srcw * (size_t)srch * (size_t)depth;
63✔
1588
    if (image_bytes < scale_parallel_min_bytes()) {
63!
1589
        if (logger != NULL) {
×
1590
            sixel_logger_logf(logger,
1591
                              "controller",
1592
                              "scale",
1593
                              "skip",
1594
                              -1,
1595
                              -1,
1596
                              0,
1597
                              0,
1598
                              0,
1599
                              0,
1600
                              "below threshold bytes=%zu",
1601
                              image_bytes);
1602
        }
1603
        return SIXEL_BAD_ARGUMENT;
1604
    }
1605

1606
    threads = sixel_threads_resolve();
63✔
1607
    if (threads < 2) {
63!
1608
        if (logger != NULL) {
54!
1609
            sixel_logger_logf(logger,
1610
                              "controller",
1611
                              "scale",
1612
                              "skip",
1613
                              -1,
1614
                              -1,
1615
                              0,
1616
                              0,
1617
                              0,
1618
                              0,
1619
                              "threads=%d",
1620
                              threads);
1621
        }
1622
        return SIXEL_BAD_ARGUMENT;
54✔
1623
    }
1624

1625
    logger_ready = logger != NULL && logger->active;
9!
1626
    if (logger_ready) {
×
1627
        sixel_logger_logf(logger,
1628
                          "controller",
1629
                          "scale",
1630
                          "start",
1631
                          -1,
1632
                          -1,
1633
                          0,
1634
                          srch,
1635
                          0,
1636
                          dsth,
1637
                          "parallel scale src=%dx%d dst=%dx%d",
1638
                          srcw,
1639
                          srch,
1640
                          dstw,
1641
                          dsth);
1642
    }
1643

1644
    ctx.dst = dst;
9✔
1645
    ctx.src = src;
9✔
1646
    ctx.tmp = tmp;
9✔
1647
    ctx.srcw = srcw;
9✔
1648
    ctx.srch = srch;
9✔
1649
    ctx.dstw = dstw;
9✔
1650
    ctx.dsth = dsth;
9✔
1651
    ctx.depth = depth;
9✔
1652
    ctx.f_resample = f_resample;
9✔
1653
    ctx.n = n;
9✔
1654
    ctx.simd_level = sixel_scale_simd_level();
9✔
1655
    ctx.logger = logger_ready ? logger : NULL;
9!
1656

1657
    /*
1658
     * Batch rows to reduce queue churn. Prefer the environment override so
1659
     * deployments can pin a consistent span; otherwise derive a default from
1660
     * rows per thread.
1661
     */
1662
    horizontal_span = scale_parallel_band_span(srch, threads);
9✔
1663
    vertical_span = scale_parallel_band_span(dsth, threads);
9✔
1664

1665
    queue_depth = threads * 3;
9✔
1666
    if (queue_depth > srch) {
9!
1667
        queue_depth = srch;
1668
    }
1669
    if (queue_depth < 1) {
9!
1670
        queue_depth = 1;
1671
    }
1672

1673
    ctx.pass = SCALE_PASS_HORIZONTAL;
9✔
1674
    ctx.band_span = horizontal_span;
9✔
1675
    if (logger_ready) {
9!
1676
        sixel_logger_logf(logger,
1677
                          "controller",
1678
                          "scale",
1679
                          "pass_start",
1680
                          -1,
1681
                          0,
1682
                          0,
1683
                          srch,
1684
                          0,
1685
                          ctx.dstw,
1686
                          "horizontal queue=%d threads=%d",
1687
                          queue_depth,
1688
                          threads);
1689
    }
1690
    pool = threadpool_create(threads,
9✔
1691
                             queue_depth,
1692
                             0,
1693
                             scale_parallel_worker,
1694
                             &ctx,
1695
                             NULL);
1696
    if (pool == NULL) {
9!
1697
        return SIXEL_BAD_ALLOCATION;
1698
    }
1699

1700
    for (y = 0; y < srch; y += horizontal_span) {
45!
1701
        job.band_index = y;
36✔
1702
        threadpool_push(pool, job);
36✔
1703
    }
1704
    threadpool_finish(pool);
9✔
1705
    rc = threadpool_get_error(pool);
9✔
1706
    threadpool_destroy(pool);
9✔
1707
    if (rc != SIXEL_OK) {
9!
1708
        return rc;
1709
    }
1710

1711
    if (logger_ready) {
9!
1712
        sixel_logger_logf(logger,
1713
                          "controller",
1714
                          "scale",
1715
                          "pass_finish",
1716
                          -1,
1717
                          srch - 1,
1718
                          0,
1719
                          srch,
1720
                          0,
1721
                          ctx.dstw,
1722
                          "horizontal complete");
1723
    }
1724

1725
    queue_depth = threads * 3;
9✔
1726
    if (queue_depth > dsth) {
9!
1727
        queue_depth = dsth;
1728
    }
1729
    if (queue_depth < 1) {
9!
1730
        queue_depth = 1;
1731
    }
1732

1733
    ctx.pass = SCALE_PASS_VERTICAL;
9✔
1734
    ctx.band_span = vertical_span;
9✔
1735
    if (logger_ready) {
9!
1736
        sixel_logger_logf(logger,
1737
                          "controller",
1738
                          "scale",
1739
                          "pass_start",
1740
                          -1,
1741
                          0,
1742
                          0,
1743
                          dsth,
1744
                          0,
1745
                          ctx.srch,
1746
                          "vertical queue=%d threads=%d",
1747
                          queue_depth,
1748
                          threads);
1749
    }
1750
    pool = threadpool_create(threads,
9✔
1751
                             queue_depth,
1752
                             0,
1753
                             scale_parallel_worker,
1754
                             &ctx,
1755
                             NULL);
1756
    if (pool == NULL) {
9!
1757
        return SIXEL_BAD_ALLOCATION;
1758
    }
1759

1760
    for (y = 0; y < dsth; y += vertical_span) {
48!
1761
        job.band_index = y;
39✔
1762
        threadpool_push(pool, job);
39✔
1763
    }
1764
    threadpool_finish(pool);
9✔
1765
    rc = threadpool_get_error(pool);
9✔
1766
    threadpool_destroy(pool);
9✔
1767

1768
    if (logger_ready) {
9!
1769
        sixel_logger_logf(logger,
1770
                          "controller",
1771
                          "scale",
1772
                          "pass_finish",
1773
                          -1,
1774
                          dsth - 1,
1775
                          0,
1776
                          dsth,
1777
                          0,
1778
                          ctx.srch,
1779
                          "vertical complete rc=%d",
1780
                          rc);
1781
        sixel_logger_logf(logger,
1782
                          "controller",
1783
                          "scale",
1784
                          "finish",
1785
                          -1,
1786
                          dsth - 1,
1787
                          0,
1788
                          dsth,
1789
                          0,
1790
                          ctx.srch,
1791
                          "parallel scale status=%d",
1792
                          rc);
1793
    }
1794

1795
    return rc;
1796
}
1797
#endif /* SIXEL_ENABLE_THREADS */
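
/*
 * The band partitioning used above can be read as "a few jobs per worker":
 * every job covers band_span consecutive rows, and the queue depth is kept
 * near threads * 3 so workers stay fed without unbounded buffering.  The
 * helper below is only an illustrative sketch of such a policy; the real
 * scale_parallel_band_span() is defined elsewhere in this file and may also
 * honor an environment override, so treat the numbers here as assumptions.
 */
#if 0   /* illustrative sketch, not compiled */
static int
sketch_band_span(int rows, int threads)
{
    int span;

    /* aim for roughly four bands per worker thread */
    span = rows / (threads * 4);
    if (span < 1) {
        span = 1;
    }
    return span;
}

/*
 * usage mirrors the submission loops above:
 *     for (y = 0; y < rows; y += sketch_band_span(rows, threads)) {
 *         job.band_index = y;
 *         threadpool_push(pool, job);
 *     }
 */
#endif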

/*
 * Allocate shared scratch storage and attempt the parallel pipeline first so
 * larger inputs benefit from threading while smaller ones retain the serial
 * behavior.
 */
static void
scale_with_resampling(
    unsigned char *dst,
    unsigned char const *src,
    int const srcw,
    int const srch,
    int const dstw,
    int const dsth,
    int const depth,
    resample_fn_t const f_resample,
    double n,
    sixel_allocator_t *allocator)
{
    unsigned char *tmp;
    size_t tmp_size;
#if SIXEL_ENABLE_THREADS
    int rc;
    sixel_logger_t logger;
    int logger_prepared;
#endif

#if SIXEL_ENABLE_THREADS
    sixel_logger_init(&logger);
    logger_prepared = 0;
    (void)sixel_logger_prepare_env(&logger);
    logger_prepared = logger.active;
#endif

    tmp_size = (size_t)dstw * (size_t)srch * (size_t)depth;
    tmp = (unsigned char *)sixel_allocator_malloc(allocator, tmp_size);
    if (tmp == NULL) {
#if SIXEL_ENABLE_THREADS
        if (logger_prepared) {
            sixel_logger_close(&logger);
        }
#endif
        return;
    }

#if SIXEL_ENABLE_THREADS
    rc = scale_with_resampling_parallel(dst,
                                        src,
                                        srcw,
                                        srch,
                                        dstw,
                                        dsth,
                                        depth,
                                        f_resample,
                                        n,
                                        tmp,
                                        logger_prepared
                                            ? &logger
                                            : NULL);
    if (rc == SIXEL_OK) {
        sixel_allocator_free(allocator, tmp);
        if (logger_prepared) {
            sixel_logger_close(&logger);
        }
        return;
    }

    if (logger_prepared) {
        sixel_logger_logf(&logger,
                          "controller",
                          "scale",
                          "fallback",
                          -1,
                          -1,
                          0,
                          dsth,
                          0,
                          srch,
                          "parallel rc=%d",
                          rc);
    }
#endif

    scale_with_resampling_serial(dst,
                                 src,
                                 srcw,
                                 srch,
                                 dstw,
                                 dsth,
                                 depth,
                                 f_resample,
                                 n,
                                 tmp);

    sixel_allocator_free(allocator, tmp);
#if SIXEL_ENABLE_THREADS
    if (logger_prepared) {
        sixel_logger_close(&logger);
    }
#endif
}
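
/*
 * The scratch buffer used above holds the horizontally scaled image, so its
 * size is dstw * srch * depth bytes: the horizontal pass produces dstw
 * columns for every *source* row, and the vertical pass then collapses srch
 * rows into dsth.  As a worked example (numbers chosen for illustration
 * only), scaling a 1920x1080 RGB24 image down to 640x360 needs a scratch
 * buffer of 640 * 1080 * 3 = 2,073,600 bytes, while dst itself needs only
 * 640 * 360 * 3 = 691,200 bytes.
 */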

/*
 * Floating-point scaler mirrors the byte-path SSE2 usage. Keep it noinline
 * on i386 so the SIXEL_ALIGN_STACK prologue stays in place when SSE2 locals
 * need to spill to the stack.
 */
static SIXEL_ALIGN_STACK SIXEL_NO_INLINE void
scale_with_resampling_float32(
    float *dst,
    float const *src,
    int const srcw,
    int const srch,
    int const dstw,
    int const dsth,
    int const depth,
    resample_fn_t const f_resample,
    double n,
    sixel_allocator_t *allocator)
{
    int w;
    int h;
    int x;
    int y;
    int i;
    int pos;
    int x_first;
    int x_last;
    int y_first;
    int y_last;
    double center_x;
    double center_y;
    double diff_x;
    double diff_y;
    double weight;
    double total;
    double offsets[8];
    float *tmp;
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
    float vecbuf[4];
#endif
    int simd_level;
#if defined(SIXEL_USE_AVX512)
    __m512 acc512;
    __m512 pix512;
#endif
#if defined(SIXEL_USE_AVX2) || defined(SIXEL_USE_AVX)
    __m256 acc256;
#endif
#if defined(SIXEL_USE_SSE2)
    __m128 acc128;
    __m128 pixf128;
    __m128 wv128;
    __m128 scalev128;
    __m128 minv128;
    __m128 maxv128;
#elif defined(SIXEL_USE_NEON)
    float32x4_t acc_neon;
    float32x4_t pixf_neon;
    float32x4_t wv_neon;
    float32x4_t scalev_neon;
    float32x4_t minv_neon;
    float32x4_t maxv_neon;
#endif

    tmp = (float *)sixel_allocator_malloc(
        allocator,
        (size_t)(dstw * srch * depth * (int)sizeof(float)));
    if (tmp == NULL) {
        return;
    }

    simd_level = sixel_scale_simd_level();
#if !defined(SIXEL_USE_AVX512) && !defined(SIXEL_USE_AVX2) && \
    !defined(SIXEL_USE_AVX) && !defined(SIXEL_USE_SSE2) && \
    !defined(SIXEL_USE_NEON)
    /*
     * GCC i686 builds can reach this function with every SIMD backend
     * compiled out; acknowledge the detection result to avoid an unused
     * write while keeping the signature intact.
     */
    (void)simd_level;
#endif

    for (y = 0; y < srch; y++) {
        for (w = 0; w < dstw; w++) {
            total = 0.0;
            for (i = 0; i < depth; i++) {
                offsets[i] = 0.0;
            }

            if (dstw >= srcw) {
                center_x = (w + 0.5) * srcw / dstw;
                x_first = MAX((int)(center_x - n), 0);
                x_last = MIN((int)(center_x + n), srcw - 1);
            } else {
                center_x = w + 0.5;
                x_first = MAX((int)floor((center_x - n) * srcw / dstw), 0);
                x_last = MIN((int)floor((center_x + n) * srcw / dstw),
                             srcw - 1);
            }

#if defined(SIXEL_USE_AVX512)
            if (depth == 3 &&
                simd_level >= SIXEL_SIMD_LEVEL_AVX512) {
                acc512 = sixel_avx512_zero_ps();

                for (x = x_first; x <= x_last; x++) {
                    diff_x = (dstw >= srcw)
                                 ? (x + 0.5) - center_x
                                 : (x + 0.5) * srcw / dstw - center_x;
                    weight = f_resample(fabs(diff_x));
                    pos = (y * srcw + x) * depth;
                    pix512 = sixel_avx512_load_rgb_f32(src + pos);
                    acc512 = sixel_avx512_muladd_ps(
                        acc512,
                        pix512,
                        (float)weight);
                    total += weight;
                }
                if (total > 0.0) {
                    pos = (y * dstw + w) * depth;
                    sixel_avx512_store_rgb_f32(&acc512, total, tmp + pos);
                }
            } else
#endif
#if defined(SIXEL_USE_AVX2)
            if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX2) {
                acc256 = sixel_avx2_zero_ps();

                for (x = x_first; x <= x_last; x++) {
                    diff_x = (dstw >= srcw)
                                 ? (x + 0.5) - center_x
                                 : (x + 0.5) * srcw / dstw - center_x;
                    weight = f_resample(fabs(diff_x));
                    pos = (y * srcw + x) * depth;
                    acc256 = sixel_avx2_muladd_ps(
                        acc256,
                        sixel_avx2_load_rgb_f32(src + pos),
                        (float)weight);
                    total += weight;
                }
                if (total > 0.0) {
                    pos = (y * dstw + w) * depth;
                    sixel_avx2_store_rgb_f32(acc256, total, tmp + pos);
                }
            } else
#endif
#if defined(SIXEL_USE_AVX)
            if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX) {
                acc256 = sixel_avx_zero_ps();

                for (x = x_first; x <= x_last; x++) {
                    diff_x = (dstw >= srcw)
                                 ? (x + 0.5) - center_x
                                 : (x + 0.5) * srcw / dstw - center_x;
                    weight = f_resample(fabs(diff_x));
                    pos = (y * srcw + x) * depth;
                    acc256 = sixel_avx_muladd_ps(
                        acc256,
                        sixel_avx_load_rgb_f32(src + pos),
                        (float)weight);
                    total += weight;
                }
                if (total > 0.0) {
                    pos = (y * dstw + w) * depth;
                    sixel_avx_store_rgb_f32(acc256, total, tmp + pos);
                }
            } else
#endif
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
            if (depth == 3
# if defined(SIXEL_USE_SSE2)
                && simd_level >= SIXEL_SIMD_LEVEL_SSE2
# elif defined(SIXEL_USE_NEON)
                && simd_level >= SIXEL_SIMD_LEVEL_NEON
# endif
                ) {
#if defined(SIXEL_USE_SSE2)
                acc128 = _mm_setzero_ps();
                minv128 = _mm_set1_ps(0.0f);
                maxv128 = _mm_set1_ps(1.0f);
#elif defined(SIXEL_USE_NEON)
                acc_neon = vdupq_n_f32(0.0f);
                minv_neon = vdupq_n_f32(0.0f);
                maxv_neon = vdupq_n_f32(1.0f);
#endif
                for (x = x_first; x <= x_last; x++) {
                    diff_x = (dstw >= srcw)
                                 ? (x + 0.5) - center_x
                                 : (x + 0.5) * srcw / dstw - center_x;
                    weight = f_resample(fabs(diff_x));
                    pos = (y * srcw + x) * depth;
                    const float *psrc = src + pos;
#if defined(SIXEL_USE_SSE2)
                    pixf128 = _mm_set_ps(
                        0.0f, psrc[2], psrc[1], psrc[0]);
                    wv128 = _mm_set1_ps((float)weight);
                    acc128 = _mm_add_ps(acc128,
                                        _mm_mul_ps(pixf128, wv128));
#else /* NEON */
                    /*
                     * Expand the RGB triple into a NEON vector without
                     * brace initialization to keep older toolchains
                     * happy.
                     */
                    pixf_neon = vdupq_n_f32(0.0f);
                    pixf_neon = vsetq_lane_f32(psrc[0], pixf_neon, 0);
                    pixf_neon = vsetq_lane_f32(psrc[1], pixf_neon, 1);
                    pixf_neon = vsetq_lane_f32(psrc[2], pixf_neon, 2);
                    wv_neon = vdupq_n_f32((float)weight);
                    acc_neon = vmlaq_f32(acc_neon, pixf_neon, wv_neon);
#endif
                    total += weight;
                }
                if (total > 0.0) {
#if defined(SIXEL_USE_SSE2)
                    scalev128 = _mm_set1_ps((float)(1.0 / total));
                    acc128 = _mm_mul_ps(acc128, scalev128);
                    acc128 = _mm_max_ps(minv128,
                                        _mm_min_ps(acc128, maxv128));
                    _mm_storeu_ps(vecbuf, acc128);
#else /* NEON */
                    scalev_neon = vdupq_n_f32(
                        (float)(1.0 / total));
                    acc_neon = vmulq_f32(acc_neon, scalev_neon);
                    acc_neon = vmaxq_f32(minv_neon,
                                         vminq_f32(acc_neon, maxv_neon));
                    vst1q_f32(vecbuf, acc_neon);
#endif
                    pos = (y * dstw + w) * depth;
                    tmp[pos + 0] = vecbuf[0];
                    tmp[pos + 1] = vecbuf[1];
                    tmp[pos + 2] = vecbuf[2];
                }
            } else
#endif
            {
                for (x = x_first; x <= x_last; x++) {
                    diff_x = (dstw >= srcw)
                                 ? (x + 0.5) - center_x
                                 : (x + 0.5) * srcw / dstw - center_x;
                    weight = f_resample(fabs(diff_x));
                    for (i = 0; i < depth; i++) {
                        pos = (y * srcw + x) * depth + i;
                        offsets[i] += src[pos] * weight;
                    }
                    total += weight;
                }

                if (total > 0.0) {
                    for (i = 0; i < depth; i++) {
                        pos = (y * dstw + w) * depth + i;
                        tmp[pos] = sixel_clamp_unit_f32(
                            (float)(offsets[i] / total));
                    }
                }
            }
        }
    }

    for (h = 0; h < dsth; h++) {
        for (w = 0; w < dstw; w++) {
            total = 0.0;
            for (i = 0; i < depth; i++) {
                offsets[i] = 0.0;
            }

            if (dsth >= srch) {
                center_y = (h + 0.5) * srch / dsth;
                y_first = MAX((int)(center_y - n), 0);
                y_last = MIN((int)(center_y + n), srch - 1);
            } else {
                center_y = h + 0.5;
                y_first = MAX((int)floor((center_y - n) * srch / dsth), 0);
                y_last = MIN((int)floor((center_y + n) * srch / dsth),
                             srch - 1);
            }

#if defined(SIXEL_USE_AVX512)
            if (depth == 3 &&
                simd_level >= SIXEL_SIMD_LEVEL_AVX512) {
                acc512 = sixel_avx512_zero_ps();

                for (y = y_first; y <= y_last; y++) {
                    diff_y = (dsth >= srch)
                                 ? (y + 0.5) - center_y
                                 : (y + 0.5) * dsth / srch - center_y;
                    weight = f_resample(fabs(diff_y));
                    pos = (y * dstw + w) * depth;
                    pix512 = sixel_avx512_load_rgb_f32(tmp + pos);
                    acc512 = sixel_avx512_muladd_ps(
                        acc512,
                        pix512,
                        (float)weight);
                    total += weight;
                }
                if (total > 0.0) {
                    pos = (h * dstw + w) * depth;
                    sixel_avx512_store_rgb_f32(&acc512, total, dst + pos);
                }
            } else
#endif
#if defined(SIXEL_USE_AVX2)
            if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX2) {
                acc256 = sixel_avx2_zero_ps();

                for (y = y_first; y <= y_last; y++) {
                    diff_y = (dsth >= srch)
                                 ? (y + 0.5) - center_y
                                 : (y + 0.5) * dsth / srch - center_y;
                    weight = f_resample(fabs(diff_y));
                    pos = (y * dstw + w) * depth;
                    acc256 = sixel_avx2_muladd_ps(
                        acc256,
                        sixel_avx2_load_rgb_f32(tmp + pos),
                        (float)weight);
                    total += weight;
                }
                if (total > 0.0) {
                    pos = (h * dstw + w) * depth;
                    sixel_avx2_store_rgb_f32(acc256, total, dst + pos);
                }
            } else
#endif
#if defined(SIXEL_USE_AVX)
            if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX) {
                acc256 = sixel_avx_zero_ps();

                for (y = y_first; y <= y_last; y++) {
                    diff_y = (dsth >= srch)
                                 ? (y + 0.5) - center_y
                                 : (y + 0.5) * dsth / srch - center_y;
                    weight = f_resample(fabs(diff_y));
                    pos = (y * dstw + w) * depth;
                    acc256 = sixel_avx_muladd_ps(
                        acc256,
                        sixel_avx_load_rgb_f32(tmp + pos),
                        (float)weight);
                    total += weight;
                }
                if (total > 0.0) {
                    pos = (h * dstw + w) * depth;
                    sixel_avx_store_rgb_f32(acc256, total, dst + pos);
                }
            } else
#endif
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
            if (depth == 3
# if defined(SIXEL_USE_SSE2)
                && simd_level >= SIXEL_SIMD_LEVEL_SSE2
# elif defined(SIXEL_USE_NEON)
                && simd_level >= SIXEL_SIMD_LEVEL_NEON
# endif
                ) {
#if defined(SIXEL_USE_SSE2)
                acc128 = _mm_setzero_ps();
                minv128 = _mm_set1_ps(0.0f);
                maxv128 = _mm_set1_ps(1.0f);
#elif defined(SIXEL_USE_NEON)
                acc_neon = vdupq_n_f32(0.0f);
                minv_neon = vdupq_n_f32(0.0f);
                maxv_neon = vdupq_n_f32(1.0f);
#endif
                for (y = y_first; y <= y_last; y++) {
                    diff_y = (dsth >= srch)
                                 ? (y + 0.5) - center_y
                                 : (y + 0.5) * dsth / srch - center_y;
                    weight = f_resample(fabs(diff_y));
                    pos = (y * dstw + w) * depth;
                    const float *psrc = tmp + pos;
#if defined(SIXEL_USE_SSE2)
                    pixf128 = _mm_set_ps(
                        0.0f, psrc[2], psrc[1], psrc[0]);
                    wv128 = _mm_set1_ps((float)weight);
                    acc128 = _mm_add_ps(acc128,
                                        _mm_mul_ps(pixf128, wv128));
#else /* NEON */
                    /*
                     * Expand the RGB triple into a NEON vector without
                     * brace initialization to keep older toolchains
                     * happy.
                     */
                    pixf_neon = vdupq_n_f32(0.0f);
                    pixf_neon = vsetq_lane_f32(psrc[0], pixf_neon, 0);
                    pixf_neon = vsetq_lane_f32(psrc[1], pixf_neon, 1);
                    pixf_neon = vsetq_lane_f32(psrc[2], pixf_neon, 2);
                    wv_neon = vdupq_n_f32((float)weight);
                    acc_neon = vmlaq_f32(acc_neon, pixf_neon, wv_neon);
#endif
                    total += weight;
                }
                if (total > 0.0) {
#if defined(SIXEL_USE_SSE2)
                    scalev128 = _mm_set1_ps((float)(1.0 / total));
                    acc128 = _mm_mul_ps(acc128, scalev128);
                    acc128 = _mm_max_ps(minv128,
                                        _mm_min_ps(acc128, maxv128));
                    _mm_storeu_ps(vecbuf, acc128);
#else /* NEON */
                    scalev_neon = vdupq_n_f32(
                        (float)(1.0 / total));
                    acc_neon = vmulq_f32(acc_neon, scalev_neon);
                    acc_neon = vmaxq_f32(minv_neon,
                                         vminq_f32(acc_neon, maxv_neon));
                    vst1q_f32(vecbuf, acc_neon);
#endif
                    pos = (h * dstw + w) * depth;
                    dst[pos + 0] = vecbuf[0];
                    dst[pos + 1] = vecbuf[1];
                    dst[pos + 2] = vecbuf[2];
                }
            } else
#endif
            {
                for (y = y_first; y <= y_last; y++) {
                    diff_y = (dsth >= srch)
                                 ? (y + 0.5) - center_y
                                 : (y + 0.5) * dsth / srch - center_y;
                    weight = f_resample(fabs(diff_y));
                    for (i = 0; i < depth; i++) {
                        pos = (y * dstw + w) * depth + i;
                        offsets[i] += tmp[pos] * weight;
                    }
                    total += weight;
                }

                if (total > 0.0) {
                    for (i = 0; i < depth; i++) {
                        pos = (h * dstw + w) * depth + i;
                        dst[pos] = sixel_clamp_unit_f32(
                            (float)(offsets[i] / total));
                    }
                }
            }
        }
    }

    sixel_allocator_free(allocator, tmp);
}
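
/*
 * Every SIMD branch above evaluates the same scalar recurrence per output
 * sample: accumulate weighted taps, normalize by the weight sum, then clamp
 * to the unit range.  A minimal scalar sketch follows (helper name and the
 * flat argument layout are illustrative, not part of this file's API):
 */
#if 0   /* illustrative sketch, not compiled */
static float
sketch_weighted_sample(float const *samples, double const *weights, int count)
{
    double acc = 0.0;
    double total = 0.0;
    int k;

    for (k = 0; k < count; k++) {
        acc += samples[k] * weights[k];   /* accumulate weighted taps */
        total += weights[k];              /* track the normalizer */
    }
    if (total <= 0.0) {
        return 0.0f;                      /* no contributing taps */
    }
    acc /= total;                         /* normalize */
    if (acc < 0.0) {                      /* clamp to [0, 1] */
        acc = 0.0;
    } else if (acc > 1.0) {
        acc = 1.0;
    }
    return (float)acc;
}
#endif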

SIXELAPI int
sixel_helper_scale_image(
    unsigned char       /* out */ *dst,
    unsigned char const /* in */  *src,                   /* source image data */
    int                 /* in */  srcw,                   /* source image width */
    int                 /* in */  srch,                   /* source image height */
    int                 /* in */  pixelformat,            /* one of enum pixelFormat */
    int                 /* in */  dstw,                   /* destination image width */
    int                 /* in */  dsth,                   /* destination image height */
    int                 /* in */  method_for_resampling,  /* one of methodForResampling */
    sixel_allocator_t   /* in */  *allocator)             /* allocator object */
{
    /*
     * Convert the source image to RGB24 if necessary and scale it to the
     * requested destination size.  The caller supplies an allocator used
     * for any temporary buffers required during conversion or filtering.
     */
    int const depth = sixel_helper_compute_depth(pixelformat);
    unsigned char *new_src = NULL;  /* optional converted source buffer */
    int nret;
    int new_pixelformat;

    /* ensure the scaler operates on RGB triples */
    if (depth != 3) {
        new_src = (unsigned char *)sixel_allocator_malloc(allocator,
                                                          (size_t)(srcw * srch * 3));
        if (new_src == NULL) {
            return (-1);
        }
        nret = sixel_helper_normalize_pixelformat(new_src,
                                                  &new_pixelformat,
                                                  src, pixelformat,
                                                  srcw, srch);
        if (nret != 0) {
            sixel_allocator_free(allocator, new_src);
            return (-1);
        }

        src = new_src;  /* use converted buffer from here on */
    } else {
        new_pixelformat = pixelformat;
    }

    /* choose re-sampling strategy */
    switch (method_for_resampling) {
    case SIXEL_RES_NEAREST:
        scale_without_resampling(dst, src, srcw, srch, dstw, dsth, depth);
        break;
    case SIXEL_RES_GAUSSIAN:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              gaussian, 1.0, allocator);
        break;
    case SIXEL_RES_HANNING:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              hanning, 1.0, allocator);
        break;
    case SIXEL_RES_HAMMING:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              hamming, 1.0, allocator);
        break;
    case SIXEL_RES_WELSH:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              welsh, 1.0, allocator);
        break;
    case SIXEL_RES_BICUBIC:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              bicubic, 2.0, allocator);
        break;
    case SIXEL_RES_LANCZOS2:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              lanczos2, 2.0, allocator);
        break;
    case SIXEL_RES_LANCZOS3:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              lanczos3, 3.0, allocator);
        break;
    case SIXEL_RES_LANCZOS4:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              lanczos4, 4.0, allocator);
        break;
    case SIXEL_RES_BILINEAR:
    default:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              bilinear, 1.0, allocator);
        break;
    }

    /* release temporary copy created for pixel-format normalization */
    sixel_allocator_free(allocator, new_src);
    return 0;
}
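
/*
 * A minimal caller-side sketch for the helper above.  It assumes the public
 * allocator helpers declared in <sixel.h> (sixel_allocator_new() with NULL
 * callbacks selecting the default malloc/calloc/realloc/free, released via
 * sixel_allocator_unref()); buffer sizes and the chosen format/filter are
 * illustrative only.
 */
#if 0   /* illustrative sketch, not compiled */
#include <sixel.h>

static int
sketch_scale_rgb888(unsigned char const *src, int srcw, int srch,
                    unsigned char *dst, int dstw, int dsth)
{
    sixel_allocator_t *allocator = NULL;
    int status;

    if (SIXEL_FAILED(sixel_allocator_new(&allocator, NULL, NULL, NULL, NULL))) {
        return (-1);
    }
    /* dst must provide dstw * dsth * 3 bytes for RGB888 data */
    status = sixel_helper_scale_image(dst, src, srcw, srch,
                                      SIXEL_PIXELFORMAT_RGB888,
                                      dstw, dsth,
                                      SIXEL_RES_BILINEAR,
                                      allocator);
    sixel_allocator_unref(allocator);
    return status;
}
#endif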

SIXELAPI int
sixel_helper_scale_image_float32(
    float             /* out */ *dst,
    float const       /* in */  *src,
    int               /* in */  srcw,
    int               /* in */  srch,
    int               /* in */  pixelformat,
    int               /* in */  dstw,
    int               /* in */  dsth,
    int               /* in */  method_for_resampling,
    sixel_allocator_t /* in */  *allocator)
{
    int depth;
    int depth_bytes;

    depth_bytes = sixel_helper_compute_depth(pixelformat);
    if (depth_bytes <= 0) {
        return (-1);
    }

    depth = depth_bytes / (int)sizeof(float);
    if (depth * (int)sizeof(float) != depth_bytes) {
        return (-1);
    }

    switch (method_for_resampling) {
    case SIXEL_RES_NEAREST:
        scale_without_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth);
        break;
    case SIXEL_RES_GAUSSIAN:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            gaussian, 1.0, allocator);
        break;
    case SIXEL_RES_HANNING:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            hanning, 1.0, allocator);
        break;
    case SIXEL_RES_HAMMING:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            hamming, 1.0, allocator);
        break;
    case SIXEL_RES_WELSH:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            welsh, 1.0, allocator);
        break;
    case SIXEL_RES_BICUBIC:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            bicubic, 2.0, allocator);
        break;
    case SIXEL_RES_LANCZOS2:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            lanczos2, 2.0, allocator);
        break;
    case SIXEL_RES_LANCZOS3:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            lanczos3, 3.0, allocator);
        break;
    case SIXEL_RES_LANCZOS4:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            lanczos4, 4.0, allocator);
        break;
    case SIXEL_RES_BILINEAR:
    default:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            bilinear, 1.0, allocator);
        break;
    }

    return 0;
}
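
/*
 * Worked example of the depth conversion above (numbers are illustrative):
 * a pixel format whose sixel_helper_compute_depth() result is 12 bytes per
 * pixel maps to depth = 3 float channels, while a byte-oriented format
 * reported as 3 bytes per pixel is rejected because 3 is not a multiple of
 * sizeof(float) on typical platforms.
 */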
2512

2513
#if HAVE_TESTS
2514

2515
static void
2516
reference_scale(
×
2517
    unsigned char *dst,
2518
    unsigned char const *src,
2519
    int const srcw,
2520
    int const srch,
2521
    int const dstw,
2522
    int const dsth,
2523
    int const depth)
2524
{
2525
    int w;
×
2526
    int h;
×
2527
    int x;
×
2528
    int y;
×
2529
    int i;
×
2530
    int pos;
×
2531

2532
    for (h = 0; h < dsth; h++) {
×
2533
        for (w = 0; w < dstw; w++) {
×
2534
            x = (long)w * srcw / dstw;
×
2535
            y = (long)h * srch / dsth;
×
2536
            for (i = 0; i < depth; i++) {
×
2537
                pos = (y * srcw + x) * depth + i;
×
2538
                dst[(h * dstw + w) * depth + i] = src[pos];
×
2539
            }
2540
        }
2541
    }
2542
}
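
/*
 * Worked example of the nearest-neighbor index mapping above (illustrative
 * numbers): with srcw = 8 and dstw = 3, destination columns 0, 1 and 2 read
 * source columns 0 * 8 / 3 = 0, 1 * 8 / 3 = 2 and 2 * 8 / 3 = 5.  The
 * (long) cast keeps the intermediate product from overflowing int for very
 * large images.
 */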

static int
test_without_resampling_case(
    int srcw,
    int srch,
    int dstw,
    int dsth,
    int depth)
{
    int nret = EXIT_FAILURE;
    size_t srcsize = (size_t)srcw * srch * depth;
    size_t dstsize = (size_t)dstw * dsth * depth;
    unsigned char *src = NULL;
    unsigned char *ref = NULL;
    unsigned char *out = NULL;
    size_t i;

    src = (unsigned char *)malloc(srcsize);
    ref = (unsigned char *)malloc(dstsize);
    out = (unsigned char *)malloc(dstsize);
    if (src == NULL || ref == NULL || out == NULL) {
        goto end;
    }

    for (i = 0; i < srcsize; ++i) {
        src[i] = (unsigned char)(i & 0xff);
    }

    reference_scale(ref, src, srcw, srch, dstw, dsth, depth);
    scale_without_resampling(out, src, srcw, srch, dstw, dsth, depth);

    if (memcmp(ref, out, dstsize) != 0) {
        goto end;
    }

    nret = EXIT_SUCCESS;

end:
    free(src);
    free(ref);
    free(out);
    return nret;
}

SIXELAPI int
sixel_scale_tests_main(void)
{
    int nret = EXIT_FAILURE;
    size_t i;
    struct {
        int srcw;
        int srch;
        int dstw;
        int dsth;
        int depth;
    } cases[] = {
        {8, 4, 3, 7, 3},
        {13, 9, 17, 6, 4}
    };

    for (i = 0; i < sizeof(cases) / sizeof(cases[0]); ++i) {
        nret = test_without_resampling_case(cases[i].srcw,
                                            cases[i].srch,
                                            cases[i].dstw,
                                            cases[i].dsth,
                                            cases[i].depth);
        if (nret != EXIT_SUCCESS) {
            goto end;
        }
    }

    nret = EXIT_SUCCESS;

end:
    return nret;
}

#endif /* HAVE_TESTS */

#if defined(__GNUC__) && !defined(__clang__)
# pragma GCC diagnostic pop
#endif

/* emacs Local Variables:      */
/* emacs mode: c               */
/* emacs tab-width: 4          */
/* emacs indent-tabs-mode: nil */
/* emacs c-basic-offset: 4     */
/* emacs End:                  */
/* vim: set expandtab ts=4 sts=4 sw=4 : */
/* EOF */