
saitoha / libsixel / 20699594618 (push, github, saitoha)

04 Jan 2026 09:46PM UTC coverage: 56.656% (-0.4%) from 57.034%
ci: remove unnecessary --enable-gcov flag from ci-autotools jobs

7384 of 21596 branches covered (34.19%)
22347 of 39443 relevant lines covered (56.66%)
5518245.02 hits per line

Source file: /src/scale.c (97.28% covered)
1
/*
2
 * SPDX-License-Identifier: MIT
3
 *
4
 * Copyright (c) 2021-2025 libsixel developers. See `AUTHORS`.
5
 * Copyright (c) 2014-2016 Hayaki Saito
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
8
 * this software and associated documentation files (the "Software"), to deal in
9
 * the Software without restriction, including without limitation the rights to
10
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
11
 * the Software, and to permit persons to whom the Software is furnished to do so,
12
 * subject to the following conditions:
13
 *
14
 * The above copyright notice and this permission notice shall be included in all
15
 * copies or substantial portions of the Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
19
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
20
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
21
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
 */
24

25
#if defined(HAVE_CONFIG_H)
26
#include "config.h"
27
#endif
28

29
/* STDC_HEADERS */
30
#include <stdlib.h>
31

32
#if HAVE_ERRNO_H
33
# include <errno.h>
34
#endif  /* HAVE_ERRNO_H */
35
#if HAVE_LIMITS_H
36
# include <limits.h>
37
#endif  /* HAVE_LIMITS_H */
38
#if HAVE_STRING_H
39
# include <string.h>
40
#endif  /* HAVE_STRING_H */
41
#if HAVE_STDINT_H
42
# include <stdint.h>
43
#endif  /* HAVE_STDINT_H */
44

45
#if HAVE_MATH_H
46
# define _USE_MATH_DEFINES  /* for MSVC */
47
# include <math.h>
48
#endif  /* HAVE_MATH_H */
49
#ifndef M_PI
50
# define M_PI 3.14159265358979323846
51
#endif
52

53
#include <sixel.h>
54

55
#include "cpu.h"
56
#include "logger.h"
57
#include "compat_stub.h"
58
#include "threading.h"
59

60
#if SIXEL_ENABLE_THREADS
61
# include "threadpool.h"
62
#endif
63

64
#if defined(__GNUC__) && defined(__i386__)
65
/*
66
 * i386 callers may enter with only 4- or 8-byte stack alignment. Force
67
 * realignment for SSE2-heavy routines to avoid movaps spills to unaligned
68
 * stack slots when SIMD is enabled via SIXEL_SIMD_LEVEL. Mark affected
69
 * functions noinline so the prologue that performs realignment is not
70
 * dropped by inlining.
71
 */
72
# define SIXEL_ALIGN_STACK __attribute__((force_align_arg_pointer))
73
# define SIXEL_NO_INLINE __attribute__((noinline))
74
#else
75
# define SIXEL_ALIGN_STACK
76
# define SIXEL_NO_INLINE
77
#endif
78

79
#if defined(HAVE_IMMINTRIN_H) && \
80
    (defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \
81
     defined(_M_IX86))
82
# define SIXEL_HAS_X86_INTRIN 1
83
# include <immintrin.h>
84
#endif
85

86
#if defined(__GNUC__) && !defined(__clang__)
87
/*
88
 * GCC reports a -Wpsabi note when __m512 parameters are present because the
89
 * calling convention changed in GCC 4.6. All callers and callees in this
90
 * translation unit share the same compiler, so suppress the note globally to
91
 * keep the output clean on AVX-512 builds.
92
 */
93
#pragma GCC diagnostic ignored "-Wpsabi"
94
#endif
95

96
#if defined(HAVE_SSE2)
97
/*
98
 * MSVC does not define __SSE2__ on x86/x64.  Instead, rely on the
99
 * architecture macros it provides so SIMD paths stay enabled after the
100
 * configure probe has validated SSE2 support.
101
 */
102
# if defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
103
    (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
104
#  if defined(HAVE_EMMINTRIN_H)
105
#   include <emmintrin.h>
106
#   define SIXEL_USE_SSE2 1
107
#  endif
108
# endif
109
#endif
110

111
#if defined(SIXEL_HAS_X86_INTRIN)
112
/* Reset ISA target macros in case another compilation unit defined them */
113
/* earlier in a unity or amalgamation build. */
114
# if defined(SIXEL_TARGET_AVX)
115
#  undef SIXEL_TARGET_AVX
116
# endif
117
# if defined(SIXEL_TARGET_AVX2)
118
#  undef SIXEL_TARGET_AVX2
119
# endif
120
# if defined(SIXEL_TARGET_AVX512)
121
#  undef SIXEL_TARGET_AVX512
122
# endif
123
# if defined(__GNUC__)
124
#  if !defined(__clang__)
125
#   define SIXEL_TARGET_AVX __attribute__((target("avx")))
126
#   define SIXEL_TARGET_AVX2 __attribute__((target("avx2")))
127
#   define SIXEL_TARGET_AVX512 __attribute__((target("avx512f")))
128
#   define SIXEL_USE_AVX 1
129
#  else
130
/*
131
 * clang rejects returning AVX vectors when the translation unit target
132
 * does not already include the corresponding ISA.  Guard runtime AVX
133
 * helpers with compile-time ISA availability to keep non-AVX builds
134
 * warning-free while still using AVX when the compiler enables it.
135
 */
136
#   define SIXEL_TARGET_AVX
137
#   define SIXEL_TARGET_AVX2
138
#   define SIXEL_TARGET_AVX512
139
#   if defined(__AVX__)
140
#    define SIXEL_USE_AVX 1
141
#   endif
142
#   if defined(__AVX2__)
143
#    define SIXEL_USE_AVX2 1
144
#   endif
145
#   if defined(__AVX512F__)
146
#    define SIXEL_USE_AVX512 1
147
#   endif
148
#  endif
149
# else
150
#  define SIXEL_TARGET_AVX
151
#  define SIXEL_TARGET_AVX2
152
#  define SIXEL_TARGET_AVX512
153
#  if defined(__AVX__)
154
#   define SIXEL_USE_AVX 1
155
#  endif
156
#  if defined(__AVX2__)
157
#   define SIXEL_USE_AVX2 1
158
#  endif
159
#  if defined(__AVX512F__)
160
#   define SIXEL_USE_AVX512 1
161
#  endif
162
# endif
163
#endif
164

165
#if defined(__GNUC__) && !defined(__clang__) && !defined(__PCC__)
166
# pragma GCC diagnostic push
167
# pragma GCC diagnostic ignored "-Wpsabi"
168
# undef SIXEL_USE_AVX
169
# undef SIXEL_USE_AVX2
170
# undef SIXEL_USE_AVX512
171
#endif
172

173
#if defined(HAVE_NEON)
174
# if (defined(__ARM_NEON) || defined(__ARM_NEON__))
175
#  if defined(HAVE_ARM_NEON_H)
176
#   include <arm_neon.h>
177
#   define SIXEL_USE_NEON 1
178
#  endif
179
# endif
180
#endif
181

182
#if !defined(MAX)
183
# define MAX(l, r) ((l) > (r) ? (l) : (r))
184
#endif
185
#if !defined(MIN)
186
#define MIN(l, r) ((l) < (r) ? (l) : (r))
187
#endif
188

189

190
#if 0
191
/* function Nearest Neighbor */
192
static double
193
nearest_neighbor(double const d)
194
{
195
    if (d <= 0.5) {
196
        return 1.0;
197
    }
198
    return 0.0;
199
}
200
#endif
201

202

203
/* function Bi-linear */
204
static double
205
bilinear(double const d)
147,851,849✔
206
{
207
    if (d < 1.0) {
147,851,849✔
208
        return 1.0 - d;
111,109,540✔
209
    }
210
    return 0.0;
211
}
212

213

214
/* function Welsh */
215
static double
216
welsh(double const d)
4,187,000✔
217
{
218
    if (d < 1.0) {
4,187,000✔
219
        return 1.0 - d * d;
748,000✔
220
    }
221
    return 0.0;
222
}
223

224

225
/* function Bi-cubic */
226
static double
227
bicubic(double const d)
6,491,030✔
228
{
229
    if (d <= 1.0) {
6,491,030✔
230
        return 1.0 + (d - 2.0) * d * d;
2,058,930✔
231
    }
232
    if (d <= 2.0) {
4,432,100✔
233
        return 4.0 + d * (-8.0 + d * (5.0 - d));
2,037,600✔
234
    }
235
    return 0.0;
236
}
237

238

239
/* function sinc
240
 * sinc(x) = sin(PI * x) / (PI * x)
241
 */
242
static double
243
sinc(double const x)
116,978,631✔
244
{
245
    return sin(M_PI * x) / (M_PI * x);
116,978,631✔
246
}
247

248

249
/* function Lanczos-2
250
 * Lanczos(x) = sinc(x) * sinc(x / 2) , |x| <= 2
251
 *            = 0, |x| > 2
252
 */
253
static double
254
lanczos2(double const d)
15,543,830✔
255
{
256
    if (d == 0.0) {
15,543,830!
257
        return 1.0;
258
    }
259
    if (d < 2.0) {
15,543,830✔
260
        return sinc(d) * sinc(d / 2.0);
5,745,360✔
261
    }
262
    return 0.0;
263
}
264

265

266
/* function Lanczos-3
267
 * Lanczos(x) = sinc(x) * sinc(x / 3) , |x| <= 3
268
 *            = 0, |x| > 3
269
 */
270
static double
271
lanczos3(double const d)
116,814,166✔
272
{
273
    if (d == 0.0) {
116,814,166!
274
        return 1.0;
275
    }
276
    if (d < 3.0) {
116,814,166✔
277
        return sinc(d) * sinc(d / 3.0);
107,895,641✔
278
    }
279
    return 0.0;
280
}
281

282
/* function Lanczos-4
283
 * Lanczos(x) = sinc(x) * sinc(x / 4) , |x| <= 4
284
 *            = 0, |x| > 4
285
 */
286
static double
287
lanczos4(double const d)
14,539,290✔
288
{
289
    if (d == 0.0) {
14,539,290!
290
        return 1.0;
291
    }
292
    if (d < 4.0) {
14,539,290✔
293
        return sinc(d) * sinc(d / 4.0);
3,337,630✔
294
    }
295
    return 0.0;
296
}
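
/*
 * Worked example for the Lanczos kernels above (illustrative numbers):
 *
 *     lanczos3(1.5) = sinc(1.5) * sinc(0.5)
 *                   = (sin(1.5 * M_PI) / (1.5 * M_PI))
 *                     * (sin(0.5 * M_PI) / (0.5 * M_PI))
 *                   = (-0.21221) * (0.63662)
 *                  ~= -0.13509
 *
 * The negative lobes are expected: they give Lanczos its sharpening
 * character and are the reason the accumulation paths below clamp the
 * result before converting back to 8-bit or unit-range values.
 */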
297

298

299
static double
300
gaussian(double const d)
3,393,250✔
301
{
302
    return exp(-2.0 * d * d) * sqrt(2.0 / M_PI);
3,393,250✔
303
}
304

305

306
static double
307
hanning(double const d)
3,653,980✔
308
{
309
    return 0.5 + 0.5 * cos(d * M_PI);
3,653,980✔
310
}
311

312

313
static double
314
hamming(const double d)
4,187,000✔
315
{
316
    return 0.54 + 0.46 * cos(d * M_PI);
4,187,000✔
317
}
318

319

320
static unsigned char
321
normalize(double x, double total)
16,244,316✔
322
{
323
    int result;
16,244,316✔
324

325
    result = floor(x / total);
16,244,316✔
326
    if (result > 255) {
16,244,316✔
327
        return 0xff;
328
    }
329
    if (result < 0) {
16,240,978✔
330
        return 0x00;
331
    }
332
    return (unsigned char)result;
16,240,007✔
333
}
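
/*
 * Example of the scalar normalization above (illustrative numbers): with
 * an accumulated offsets[i] of 12750.0 and total = 50.0, the weighted
 * average is floor(12750.0 / 50.0) = 255, which passes both range checks
 * unchanged. This scalar path truncates via floor() while the x86 SIMD
 * stores below convert with round-to-nearest (_mm_cvtps_epi32 and
 * friends), so the two paths may differ by one least-significant bit.
 */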
334

335
static int
336
sixel_scale_simd_level(void)
160✔
337
{
338
    static int simd_level = -2;
160✔
339

340
    if (simd_level == -2) {
160!
341
        simd_level = sixel_cpu_simd_level();
160✔
342
#if defined(__i386__)
343
        /*
344
         * AVX and later widen the alignment requirement for stack spills to
345
         * 32 bytes. i386 stack realignment from force_align_arg_pointer only
346
         * guarantees 16-byte boundaries, so keep the runtime level capped at
347
         * SSE2 to avoid vmovaps faults when YMM locals spill.
348
         */
349
        if (simd_level > SIXEL_SIMD_LEVEL_SSE2) {
350
            simd_level = SIXEL_SIMD_LEVEL_SSE2;
351
        }
352
#endif
353
    }
354

355
    return simd_level;
160✔
356
}
357

358
static float
359
sixel_clamp_unit_f32(float value)
4,954,614✔
360
{
361
    /*
362
     * Resampling kernels with negative lobes can push linear RGB values
363
     * outside the unit interval. Clamp here so downstream conversions do
364
     * not collapse to black.
365
     */
366
    if (value < 0.0f) {
4,954,614✔
367
        return 0.0f;
368
    }
369
    if (value > 1.0f) {
4,934,015✔
370
        return 1.0f;
371
    }
372

373
    return value;
374
}
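
/*
 * Example: a hard step edge resampled with a negative-lobe kernel such
 * as lanczos3 can ring by a few percent, so inputs like -0.03f or 1.03f
 * do reach this helper; both are pulled back into [0.0, 1.0] before the
 * float32 pipeline hands the value to downstream conversions.
 */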
375

376
#if defined(HAVE_IMMINTRIN_H)
377
#if defined(SIXEL_USE_AVX)
378
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX __m256
379
sixel_avx_load_rgb_ps(unsigned char const *psrc)
380
{
381
    __m128i pixi128;
382
    __m128 pixf128;
383
    __m256 pixf256;
384

385
    /*
386
     * Build the byte vector explicitly so the AVX path never accumulates
387
     * garbage data when widening to 32-bit lanes.
388
     */
389
    pixi128 = _mm_setr_epi8((char)psrc[0],
390
                            (char)psrc[1],
391
                            (char)psrc[2],
392
                            0,
393
                            0, 0, 0, 0,
394
                            0, 0, 0, 0,
395
                            0, 0, 0, 0);
396
    pixi128 = _mm_cvtepu8_epi32(pixi128);  /* widen the bytes to 32-bit lanes first */
    pixf128 = _mm_cvtepi32_ps(pixi128);
397
    pixf256 = _mm256_castps128_ps256(pixf128);
398
    pixf256 = _mm256_insertf128_ps(pixf256, _mm_setzero_ps(), 1);
399
    return pixf256;
400
}
401

402
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX void
403
sixel_avx_store_rgb_u8(__m256 acc, double total, unsigned char *dst)
404
{
405
    __m256 scalev;
406
    __m256 minv;
407
    __m256 maxv;
408
    __m256i acci;
409
    int out[8];
410

411
    scalev = _mm256_set1_ps((float)(1.0 / total));
412
    acc = _mm256_mul_ps(acc, scalev);
413
    minv = _mm256_set1_ps(0.0f);
414
    maxv = _mm256_set1_ps(255.0f);
415
    acc = _mm256_max_ps(minv, _mm256_min_ps(acc, maxv));
416
    acci = _mm256_cvtps_epi32(acc);
417
    _mm256_storeu_si256((__m256i *)out, acci);
418
    dst[0] = (unsigned char)out[0];
419
    dst[1] = (unsigned char)out[1];
420
    dst[2] = (unsigned char)out[2];
421
}
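
/*
 * Worked example for the store helper above (illustrative numbers): an
 * accumulator holding {600.0, 300.0, 90.0, ...} built from taps whose
 * weights sum to total = 3.0 is scaled by 1.0 / total to
 * {200.0, 100.0, 30.0}; the clamp to [0, 255] leaves those values
 * untouched and dst receives the bytes {200, 100, 30}.
 */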
422

423
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX __m256
424
sixel_avx_zero_ps(void)
425
{
426
    return _mm256_setzero_ps();
427
}
428

429
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX __m256
430
sixel_avx_muladd_ps(__m256 acc, __m256 pix, float weight)
431
{
432
    __m256 wv;
433

434
    wv = _mm256_set1_ps(weight);
435
    return _mm256_add_ps(acc, _mm256_mul_ps(pix, wv));
436
}
437

438
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX __m256
439
sixel_avx_load_rgb_f32(float const *psrc)
440
{
441
    __m256 pixf;
442

443
    pixf = _mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f,
444
                         psrc[2], psrc[1], psrc[0], 0.0f);
445
    return pixf;
446
}
447

448
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX void
449
sixel_avx_store_rgb_f32(__m256 acc, double total, float *dst)
450
{
451
    __m256 scalev;
452
    __m256 minv;
453
    __m256 maxv;
454
    float out[8];
455

456
    scalev = _mm256_set1_ps((float)(1.0 / total));
457
    acc = _mm256_mul_ps(acc, scalev);
458
    minv = _mm256_set1_ps(0.0f);
459
    maxv = _mm256_set1_ps(1.0f);
460
    acc = _mm256_max_ps(minv, _mm256_min_ps(acc, maxv));
461
    _mm256_storeu_ps(out, acc);
462
    dst[0] = out[0];
463
    dst[1] = out[1];
464
    dst[2] = out[2];
465
}
466
#endif  /* SIXEL_USE_AVX */
467

468
#if defined(SIXEL_USE_AVX2)
469
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 __m256
470
sixel_avx2_load_rgb_ps(unsigned char const *psrc)
471
{
472
    __m128i pixi128;
473
    __m256i pixi256;
474

475
    /*
476
     * Keep the unused bytes zeroed so widening to epi32 does not pull in
477
     * stack junk and bias every output channel toward white.
478
     */
479
    pixi128 = _mm_setr_epi8((char)psrc[0],
480
                            (char)psrc[1],
481
                            (char)psrc[2],
482
                            0,
483
                            0, 0, 0, 0,
484
                            0, 0, 0, 0,
485
                            0, 0, 0, 0);
486
    pixi256 = _mm256_cvtepu8_epi32(pixi128);
487
    return _mm256_cvtepi32_ps(pixi256);
488
}
489

490
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 void
491
sixel_avx2_store_rgb_u8(__m256 acc, double total, unsigned char *dst)
492
{
493
    __m256 scalev;
494
    __m256 minv;
495
    __m256 maxv;
496
    __m256i acci;
497
    int out[8];
498

499
    scalev = _mm256_set1_ps((float)(1.0 / total));
500
    acc = _mm256_mul_ps(acc, scalev);
501
    minv = _mm256_set1_ps(0.0f);
502
    maxv = _mm256_set1_ps(255.0f);
503
    acc = _mm256_max_ps(minv, _mm256_min_ps(acc, maxv));
504
    acci = _mm256_cvtps_epi32(acc);
505
    _mm256_storeu_si256((__m256i *)out, acci);
506
    dst[0] = (unsigned char)out[0];
507
    dst[1] = (unsigned char)out[1];
508
    dst[2] = (unsigned char)out[2];
509
}
510

511
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 __m256
512
sixel_avx2_zero_ps(void)
513
{
514
    return _mm256_setzero_ps();
515
}
516

517
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 __m256
518
sixel_avx2_muladd_ps(__m256 acc, __m256 pix, float weight)
519
{
520
    __m256 wv;
521

522
    wv = _mm256_set1_ps(weight);
523
    return _mm256_add_ps(acc, _mm256_mul_ps(pix, wv));
524
}
525

526
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 __m256
527
sixel_avx2_load_rgb_f32(float const *psrc)
528
{
529
    __m256 pixf;
530

531
    pixf = _mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f,
532
                         psrc[2], psrc[1], psrc[0], 0.0f);
533
    return pixf;
534
}
535

536
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 void
537
sixel_avx2_store_rgb_f32(__m256 acc, double total, float *dst)
538
{
539
    __m256 scalev;
540
    __m256 minv;
541
    __m256 maxv;
542
    float out[8];
543

544
    scalev = _mm256_set1_ps((float)(1.0 / total));
545
    acc = _mm256_mul_ps(acc, scalev);
546
    minv = _mm256_set1_ps(0.0f);
547
    maxv = _mm256_set1_ps(1.0f);
548
    acc = _mm256_max_ps(minv, _mm256_min_ps(acc, maxv));
549
    _mm256_storeu_ps(out, acc);
550
    dst[0] = out[0];
551
    dst[1] = out[1];
552
    dst[2] = out[2];
553
}
554
#endif  /* SIXEL_USE_AVX2 */
555

556
#if defined(SIXEL_USE_AVX512)
557
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 __m512
558
sixel_avx512_load_rgb_ps(unsigned char const *psrc)
559
{
560
    __m128i pixi128;
561
    __m512i pixi512;
562

563
    pixi128 = _mm_setr_epi8((char)psrc[0],
564
                            (char)psrc[1],
565
                            (char)psrc[2],
566
                            0,
567
                            0, 0, 0, 0,
568
                            0, 0, 0, 0,
569
                            0, 0, 0, 0);
570
    pixi512 = _mm512_cvtepu8_epi32(pixi128);
571
    return _mm512_cvtepi32_ps(pixi512);
572
}
573

574
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 void
575
sixel_avx512_store_rgb_u8(__m512 const *acc,
576
                          double total,
577
                          unsigned char *dst)
578
{
579
    __m512 scalev;
580
    __m512 minv;
581
    __m512 maxv;
582
    __m512 accv;
583
    __m512i acci;
584
    int out[16];
585

586
    scalev = _mm512_set1_ps((float)(1.0 / total));
587
    accv = _mm512_mul_ps(*acc, scalev);
588
    minv = _mm512_set1_ps(0.0f);
589
    maxv = _mm512_set1_ps(255.0f);
590
    accv = _mm512_max_ps(minv, _mm512_min_ps(accv, maxv));
591
    acci = _mm512_cvtps_epi32(accv);
592
    _mm512_storeu_si512((void *)out, acci);
593
    dst[0] = (unsigned char)out[0];
594
    dst[1] = (unsigned char)out[1];
595
    dst[2] = (unsigned char)out[2];
596
}
597

598
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 __m512
599
sixel_avx512_zero_ps(void)
600
{
601
    return _mm512_setzero_ps();
602
}
603

604
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 __m512
605
sixel_avx512_muladd_ps(__m512 acc, __m512 pix, float weight)
606
{
607
    __m512 wv;
608

609
    wv = _mm512_set1_ps(weight);
610
    return _mm512_add_ps(acc, _mm512_mul_ps(pix, wv));
611
}
612

613
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 __m512
614
sixel_avx512_load_rgb_f32(float const *psrc)
615
{
616
    __m512 pixf;
617

618
    pixf = _mm512_set_ps(0.0f, 0.0f, 0.0f, 0.0f,
619
                         0.0f, 0.0f, 0.0f, 0.0f,
620
                         0.0f, 0.0f, 0.0f, 0.0f,
621
                         psrc[2], psrc[1], psrc[0], 0.0f);
622
    return pixf;
623
}
624

625
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 void
626
sixel_avx512_store_rgb_f32(__m512 const *acc,
627
                           double total,
628
                           float *dst)
629
{
630
    __m512 scalev;
631
    __m512 minv;
632
    __m512 maxv;
633
    __m512 accv;
634
    float out[16];
635

636
    scalev = _mm512_set1_ps((float)(1.0 / total));
637
    accv = _mm512_mul_ps(*acc, scalev);
638
    minv = _mm512_set1_ps(0.0f);
639
    maxv = _mm512_set1_ps(1.0f);
640
    accv = _mm512_max_ps(minv, _mm512_min_ps(accv, maxv));
641
    _mm512_storeu_ps(out, accv);
642
    dst[0] = out[0];
643
    dst[1] = out[1];
644
    dst[2] = out[2];
645
}
646
#endif  /* SIXEL_USE_AVX512 */
647
#endif /* HAVE_IMMINTRIN_H */
648

649

650
static void
651
scale_without_resampling(
11✔
652
    unsigned char *dst,
653
    unsigned char const *src,
654
    int const srcw,
655
    int const srch,
656
    int const dstw,
657
    int const dsth,
658
    int const depth)
659
{
660
    int w;
11✔
661
    int h;
11✔
662
    int x;
11✔
663
    int y;
11✔
664
    int i;
11✔
665
    int pos;
11✔
666

667
    for (h = 0; h < dsth; h++) {
269✔
668
        for (w = 0; w < dstw; w++) {
126,874✔
669
            x = (long)w * srcw / dstw;
126,616✔
670
            y = (long)h * srch / dsth;
126,616✔
671
            for (i = 0; i < depth; i++) {
506,464✔
672
                pos = (y * srcw + x) * depth + i;
379,848✔
673
                dst[(h * dstw + w) * depth + i] = src[pos];
379,848✔
674
            }
675
        }
676
    }
677
}
11✔
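
/*
 * The index mapping above is plain nearest-pixel selection: the source
 * column is w * srcw / dstw truncated by integer division. For example,
 * with srcw = 640 and dstw = 256, destination column w = 100 reads
 * source column 100 * 640 / 256 = 250, and every channel of that pixel
 * is copied as-is.
 */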
678

679
static void
680
scale_without_resampling_float32(
24✔
681
    float *dst,
682
    float const *src,
683
    int const srcw,
684
    int const srch,
685
    int const dstw,
686
    int const dsth,
687
    int const depth)
688
{
689
    int w;
24✔
690
    int h;
24✔
691
    int x;
24✔
692
    int y;
24✔
693
    int i;
24✔
694
    int pos;
24✔
695

696
    for (h = 0; h < dsth; h++) {
576✔
697
        for (w = 0; w < dstw; w++) {
192,576✔
698
            x = (long)w * srcw / dstw;
192,024✔
699
            y = (long)h * srch / dsth;
192,024✔
700
            for (i = 0; i < depth; i++) {
768,096✔
701
                pos = (y * srcw + x) * depth + i;
576,072✔
702
                dst[(h * dstw + w) * depth + i] = src[pos];
576,072✔
703
            }
704
        }
705
    }
706
}
24✔
707

708

709
typedef double (*resample_fn_t)(double const d);
710

711
#if defined(__GNUC__) && !defined(__clang__) && !defined(__PCC__)
712
#pragma GCC diagnostic push
713
/*
714
 * GCC emits a -Wpsabi note for __m512 parameters because the calling
715
 * convention changed in GCC 4.6. The functions only pass vectors between
716
 * helpers compiled with the same compiler, so suppress the noise locally.
717
 */
718
#pragma GCC diagnostic ignored "-Wpsabi"
719
#endif
720

721
/*
722
 * Two-pass separable filter helpers. Each function processes a single row so
723
 * the caller may invoke them serially or from a threadpool worker. On i386 we
724
 * also mark the functions noinline to ensure the stack-realigning prologue
725
 * from SIXEL_ALIGN_STACK is preserved under optimization.
726
 */
727
static SIXEL_ALIGN_STACK SIXEL_NO_INLINE void
728
scale_horizontal_row(
41,620✔
729
    unsigned char *tmp,
730
    unsigned char const *src,
731
    int const srcw,
732
    int const dstw,
733
    int const depth,
734
    int const y,
735
    resample_fn_t const f_resample,
736
    double const n,
737
    int const simd_level)
738
{
739
    int w;
41,620✔
740
    int x;
41,620✔
741
    int i;
41,620✔
742
    int pos;
41,620✔
743
    int x_first;
41,620✔
744
    int x_last;
41,620✔
745
    double center_x;
41,620✔
746
    double diff_x;
41,620✔
747
    double weight;
41,620✔
748
    double total;
41,620✔
749
    double offsets[8];
41,620✔
750
#if !defined(SIXEL_USE_AVX512) && !defined(SIXEL_USE_AVX2) && \
751
    !defined(SIXEL_USE_AVX) && !defined(SIXEL_USE_SSE2) && \
752
    !defined(SIXEL_USE_NEON)
753
    /*
754
     * No SIMD backends are compiled for this target, so the SIMD level gate
755
     * becomes a dead parameter. Silence -Wunused-parameter on 32-bit GCC
756
     * builds while keeping the signature identical across configurations.
757
     */
758
    (void)simd_level;
15,858✔
759
#endif
760
#if defined(SIXEL_USE_AVX512)
761
    __m512 acc512;
762
    __m512 pix512;
763
#endif
764
#if defined(SIXEL_USE_AVX2) || defined(SIXEL_USE_AVX)
765
    __m256 acc256;
766
#endif
767
#if defined(SIXEL_USE_SSE2)
768
    /*
769
     * __m128 locals remain on the stack. On i386 callers may arrive with
770
     * only 4- or 8-byte alignment, so movaps spills can fault when SSE2 is
771
     * forced. SIXEL_ALIGN_STACK realigns the frame on entry to keep the
772
     * SSE2 path consistent with the 16-byte guarantee on x86_64.
773
     */
774
    __m128 acc128;
25,762✔
775
    __m128 minv128;
25,762✔
776
    __m128 maxv128;
25,762✔
777
    __m128 scalev128;
25,762✔
778
    __m128 wv128;
25,762✔
779
    __m128 pixf128;
25,762✔
780
    __m128i pixi128;
25,762✔
781
    __m128i acci128;
25,762✔
782
    __m128i acc16_128;
25,762✔
783
    unsigned int pixel128;
25,762✔
784
#endif
785
#if defined(SIXEL_USE_NEON)
786
    float32x4_t acc_neon;
787
    float32x4_t minv_neon;
788
    float32x4_t maxv_neon;
789
    float32x4_t scalev_neon;
790
    float32x4_t wv_neon;
791
    float32x4_t pixf_neon;
792
    uint32x4_t pix32_neon;
793
    uint32x4_t acci_neon;
794
    uint16x4_t acc16_neon;
795
    uint8x8_t acc8_neon;
796
    uint8_t outb_neon[8];
797
#endif
798

799
    for (w = 0; w < dstw; w++) {
9,800,869✔
800
        total = 0.0;
38,880,763✔
801
        for (i = 0; i < depth; i++) {
38,880,763✔
802
            offsets[i] = 0;
29,121,513✔
803
        }
804

805
        if (dstw >= srcw) {
9,759,250✔
806
            center_x = (w + 0.5) * srcw / dstw;
496,000✔
807
            x_first = MAX((int)(center_x - n), 0);
496,000✔
808
            x_last = MIN((int)(center_x + n), srcw - 1);
496,000✔
809
        } else {
810
            center_x = w + 0.5;
9,263,250✔
811
            x_first = MAX((int)floor((center_x - n) * srcw / dstw), 0);
9,263,250✔
812
            x_last = MIN((int)floor((center_x + n) * srcw / dstw),
9,263,250✔
813
                         srcw - 1);
814
        }
815

816
#if defined(SIXEL_USE_AVX512)
817
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX512) {
818
#if defined(__GNUC__) && !defined(__clang__)
819
#pragma GCC diagnostic push
820
#pragma GCC diagnostic ignored "-Wpsabi"
821
#endif
822
            acc512 = sixel_avx512_zero_ps();
823

824
            for (x = x_first; x <= x_last; x++) {
825
                diff_x = (dstw >= srcw)
826
                             ? (x + 0.5) - center_x
827
                             : (x + 0.5) * dstw / srcw - center_x;
828
                weight = f_resample(fabs(diff_x));
829
                pos = (y * srcw + x) * depth;
830
                pix512 = sixel_avx512_load_rgb_ps(src + pos);
831
                acc512 = sixel_avx512_muladd_ps(
832
                    acc512,
833
                    pix512,
834
                    (float)weight);
835
                total += weight;
836
            }
837
            if (total > 0.0) {
838
                pos = (y * dstw + w) * depth;
839
                sixel_avx512_store_rgb_u8(&acc512, total, tmp + pos);
840
            }
841
#if defined(__GNUC__) && !defined(__clang__)
842
#pragma GCC diagnostic pop
843
#endif
844
            continue;
845
        }
846
#endif
847
#if defined(SIXEL_USE_AVX2)
848
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX2) {
849
            acc256 = sixel_avx2_zero_ps();
850

851
            for (x = x_first; x <= x_last; x++) {
852
                diff_x = (dstw >= srcw)
853
                             ? (x + 0.5) - center_x
854
                             : (x + 0.5) * dstw / srcw - center_x;
855
                weight = f_resample(fabs(diff_x));
856
                pos = (y * srcw + x) * depth;
857
                acc256 = sixel_avx2_muladd_ps(
858
                    acc256,
859
                    sixel_avx2_load_rgb_ps(src + pos),
860
                    (float)weight);
861
                total += weight;
862
            }
863
            if (total > 0.0) {
864
                pos = (y * dstw + w) * depth;
865
                sixel_avx2_store_rgb_u8(acc256, total, tmp + pos);
866
            }
867
            continue;
868
        }
869
#endif
870
#if defined(SIXEL_USE_AVX)
871
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX) {
872
            acc256 = sixel_avx_zero_ps();
873

874
            for (x = x_first; x <= x_last; x++) {
875
                diff_x = (dstw >= srcw)
876
                             ? (x + 0.5) - center_x
877
                             : (x + 0.5) * dstw / srcw - center_x;
878
                weight = f_resample(fabs(diff_x));
879
                pos = (y * srcw + x) * depth;
880
                acc256 = sixel_avx_muladd_ps(
881
                    acc256,
882
                    sixel_avx_load_rgb_ps(src + pos),
883
                    (float)weight);
884
                total += weight;
885
            }
886
            if (total > 0.0) {
887
                pos = (y * dstw + w) * depth;
888
                sixel_avx_store_rgb_u8(acc256, total, tmp + pos);
889
            }
890
            continue;
891
        }
892
#endif
893
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
894
        if (depth == 3
11,925,465✔
895
# if defined(SIXEL_USE_SSE2)
896
            && simd_level >= SIXEL_SIMD_LEVEL_SSE2
5,940,509!
897
# elif defined(SIXEL_USE_NEON)
898
            && simd_level >= SIXEL_SIMD_LEVEL_NEON
899
# endif
900
            ) {
901
#if defined(SIXEL_USE_SSE2)
902
            acc128 = _mm_setzero_ps();
903
#elif defined(SIXEL_USE_NEON)
904
            acc_neon = vdupq_n_f32(0.0f);
905
#endif
906
            for (x = x_first; x <= x_last; x++) {
132,674,621✔
907
                diff_x = (dstw >= srcw)
253,379,330✔
908
                             ? (x + 0.5) - center_x
910,400✔
909
                             : (x + 0.5) * dstw / srcw - center_x;
126,689,665✔
910
                weight = f_resample(fabs(diff_x));
126,689,665✔
911
                pos = (y * srcw + x) * depth;
126,718,040✔
912
                const unsigned char *psrc = src + pos;
126,718,040✔
913
#if defined(SIXEL_USE_SSE2)
914
                pixel128 = psrc[0] | (psrc[1] << 8) | (psrc[2] << 16);
126,718,040✔
915
                pixi128 = _mm_cvtsi32_si128((int)pixel128);
126,718,040✔
916
                pixi128 = _mm_unpacklo_epi8(pixi128, _mm_setzero_si128());
126,718,040✔
917
                pixi128 = _mm_unpacklo_epi16(pixi128, _mm_setzero_si128());
126,718,040✔
918
                pixf128 = _mm_cvtepi32_ps(pixi128);
126,718,040✔
919
                wv128 = _mm_set1_ps((float)weight);
126,718,040✔
920
                acc128 = _mm_add_ps(acc128, _mm_mul_ps(pixf128, wv128));
126,718,040✔
921
#else /* NEON */
922
                pix32_neon = (uint32x4_t){psrc[0], psrc[1], psrc[2], 0};
923
                pixf_neon = vcvtq_f32_u32(pix32_neon);
924
                wv_neon = vdupq_n_f32((float)weight);
925
                acc_neon = vmlaq_f32(acc_neon, pixf_neon, wv_neon);
926
#endif
927
                total += weight;
126,718,040✔
928
            }
929
            if (total > 0.0) {
5,984,956!
930
#if defined(SIXEL_USE_SSE2)
931
                scalev128 = _mm_set1_ps((float)(1.0 / total));
5,984,956✔
932
                acc128 = _mm_mul_ps(acc128, scalev128);
5,984,956✔
933
                minv128 = _mm_set1_ps(0.0f);
5,984,956✔
934
                maxv128 = _mm_set1_ps(255.0f);
5,984,956✔
935
                acc128 = _mm_max_ps(minv128, _mm_min_ps(acc128, maxv128));
5,984,956✔
936
                acci128 = _mm_cvtps_epi32(acc128);
5,984,956✔
937
                acc16_128 = _mm_packs_epi32(acci128, _mm_setzero_si128());
5,984,956✔
938
                acc16_128 = _mm_packus_epi16(acc16_128, _mm_setzero_si128());
5,984,956✔
939
                pos = (y * dstw + w) * depth;
5,984,956✔
940
                pixel128 = (unsigned int)_mm_cvtsi128_si32(acc16_128);
5,984,956✔
941
                tmp[pos + 0] = (unsigned char)pixel128;
5,984,956✔
942
                tmp[pos + 1] = (unsigned char)(pixel128 >> 8);
5,984,956✔
943
                tmp[pos + 2] = (unsigned char)(pixel128 >> 16);
5,984,956✔
944
#else /* NEON */
945
                scalev_neon = vdupq_n_f32((float)(1.0 / total));
946
                acc_neon = vmulq_f32(acc_neon, scalev_neon);
947
                minv_neon = vdupq_n_f32(0.0f);
948
                maxv_neon = vdupq_n_f32(255.0f);
949
                acc_neon = vmaxq_f32(minv_neon,
950
                                     vminq_f32(acc_neon, maxv_neon));
951
                acci_neon = vcvtq_u32_f32(acc_neon);
952
                acc16_neon = vmovn_u32(acci_neon);
953
                acc8_neon = vmovn_u16(vcombine_u16(acc16_neon, acc16_neon));
954

955
                vst1_u8(outb_neon, acc8_neon);
956
                pos = (y * dstw + w) * depth;
957
                tmp[pos + 0] = outb_neon[0];
958
                tmp[pos + 1] = outb_neon[1];
959
                tmp[pos + 2] = outb_neon[2];
960
#endif
961
            }
962
            continue;
5,984,956✔
963
        }
964
#endif /* SIMD paths */
965

966
        for (x = x_first; x <= x_last; x++) {
83,857,299!
967
            diff_x = (dstw >= srcw)
160,077,116✔
968
                         ? (x + 0.5) - center_x
574,800✔
969
                         : (x + 0.5) * dstw / srcw - center_x;
80,038,558!
970
            weight = f_resample(fabs(diff_x));
80,038,558✔
971
            for (i = 0; i < depth; i++) {
390,622,232!
972
                pos = (y * srcw + x) * depth + i;
230,545,116✔
973
                offsets[i] += src[pos] * weight;
230,545,116✔
974
            }
975
            total += weight;
80,038,558✔
976
        }
977

978
        if (total > 0.0) {
3,818,741!
979
            for (i = 0; i < depth; i++) {
14,998,164!
980
                pos = (y * dstw + w) * depth + i;
11,196,861✔
981
                tmp[pos] = normalize(offsets[i], total);
11,196,861✔
982
            }
983
        }
984
    }
985
}
41,619✔
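
/*
 * Worked example of the window selection above for the downscaling case
 * (dstw < srcw), with illustrative numbers srcw = 300, dstw = 100 and a
 * lanczos3 radius n = 3: destination column w = 10 gives center_x = 10.5,
 * x_first = floor((10.5 - 3) * 300 / 100) = 22 and
 * x_last = min(floor((10.5 + 3) * 300 / 100), 299) = 40. Source column
 * x = 30 then contributes diff_x = (30.5) * 100 / 300 - 10.5 = -0.333...,
 * i.e. a weight of f_resample(0.333...), and the accumulated weights are
 * divided back out when the row is stored.
 */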
986

987
static SIXEL_ALIGN_STACK SIXEL_NO_INLINE void
988
scale_vertical_row(
11,408✔
989
    unsigned char *dst,
990
    unsigned char const *tmp,
991
    int const dstw,
992
    int const dsth,
993
    int const depth,
994
    int const srch,
995
    int const h,
996
    resample_fn_t const f_resample,
997
    double const n,
998
    int const simd_level)
999
{
1000
    int w;
11,408✔
1001
    int y;
11,408✔
1002
    int i;
11,408✔
1003
    int pos;
11,408✔
1004
    int y_first;
11,408✔
1005
    int y_last;
11,408✔
1006
    double center_y;
11,408✔
1007
    double diff_y;
11,408✔
1008
    double weight;
11,408✔
1009
    double total;
11,408✔
1010
    double offsets[8];
11,408✔
1011
#if !defined(SIXEL_USE_AVX512) && !defined(SIXEL_USE_AVX2) && \
1012
    !defined(SIXEL_USE_AVX) && !defined(SIXEL_USE_SSE2) && \
1013
    !defined(SIXEL_USE_NEON)
1014
    /*
1015
     * When no SIMD implementations are present the runtime SIMD level does
1016
     * not influence the algorithm. Mark it unused to keep 32-bit GCC quiet
1017
     * without altering the interface shared with SIMD-enabled builds.
1018
     */
1019
    (void)simd_level;
4,331✔
1020
#endif
1021
#if defined(SIXEL_USE_AVX512)
1022
    __m512 acc512;
1023
    __m512 pix512;
1024
#endif
1025
#if defined(SIXEL_USE_AVX2) || defined(SIXEL_USE_AVX)
1026
    __m256 acc256;
1027
#endif
1028
#if defined(SIXEL_USE_SSE2)
1029
    __m128 acc128;
7,077✔
1030
    __m128 minv128;
7,077✔
1031
    __m128 maxv128;
7,077✔
1032
    __m128 scalev128;
7,077✔
1033
    __m128 wv128;
7,077✔
1034
    __m128 pixf128;
7,077✔
1035
    __m128i pixi128;
7,077✔
1036
    __m128i acci128;
7,077✔
1037
    __m128i acc16_128;
7,077✔
1038
    unsigned int pixel128;
7,077✔
1039
#endif
1040
#if defined(SIXEL_USE_NEON)
1041
    float32x4_t acc_neon;
1042
    float32x4_t minv_neon;
1043
    float32x4_t maxv_neon;
1044
    float32x4_t scalev_neon;
1045
    float32x4_t wv_neon;
1046
    float32x4_t pixf_neon;
1047
    uint32x4_t pix32_neon;
1048
    uint32x4_t acci_neon;
1049
    uint16x4_t acc16_neon;
1050
    uint8x8_t acc8_neon;
1051
    uint8_t outb_neon[8];
1052
#endif
1053

1054
    for (w = 0; w < dstw; w++) {
4,571,492✔
1055
        total = 0.0;
18,195,960✔
1056
        for (i = 0; i < depth; i++) {
18,195,960✔
1057
            offsets[i] = 0;
13,635,876✔
1058
        }
1059

1060
        if (dsth >= srch) {
4,560,084✔
1061
            center_y = (h + 0.5) * srch / dsth;
1,599,500✔
1062
            y_first = MAX((int)(center_y - n), 0);
1,599,500✔
1063
            y_last = MIN((int)(center_y + n), srch - 1);
1,599,500✔
1064
        } else {
1065
            center_y = h + 0.5;
2,960,584✔
1066
            y_first = MAX((int)floor((center_y - n) * srch / dsth), 0);
2,960,584✔
1067
            y_last = MIN((int)floor((center_y + n) * srch / dsth),
2,960,584✔
1068
                         srch - 1);
1069
        }
1070

1071
#if defined(SIXEL_USE_AVX512)
1072
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX512) {
1073
#if defined(__GNUC__) && !defined(__clang__)
1074
#pragma GCC diagnostic push
1075
#pragma GCC diagnostic ignored "-Wpsabi"
1076
#endif
1077
            acc512 = sixel_avx512_zero_ps();
1078

1079
            for (y = y_first; y <= y_last; y++) {
1080
                diff_y = (dsth >= srch)
1081
                             ? (y + 0.5) - center_y
1082
                             : (y + 0.5) * dsth / srch - center_y;
1083
                weight = f_resample(fabs(diff_y));
1084
                pos = (y * dstw + w) * depth;
1085
                pix512 = sixel_avx512_load_rgb_ps(tmp + pos);
1086
                acc512 = sixel_avx512_muladd_ps(
1087
                    acc512,
1088
                    pix512,
1089
                    (float)weight);
1090
                total += weight;
1091
            }
1092
            if (total > 0.0) {
1093
                pos = (h * dstw + w) * depth;
1094
                sixel_avx512_store_rgb_u8(&acc512, total, dst + pos);
1095
            }
1096
#if defined(__GNUC__) && !defined(__clang__)
1097
#pragma GCC diagnostic pop
1098
#endif
1099
            continue;
1100
        }
1101
#endif
1102
#if defined(SIXEL_USE_AVX2)
1103
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX2) {
1104
            acc256 = sixel_avx2_zero_ps();
1105

1106
            for (y = y_first; y <= y_last; y++) {
1107
                diff_y = (dsth >= srch)
1108
                             ? (y + 0.5) - center_y
1109
                             : (y + 0.5) * dsth / srch - center_y;
1110
                weight = f_resample(fabs(diff_y));
1111
                pos = (y * dstw + w) * depth;
1112
                acc256 = sixel_avx2_muladd_ps(
1113
                    acc256,
1114
                    sixel_avx2_load_rgb_ps(tmp + pos),
1115
                    (float)weight);
1116
                total += weight;
1117
            }
1118
            if (total > 0.0) {
1119
                pos = (h * dstw + w) * depth;
1120
                sixel_avx2_store_rgb_u8(acc256, total, dst + pos);
1121
            }
1122
            continue;
1123
        }
1124
#endif
1125
#if defined(SIXEL_USE_AVX)
1126
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX) {
1127
            acc256 = sixel_avx_zero_ps();
1128

1129
            for (y = y_first; y <= y_last; y++) {
1130
                diff_y = (dsth >= srch)
1131
                             ? (y + 0.5) - center_y
1132
                             : (y + 0.5) * dsth / srch - center_y;
1133
                weight = f_resample(fabs(diff_y));
1134
                pos = (y * dstw + w) * depth;
1135
                acc256 = sixel_avx_muladd_ps(
1136
                    acc256,
1137
                    sixel_avx_load_rgb_ps(tmp + pos),
1138
                    (float)weight);
1139
                total += weight;
1140
            }
1141
            if (total > 0.0) {
1142
                pos = (h * dstw + w) * depth;
1143
                sixel_avx_store_rgb_u8(acc256, total, dst + pos);
1144
            }
1145
            continue;
1146
        }
1147
#endif
1148
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
1149
        if (depth == 3
5,638,511✔
1150
# if defined(SIXEL_USE_SSE2)
1151
            && simd_level >= SIXEL_SIMD_LEVEL_SSE2
2,819,508!
1152
# elif defined(SIXEL_USE_NEON)
1153
            && simd_level >= SIXEL_SIMD_LEVEL_NEON
1154
# endif
1155
            ) {
1156
#if defined(SIXEL_USE_SSE2)
1157
            acc128 = _mm_setzero_ps();
1158
#elif defined(SIXEL_USE_NEON)
1159
            acc_neon = vdupq_n_f32(0.0f);
1160
#endif
1161
            for (y = y_first; y <= y_last; y++) {
18,386,638✔
1162
                diff_y = (dsth >= srch)
31,135,270✔
1163
                             ? (y + 0.5) - center_y
3,026,640✔
1164
                             : (y + 0.5) * dsth / srch - center_y;
15,567,635✔
1165
                weight = f_resample(fabs(diff_y));
15,567,635✔
1166
                pos = (y * dstw + w) * depth;
15,554,725✔
1167
                const unsigned char *psrc = tmp + pos;
15,554,725✔
1168
#if defined(SIXEL_USE_SSE2)
1169
                pixel128 = psrc[0] | (psrc[1] << 8) | (psrc[2] << 16);
15,554,725✔
1170
                pixi128 = _mm_cvtsi32_si128((int)pixel128);
15,554,725✔
1171
                pixi128 = _mm_unpacklo_epi8(pixi128, _mm_setzero_si128());
15,554,725✔
1172
                pixi128 = _mm_unpacklo_epi16(pixi128, _mm_setzero_si128());
15,554,725✔
1173
                pixf128 = _mm_cvtepi32_ps(pixi128);
15,554,725✔
1174
                wv128 = _mm_set1_ps((float)weight);
15,554,725✔
1175
                acc128 = _mm_add_ps(acc128, _mm_mul_ps(pixf128, wv128));
15,554,725✔
1176
#else /* NEON */
1177
                pix32_neon = (uint32x4_t){psrc[0], psrc[1], psrc[2], 0};
1178
                pixf_neon = vcvtq_f32_u32(pix32_neon);
1179
                wv_neon = vdupq_n_f32((float)weight);
1180
                acc_neon = vmlaq_f32(acc_neon, pixf_neon, wv_neon);
1181
#endif
1182
                total += weight;
15,554,725✔
1183
            }
1184
            if (total > 0.0) {
2,819,003!
1185
#if defined(SIXEL_USE_SSE2)
1186
                scalev128 = _mm_set1_ps((float)(1.0 / total));
2,819,003✔
1187
                acc128 = _mm_mul_ps(acc128, scalev128);
2,819,003✔
1188
                minv128 = _mm_set1_ps(0.0f);
2,819,003✔
1189
                maxv128 = _mm_set1_ps(255.0f);
2,819,003✔
1190
                acc128 = _mm_max_ps(minv128, _mm_min_ps(acc128, maxv128));
2,819,003✔
1191
                acci128 = _mm_cvtps_epi32(acc128);
2,819,003✔
1192
                acc16_128 = _mm_packs_epi32(acci128, _mm_setzero_si128());
2,819,003✔
1193
                acc16_128 = _mm_packus_epi16(acc16_128, _mm_setzero_si128());
2,819,003✔
1194
                pos = (h * dstw + w) * depth;
2,819,003✔
1195
                pixel128 = (unsigned int)_mm_cvtsi128_si32(acc16_128);
2,819,003✔
1196
                dst[pos + 0] = (unsigned char)pixel128;
2,819,003✔
1197
                dst[pos + 1] = (unsigned char)(pixel128 >> 8);
2,819,003✔
1198
                dst[pos + 2] = (unsigned char)(pixel128 >> 16);
2,819,003✔
1199
#else /* NEON */
1200
                scalev_neon = vdupq_n_f32((float)(1.0 / total));
1201
                acc_neon = vmulq_f32(acc_neon, scalev_neon);
1202
                minv_neon = vdupq_n_f32(0.0f);
1203
                maxv_neon = vdupq_n_f32(255.0f);
1204
                acc_neon = vmaxq_f32(minv_neon,
1205
                                     vminq_f32(acc_neon, maxv_neon));
1206
                acci_neon = vcvtq_u32_f32(acc_neon);
1207
                acc16_neon = vmovn_u32(acci_neon);
1208
                acc8_neon = vmovn_u16(vcombine_u16(acc16_neon, acc16_neon));
1209

1210
                vst1_u8(outb_neon, acc8_neon);
1211
                pos = (h * dstw + w) * depth;
1212
                dst[pos + 0] = outb_neon[0];
1213
                dst[pos + 1] = outb_neon[1];
1214
                dst[pos + 2] = outb_neon[2];
1215
#endif
1216
            }
1217
            continue;
2,819,003✔
1218
        }
1219
#endif /* SIMD paths */
1220
        for (y = y_first; y <= y_last; y++) {
10,998,180!
1221
            diff_y = (dsth >= srch)
18,514,198✔
1222
                         ? (y + 0.5) - center_y
1,752,520✔
1223
                         : (y + 0.5) * dsth / srch - center_y;
9,257,099!
1224
            weight = f_resample(fabs(diff_y));
9,257,099✔
1225
            for (i = 0; i < depth; i++) {
45,319,715!
1226
                pos = (y * dstw + w) * depth + i;
26,792,607✔
1227
                offsets[i] += tmp[pos] * weight;
26,792,607✔
1228
            }
1229
            total += weight;
9,270,009✔
1230
        }
1231

1232
        if (total > 0.0) {
1,741,081!
1233
            for (i = 0; i < depth; i++) {
6,845,818!
1234
                pos = (h * dstw + w) * depth + i;
5,080,344✔
1235
                dst[pos] = normalize(offsets[i], total);
5,080,344✔
1236
            }
1237
        }
1238
    }
1239
}
11,408✔
1240

1241
#if defined(__GNUC__) && !defined(__clang__) && !defined(__PCC__)
1242
# pragma GCC diagnostic pop
1243
#endif
1244

1245
static void
1246
scale_with_resampling_serial(
68✔
1247
    unsigned char *dst,
1248
    unsigned char const *src,
1249
    int const srcw,
1250
    int const srch,
1251
    int const dstw,
1252
    int const dsth,
1253
    int const depth,
1254
    resample_fn_t const f_resample,
1255
    double const n,
1256
    unsigned char *tmp)
1257
{
1258
    int y;
68✔
1259
    int h;
68✔
1260
    int simd_level;
68✔
1261

1262
    simd_level = sixel_scale_simd_level();
68✔
1263
#if !defined(SIXEL_USE_AVX512) && !defined(SIXEL_USE_AVX2) && \
1264
    !defined(SIXEL_USE_AVX) && !defined(SIXEL_USE_SSE2) && \
1265
    !defined(SIXEL_USE_NEON)
1266
    /*
1267
     * GCC i686 builds can compile this function without any SIMD backends
1268
     * enabled; consume the detection result to keep the signature stable
1269
     * while avoiding an unused-but-set-variable warning.
1270
     */
1271
    (void)simd_level;
23✔
1272
#endif
1273

1274
    for (y = 0; y < srch; y++) {
28,580✔
1275
        scale_horizontal_row(tmp,
28,444✔
1276
                             src,
1277
                             srcw,
1278
                             dstw,
1279
                             depth,
1280
                             y,
1281
                             f_resample,
1282
                             n,
1283
                             simd_level);
1284
    }
1285

1286
    for (h = 0; h < dsth; h++) {
9,505✔
1287
        scale_vertical_row(dst,
9,437✔
1288
                           tmp,
1289
                           dstw,
1290
                           dsth,
1291
                           depth,
1292
                           srch,
1293
                           h,
1294
                           f_resample,
1295
                           n,
1296
                           simd_level);
1297
    }
1298
}
68✔
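
/*
 * The serial driver above makes the two-pass data flow explicit: the
 * horizontal pass writes a dstw x srch intermediate image into tmp and
 * the vertical pass reduces it to the final dstw x dsth image. For
 * example, shrinking a 640 x 480 RGB image (depth = 3) to 320 x 240
 * needs a tmp buffer of 320 * 480 * 3 = 460800 bytes and produces a dst
 * of 320 * 240 * 3 = 230400 bytes; the dstw * srch * depth sizing in
 * scale_with_resampling below follows exactly this shape.
 */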
1299

1300
#if SIXEL_ENABLE_THREADS
1301
typedef enum scale_parallel_pass {
1302
    SCALE_PASS_HORIZONTAL = 0,
1303
    SCALE_PASS_VERTICAL = 1
1304
} scale_parallel_pass_t;
1305

1306
typedef struct scale_parallel_context {
1307
    unsigned char *dst;
1308
    unsigned char const *src;
1309
    unsigned char *tmp;
1310
    int srcw;
1311
    int srch;
1312
    int dstw;
1313
    int dsth;
1314
    int depth;
1315
    resample_fn_t f_resample;
1316
    double n;
1317
    scale_parallel_pass_t pass;
1318
    int simd_level;
1319
    int band_span;
1320
    sixel_logger_t *logger;
1321
} scale_parallel_context_t;
1322

1323
/*
1324
 * Emit timeline entries for every band so downstream aggregation can compute
1325
 * first/last activity windows per thread without losing information.
1326
 */
1327
static int
1328
scale_parallel_should_log(scale_parallel_context_t const *ctx, int index)
150✔
1329
{
1330
    int span;
150✔
1331

1332
    if (ctx == NULL || ctx->logger == NULL || !ctx->logger->active) {
150!
1333
        return 0;
1334
    }
1335

1336
    if (index < 0) {
×
1337
        return 0;
1338
    }
1339

1340
    if (ctx->pass == SCALE_PASS_HORIZONTAL) {
×
1341
        span = ctx->srch;
1342
    } else {
1343
        span = ctx->dsth;
1344
    }
1345

1346
    if (span <= 0 || index >= span) {
×
1347
        return 0;
1348
    }
1349

1350
    return 1;
1351
}
1352

1353
/*
1354
 * Allow callers to raise the floor for parallel execution using
1355
 * SIXEL_SCALE_PARALLEL_MIN_BYTES. The default of zero preserves the previous
1356
 * eager behavior while permitting deployments to defer threading on tiny
1357
 * inputs.
1358
 */
1359
static size_t
1360
scale_parallel_min_bytes(void)
48✔
1361
{
1362
    static int initialized = 0;
48✔
1363
    static size_t threshold = 0;
48✔
1364
    char const *text;
48✔
1365
    char *endptr;
48✔
1366
    unsigned long long parsed;
48✔
1367

1368
    if (initialized) {
48!
1369
        return threshold;
1370
    }
1371

1372
    initialized = 1;
48✔
1373
    text = sixel_compat_getenv("SIXEL_SCALE_PARALLEL_MIN_BYTES");
48✔
1374
    if (text == NULL || text[0] == '\0') {
48!
1375
        return threshold;
48✔
1376
    }
1377

1378
    errno = 0;
1379
    parsed = strtoull(text, &endptr, 10);
1380
    if (endptr == text || *endptr != '\0' || errno == ERANGE) {
×
1381
        return threshold;
1382
    }
1383

1384
    if (parsed > (unsigned long long)SIZE_MAX) {
1385
        threshold = SIZE_MAX;
1386
    } else {
1387
        threshold = (size_t)parsed;
1388
    }
1389

1390
    return threshold;
1391
}
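
/*
 * Sizing example for the threshold above (illustrative numbers): a
 * 1920 x 1080 RGB frame occupies 1920 * 1080 * 3 = 6220800 bytes, so a
 * deployment exporting SIXEL_SCALE_PARALLEL_MIN_BYTES=8000000 keeps
 * frames of that size on the serial fallback while anything larger is
 * still considered for the parallel path.
 */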
1392

1393
/*
1394
 * Choose the number of rows handled per threadpool job. We prefer an
1395
 * environment override via SIXEL_PARALLEL_FACTOR so deployments can tune
1396
 * queueing overhead. Otherwise derive a span from rows/threads and clamp to
1397
 * [1, rows]. The value is cached after the first lookup.
1398
 */
1399
static int
1400
scale_parallel_band_span(int rows, int threads)
18✔
1401
{
1402
    static int initialized = 0;
18✔
1403
    static int env_span = 0;
18✔
1404
    char const *text;
18✔
1405
    char *endptr;
18✔
1406
    long parsed;
18✔
1407
    int span;
18✔
1408

1409
    if (rows <= 0) {
18!
1410
        return 1;
1411
    }
1412

1413
    if (!initialized) {
18✔
1414
        initialized = 1;
9✔
1415
        text = sixel_compat_getenv("SIXEL_PARALLEL_FACTOR");
9✔
1416
        if (text != NULL && text[0] != '\0') {
9!
1417
            errno = 0;
1418
            parsed = strtol(text, &endptr, 10);
1419
            if (endptr != text && *endptr == '\0' && errno != ERANGE &&
×
1420
                parsed > 0 && parsed <= INT_MAX) {
×
1421
                env_span = (int)parsed;
1422
            }
1423
        }
1424
    }
1425

1426
    if (env_span > 0) {
18!
1427
        span = env_span;
1428
    } else {
1429
        span = rows / threads;
18✔
1430
    }
1431

1432
    if (span < 1) {
18✔
1433
        span = 1;
1434
    }
1435
    if (span > rows) {
18✔
1436
        span = rows;
1437
    }
1438

1439
    return span;
1440
}
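
/*
 * Worked example for the span selection above (illustrative numbers):
 * rows = 480 and threads = 8 with no SIXEL_PARALLEL_FACTOR override give
 * span = 480 / 8 = 60, so the caller enqueues 480 / 60 = 8 band jobs.
 * Exporting SIXEL_PARALLEL_FACTOR=16 pins span to 16 instead, yielding
 * 30 smaller jobs, and the final clamps keep span inside [1, rows] for
 * degenerate inputs such as rows < threads.
 */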
1441

1442
static int
1443
scale_parallel_worker(tp_job_t job, void *userdata, void *workspace)
75✔
1444
{
1445
    scale_parallel_context_t *ctx;
75✔
1446
    int index;
75✔
1447
    char const *role;
75✔
1448
    int y0;
75✔
1449
    int y1;
75✔
1450
    int limit;
75✔
1451
    int y;
75✔
1452

1453
    (void)workspace;
75✔
1454
    ctx = (scale_parallel_context_t *)userdata;
75✔
1455
    if (ctx == NULL) {
75!
1456
        return SIXEL_BAD_ARGUMENT;
1457
    }
1458

1459
    role = "horizontal";
75✔
1460
    y0 = 0;
75✔
1461
    y1 = 0;
75✔
1462
    index = job.band_index;
75✔
1463
    limit = ctx->srch;
75✔
1464
    if (ctx->pass == SCALE_PASS_HORIZONTAL) {
75✔
1465
        limit = ctx->srch;
36✔
1466
    } else {
1467
        limit = ctx->dsth;
39✔
1468
    }
1469

1470
    if (index < 0 || index >= limit) {
75!
1471
        return SIXEL_BAD_ARGUMENT;
1472
    }
1473

1474
    y0 = index;
75✔
1475
    y1 = index + ctx->band_span;
75✔
1476
    if (y1 > limit) {
75✔
1477
        y1 = limit;
1478
    }
1479

1480
    if (ctx->pass == SCALE_PASS_HORIZONTAL) {
75✔
1481
        if (scale_parallel_should_log(ctx, index)) {
36!
1482
            sixel_logger_logf(ctx->logger,
1483
                              role,
1484
                              "scale",
1485
                              "start",
1486
                              index);
1487
        }
1488
        for (y = y0; y < y1; y++) {
13,211✔
1489
            scale_horizontal_row(ctx->tmp,
13,175✔
1490
                                 ctx->src,
1491
                                 ctx->srcw,
8,783✔
1492
                                 ctx->dstw,
8,783✔
1493
                                 ctx->depth,
8,783✔
1494
                                 y,
1495
                                 ctx->f_resample,
8,783✔
1496
                                 ctx->n,
8,783✔
1497
                                 ctx->simd_level);
8,783✔
1498
        }
1499
    } else {
1500
        role = "vertical";
39✔
1501
        if (scale_parallel_should_log(ctx, index)) {
39!
1502
            sixel_logger_logf(ctx->logger,
1503
                              role,
1504
                              "scale",
1505
                              "start",
1506
                              index);
1507
        }
1508
        for (y = y0; y < y1; y++) {
2,010✔
1509
            scale_vertical_row(ctx->dst,
1,971✔
1510
                               ctx->tmp,
1,971✔
1511
                               ctx->dstw,
1,314✔
1512
                               ctx->dsth,
1,314✔
1513
                               ctx->depth,
1,314✔
1514
                               ctx->srch,
1,314✔
1515
                               y,
1516
                               ctx->f_resample,
1,314✔
1517
                               ctx->n,
1,314✔
1518
                               ctx->simd_level);
1,314✔
1519
        }
1520
    }
1521

1522
    if (scale_parallel_should_log(ctx, index)) {
75!
1523
        sixel_logger_logf(ctx->logger,
1524
                          role,
1525
                          "scale",
1526
                          "finish",
1527
                          index);
1528
    }
1529

1530
    return SIXEL_OK;
1531
}
1532

1533
/*
1534
 * Parallel path mirrors the encoder and dither thread selection through
1535
 * sixel_threads_resolve(). Rows are batched into jobs for both passes so the
1536
 * caller can saturate the threadpool without altering the filtering math while
1537
 * reducing queue overhead.
1538
 */
1539
static int
scale_with_resampling_parallel(
    unsigned char *dst,
    unsigned char const *src,
    int const srcw,
    int const srch,
    int const dstw,
    int const dsth,
    int const depth,
    resample_fn_t const f_resample,
    double const n,
    unsigned char *tmp,
    sixel_logger_t *logger)
{
    scale_parallel_context_t ctx;
    threadpool_t *pool;
    tp_job_t job;
    size_t image_bytes;
    int threads;
    int queue_depth;
    int y;
    int rc;
    int logger_ready;
    int horizontal_span;
    int vertical_span;

    image_bytes = (size_t)srcw * (size_t)srch * (size_t)depth;
    if (image_bytes < scale_parallel_min_bytes()) {
        if (logger != NULL) {
            sixel_logger_logf(logger,
                              "controller",
                              "scale",
                              "skip",
                              -1);
        }
        return SIXEL_BAD_ARGUMENT;
    }

    threads = sixel_threads_resolve();
    if (threads < 2) {
        if (logger != NULL) {
            sixel_logger_logf(logger,
                              "controller",
                              "scale",
                              "skip",
                              -1);
        }
        return SIXEL_BAD_ARGUMENT;
    }

    logger_ready = logger != NULL && logger->active;
    if (logger_ready) {
        sixel_logger_logf(logger,
                          "controller",
                          "scale",
                          "start",
                          -1);
    }

    ctx.dst = dst;
    ctx.src = src;
    ctx.tmp = tmp;
    ctx.srcw = srcw;
    ctx.srch = srch;
    ctx.dstw = dstw;
    ctx.dsth = dsth;
    ctx.depth = depth;
    ctx.f_resample = f_resample;
    ctx.n = n;
    ctx.simd_level = sixel_scale_simd_level();
    ctx.logger = logger_ready ? logger : NULL;

    /*
     * Batch rows to reduce queue churn. Prefer the environment override so
     * deployments can pin a consistent span; otherwise derive a default from
     * rows per thread.
     */
    horizontal_span = scale_parallel_band_span(srch, threads);
    vertical_span = scale_parallel_band_span(dsth, threads);

    queue_depth = threads * 3;
    if (queue_depth > srch) {
        queue_depth = srch;
    }
    if (queue_depth < 1) {
        queue_depth = 1;
    }

    ctx.pass = SCALE_PASS_HORIZONTAL;
    ctx.band_span = horizontal_span;
    if (logger_ready) {
        sixel_logger_logf(logger,
                          "controller",
                          "scale",
                          "pass_start",
                          -1);
    }
    pool = threadpool_create(threads,
                             queue_depth,
                             0,
                             scale_parallel_worker,
                             &ctx,
                             NULL);
    if (pool == NULL) {
        return SIXEL_BAD_ALLOCATION;
    }

    for (y = 0; y < srch; y += horizontal_span) {
        job.band_index = y;
        threadpool_push(pool, job);
    }
    threadpool_finish(pool);
    rc = threadpool_get_error(pool);
    threadpool_destroy(pool);
    if (rc != SIXEL_OK) {
        return rc;
    }

    if (logger_ready) {
        sixel_logger_logf(logger,
                          "controller",
                          "scale",
                          "pass_finish",
                          -1);
    }

    queue_depth = threads * 3;
    if (queue_depth > dsth) {
        queue_depth = dsth;
    }
    if (queue_depth < 1) {
        queue_depth = 1;
    }

    ctx.pass = SCALE_PASS_VERTICAL;
    ctx.band_span = vertical_span;
    if (logger_ready) {
        sixel_logger_logf(logger,
                          "controller",
                          "scale",
                          "pass_start",
                          -1);
    }
    pool = threadpool_create(threads,
                             queue_depth,
                             0,
                             scale_parallel_worker,
                             &ctx,
                             NULL);
    if (pool == NULL) {
        return SIXEL_BAD_ALLOCATION;
    }

    for (y = 0; y < dsth; y += vertical_span) {
        job.band_index = y;
        threadpool_push(pool, job);
    }
    threadpool_finish(pool);
    rc = threadpool_get_error(pool);
    threadpool_destroy(pool);

    if (logger_ready) {
        sixel_logger_logf(logger,
                          "controller",
                          "scale",
                          "pass_finish",
                          -1);
        sixel_logger_logf(logger,
                          "controller",
                          "scale",
                          "finish",
                          -1);
    }

    return rc;
}
#endif /* SIXEL_ENABLE_THREADS */
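/*
 * Illustrative sketch (not compiled into libsixel): the job-submission
 * pattern used above, reduced to its arithmetic.  The fixed span of 64 rows
 * is an assumption for this example; the real span comes from
 * scale_parallel_band_span(), which may honor an environment override.
 * With rows = 300 and span = 64, jobs start at 0, 64, 128, 192 and 256,
 * so every row is covered exactly once.
 */
#if 0
static void
example_enqueue_bands(threadpool_t *pool, int rows, int span)
{
    tp_job_t job;
    int y;

    for (y = 0; y < rows; y += span) {
        job.band_index = y;      /* the worker derives [y, y + span) from ctx */
        threadpool_push(pool, job);
    }
}
#endif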

/*
 * Allocate shared scratch storage and attempt the parallel pipeline first so
 * larger inputs benefit from threading while smaller ones retain the serial
 * behavior.
 */
static void
scale_with_resampling(
    unsigned char *dst,
    unsigned char const *src,
    int const srcw,
    int const srch,
    int const dstw,
    int const dsth,
    int const depth,
    resample_fn_t const f_resample,
    double n,
    sixel_allocator_t *allocator)
{
    unsigned char *tmp;
    size_t tmp_size;
#if SIXEL_ENABLE_THREADS
    int rc;
    sixel_logger_t logger;
    int logger_prepared;
#endif

#if SIXEL_ENABLE_THREADS
    sixel_logger_init(&logger);
    logger_prepared = 0;
    (void)sixel_logger_prepare_env(&logger);
    logger_prepared = logger.active;
#endif

    tmp_size = (size_t)dstw * (size_t)srch * (size_t)depth;
    tmp = (unsigned char *)sixel_allocator_malloc(allocator, tmp_size);
    if (tmp == NULL) {
#if SIXEL_ENABLE_THREADS
        if (logger_prepared) {
            sixel_logger_close(&logger);
        }
#endif
        return;
    }

#if SIXEL_ENABLE_THREADS
    rc = scale_with_resampling_parallel(dst,
                                        src,
                                        srcw,
                                        srch,
                                        dstw,
                                        dsth,
                                        depth,
                                        f_resample,
                                        n,
                                        tmp,
                                        logger_prepared
                                            ? &logger
                                            : NULL);
    if (rc == SIXEL_OK) {
        sixel_allocator_free(allocator, tmp);
        if (logger_prepared) {
            sixel_logger_close(&logger);
        }
        return;
    }

    if (logger_prepared) {
        sixel_logger_logf(&logger,
                          "controller",
                          "scale",
                          "fallback",
                          -1);
    }
#endif

    scale_with_resampling_serial(dst,
                                 src,
                                 srcw,
                                 srch,
                                 dstw,
                                 dsth,
                                 depth,
                                 f_resample,
                                 n,
                                 tmp);

    sixel_allocator_free(allocator, tmp);
#if SIXEL_ENABLE_THREADS
    if (logger_prepared) {
        sixel_logger_close(&logger);
    }
#endif
}
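/*
 * Note on the scratch buffer above: the horizontal pass writes a dstw x srch
 * intermediate image into tmp, and the vertical pass then reads it back to
 * produce the dstw x dsth result, so tmp must hold dstw * srch * depth bytes.
 * For example, shrinking a 640x480 RGB image to 320x240 allocates
 * 320 * 480 * 3 = 460800 bytes of scratch.
 */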

/*
 * Floating-point scaler mirrors the byte-path SSE2 usage. Keep it noinline
 * on i386 so the SIXEL_ALIGN_STACK prologue stays in place when SSE2 locals
 * need to spill to the stack.
 */
static SIXEL_ALIGN_STACK SIXEL_NO_INLINE void
scale_with_resampling_float32(
    float *dst,
    float const *src,
    int const srcw,
    int const srch,
    int const dstw,
    int const dsth,
    int const depth,
    resample_fn_t const f_resample,
    double n,
    sixel_allocator_t *allocator)
{
    int w;
    int h;
    int x;
    int y;
    int i;
    int pos;
    int x_first;
    int x_last;
    int y_first;
    int y_last;
    double center_x;
    double center_y;
    double diff_x;
    double diff_y;
    double weight;
    double total;
    double offsets[8];
    float *tmp;
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
    float vecbuf[4];
#endif
    int simd_level;
#if defined(SIXEL_USE_AVX512)
    __m512 acc512;
    __m512 pix512;
#endif
#if defined(SIXEL_USE_AVX2) || defined(SIXEL_USE_AVX)
    __m256 acc256;
#endif
#if defined(SIXEL_USE_SSE2)
    __m128 acc128;
    __m128 pixf128;
    __m128 wv128;
    __m128 scalev128;
    __m128 minv128;
    __m128 maxv128;
#elif defined(SIXEL_USE_NEON)
    float32x4_t acc_neon;
    float32x4_t pixf_neon;
    float32x4_t wv_neon;
    float32x4_t scalev_neon;
    float32x4_t minv_neon;
    float32x4_t maxv_neon;
#endif

    tmp = (float *)sixel_allocator_malloc(
        allocator,
        (size_t)(dstw * srch * depth * (int)sizeof(float)));
    if (tmp == NULL) {
        return;
    }

    simd_level = sixel_scale_simd_level();
#if !defined(SIXEL_USE_AVX512) && !defined(SIXEL_USE_AVX2) && \
    !defined(SIXEL_USE_AVX) && !defined(SIXEL_USE_SSE2) && \
    !defined(SIXEL_USE_NEON)
    /*
     * GCC i686 builds can reach this function with every SIMD backend
     * compiled out; acknowledge the detection result to avoid an unused
     * write while keeping the signature intact.
     */
    (void)simd_level;
#endif

    for (y = 0; y < srch; y++) {
        for (w = 0; w < dstw; w++) {
            total = 0.0;
            for (i = 0; i < depth; i++) {
                offsets[i] = 0.0;
            }

            if (dstw >= srcw) {
                center_x = (w + 0.5) * srcw / dstw;
                x_first = MAX((int)(center_x - n), 0);
                x_last = MIN((int)(center_x + n), srcw - 1);
            } else {
                center_x = w + 0.5;
                x_first = MAX((int)floor((center_x - n) * srcw / dstw), 0);
                x_last = MIN((int)floor((center_x + n) * srcw / dstw),
                             srcw - 1);
            }

#if defined(SIXEL_USE_AVX512)
            if (depth == 3 &&
                simd_level >= SIXEL_SIMD_LEVEL_AVX512) {
                acc512 = sixel_avx512_zero_ps();

                for (x = x_first; x <= x_last; x++) {
                    diff_x = (dstw >= srcw)
                                 ? (x + 0.5) - center_x
                                 : (x + 0.5) * srcw / dstw - center_x;
                    weight = f_resample(fabs(diff_x));
                    pos = (y * srcw + x) * depth;
                    pix512 = sixel_avx512_load_rgb_f32(src + pos);
                    acc512 = sixel_avx512_muladd_ps(
                        acc512,
                        pix512,
                        (float)weight);
                    total += weight;
                }
                if (total > 0.0) {
                    pos = (y * dstw + w) * depth;
                    sixel_avx512_store_rgb_f32(&acc512, total, tmp + pos);
                }
            } else
#endif
#if defined(SIXEL_USE_AVX2)
            if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX2) {
                acc256 = sixel_avx2_zero_ps();

                for (x = x_first; x <= x_last; x++) {
                    diff_x = (dstw >= srcw)
                                 ? (x + 0.5) - center_x
                                 : (x + 0.5) * srcw / dstw - center_x;
                    weight = f_resample(fabs(diff_x));
                    pos = (y * srcw + x) * depth;
                    acc256 = sixel_avx2_muladd_ps(
                        acc256,
                        sixel_avx2_load_rgb_f32(src + pos),
                        (float)weight);
                    total += weight;
                }
                if (total > 0.0) {
                    pos = (y * dstw + w) * depth;
                    sixel_avx2_store_rgb_f32(acc256, total, tmp + pos);
                }
            } else
#endif
#if defined(SIXEL_USE_AVX)
            if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX) {
                acc256 = sixel_avx_zero_ps();

                for (x = x_first; x <= x_last; x++) {
                    diff_x = (dstw >= srcw)
                                 ? (x + 0.5) - center_x
                                 : (x + 0.5) * srcw / dstw - center_x;
                    weight = f_resample(fabs(diff_x));
                    pos = (y * srcw + x) * depth;
                    acc256 = sixel_avx_muladd_ps(
                        acc256,
                        sixel_avx_load_rgb_f32(src + pos),
                        (float)weight);
                    total += weight;
                }
                if (total > 0.0) {
                    pos = (y * dstw + w) * depth;
                    sixel_avx_store_rgb_f32(acc256, total, tmp + pos);
                }
            } else
#endif
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
            if (depth == 3
# if defined(SIXEL_USE_SSE2)
                && simd_level >= SIXEL_SIMD_LEVEL_SSE2
# elif defined(SIXEL_USE_NEON)
                && simd_level >= SIXEL_SIMD_LEVEL_NEON
# endif
                ) {
#if defined(SIXEL_USE_SSE2)
                acc128 = _mm_setzero_ps();
                minv128 = _mm_set1_ps(0.0f);
                maxv128 = _mm_set1_ps(1.0f);
#elif defined(SIXEL_USE_NEON)
                acc_neon = vdupq_n_f32(0.0f);
                minv_neon = vdupq_n_f32(0.0f);
                maxv_neon = vdupq_n_f32(1.0f);
#endif
                for (x = x_first; x <= x_last; x++) {
                    diff_x = (dstw >= srcw)
                                 ? (x + 0.5) - center_x
                                 : (x + 0.5) * srcw / dstw - center_x;
                    weight = f_resample(fabs(diff_x));
                    pos = (y * srcw + x) * depth;
                    const float *psrc = src + pos;
#if defined(SIXEL_USE_SSE2)
                    pixf128 = _mm_set_ps(
                        0.0f, psrc[2], psrc[1], psrc[0]);
                    wv128 = _mm_set1_ps((float)weight);
                    acc128 = _mm_add_ps(acc128,
                                        _mm_mul_ps(pixf128, wv128));
#else /* NEON */
                    /*
                     * Expand the RGB triple into a NEON vector without
                     * brace initialization to keep older toolchains
                     * happy.
                     */
                    pixf_neon = vdupq_n_f32(0.0f);
                    pixf_neon = vsetq_lane_f32(psrc[0], pixf_neon, 0);
                    pixf_neon = vsetq_lane_f32(psrc[1], pixf_neon, 1);
                    pixf_neon = vsetq_lane_f32(psrc[2], pixf_neon, 2);
                    wv_neon = vdupq_n_f32((float)weight);
                    acc_neon = vmlaq_f32(acc_neon, pixf_neon, wv_neon);
#endif
                    total += weight;
                }
                if (total > 0.0) {
#if defined(SIXEL_USE_SSE2)
                    scalev128 = _mm_set1_ps((float)(1.0 / total));
                    acc128 = _mm_mul_ps(acc128, scalev128);
                    acc128 = _mm_max_ps(minv128,
                                        _mm_min_ps(acc128, maxv128));
                    _mm_storeu_ps(vecbuf, acc128);
#else /* NEON */
                    scalev_neon = vdupq_n_f32(
                        (float)(1.0 / total));
                    acc_neon = vmulq_f32(acc_neon, scalev_neon);
                    acc_neon = vmaxq_f32(minv_neon,
                                         vminq_f32(acc_neon, maxv_neon));
                    vst1q_f32(vecbuf, acc_neon);
#endif
                    pos = (y * dstw + w) * depth;
                    tmp[pos + 0] = vecbuf[0];
                    tmp[pos + 1] = vecbuf[1];
                    tmp[pos + 2] = vecbuf[2];
                }
            } else
#endif
            {
                for (x = x_first; x <= x_last; x++) {
                    diff_x = (dstw >= srcw)
                                 ? (x + 0.5) - center_x
                                 : (x + 0.5) * srcw / dstw - center_x;
                    weight = f_resample(fabs(diff_x));
                    for (i = 0; i < depth; i++) {
                        pos = (y * srcw + x) * depth + i;
                        offsets[i] += src[pos] * weight;
                    }
                    total += weight;
                }

                if (total > 0.0) {
                    for (i = 0; i < depth; i++) {
                        pos = (y * dstw + w) * depth + i;
                        tmp[pos] = sixel_clamp_unit_f32(
                            (float)(offsets[i] / total));
                    }
                }
            }
        }
    }
2070
    for (h = 0; h < dsth; h++) {
11,960✔
2071
        for (w = 0; w < dstw; w++) {
2,938,745✔
2072
            total = 0.0;
11,707,472✔
2073
            for (i = 0; i < depth; i++) {
11,707,472✔
2074
                offsets[i] = 0.0;
8,780,604✔
2075
            }
2076

2077
            if (dsth >= srch) {
2,926,868✔
2078
                center_y = (h + 0.5) * srch / dsth;
1,333,020✔
2079
                y_first = MAX((int)(center_y - n), 0);
1,333,020✔
2080
                y_last = MIN((int)(center_y + n), srch - 1);
1,333,020✔
2081
            } else {
2082
                center_y = h + 0.5;
1,593,848✔
2083
                y_first = MAX((int)floor((center_y - n) * srch / dsth), 0);
1,593,848✔
2084
                y_last = MIN((int)floor((center_y + n) * srch / dsth),
1,593,848✔
2085
                             srch - 1);
2086
            }
2087

2088
#if defined(SIXEL_USE_AVX512)
2089
            if (depth == 3 &&
2090
                simd_level >= SIXEL_SIMD_LEVEL_AVX512) {
2091
                acc512 = sixel_avx512_zero_ps();
2092

2093
                for (y = y_first; y <= y_last; y++) {
2094
                    diff_y = (dsth >= srch)
2095
                                 ? (y + 0.5) - center_y
2096
                                 : (y + 0.5) * dsth / srch - center_y;
2097
                    weight = f_resample(fabs(diff_y));
2098
                    pos = (y * dstw + w) * depth;
2099
                    pix512 = sixel_avx512_load_rgb_f32(tmp + pos);
2100
                    acc512 = sixel_avx512_muladd_ps(
2101
                        acc512,
2102
                        pix512,
2103
                        (float)weight);
2104
                    total += weight;
2105
                }
2106
                if (total > 0.0) {
2107
                    pos = (h * dstw + w) * depth;
2108
                    sixel_avx512_store_rgb_f32(&acc512, total, dst + pos);
2109
                }
2110
            } else
2111
#endif
2112
#if defined(SIXEL_USE_AVX2)
2113
            if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX2) {
2114
                acc256 = sixel_avx2_zero_ps();
2115

2116
                for (y = y_first; y <= y_last; y++) {
2117
                    diff_y = (dsth >= srch)
2118
                                 ? (y + 0.5) - center_y
2119
                                 : (y + 0.5) * dsth / srch - center_y;
2120
                    weight = f_resample(fabs(diff_y));
2121
                    pos = (y * dstw + w) * depth;
2122
                    acc256 = sixel_avx2_muladd_ps(
2123
                        acc256,
2124
                        sixel_avx2_load_rgb_f32(tmp + pos),
2125
                        (float)weight);
2126
                    total += weight;
2127
                }
2128
                if (total > 0.0) {
2129
                    pos = (h * dstw + w) * depth;
2130
                    sixel_avx2_store_rgb_f32(acc256, total, dst + pos);
2131
                }
2132
            } else
2133
#endif
2134
#if defined(SIXEL_USE_AVX)
2135
            if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX) {
2136
                acc256 = sixel_avx_zero_ps();
2137

2138
                for (y = y_first; y <= y_last; y++) {
2139
                    diff_y = (dsth >= srch)
2140
                                 ? (y + 0.5) - center_y
2141
                                 : (y + 0.5) * dsth / srch - center_y;
2142
                    weight = f_resample(fabs(diff_y));
2143
                    pos = (y * dstw + w) * depth;
2144
                    acc256 = sixel_avx_muladd_ps(
2145
                        acc256,
2146
                        sixel_avx_load_rgb_f32(tmp + pos),
2147
                        (float)weight);
2148
                    total += weight;
2149
                }
2150
                if (total > 0.0) {
2151
                    pos = (h * dstw + w) * depth;
2152
                    sixel_avx_store_rgb_f32(acc256, total, dst + pos);
2153
                }
2154
            } else
2155
#endif
2156
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
2157
            if (depth == 3
1,688,284✔
2158
# if defined(SIXEL_USE_SSE2)
2159
                && simd_level >= SIXEL_SIMD_LEVEL_SSE2
1,688,284!
2160
# elif defined(SIXEL_USE_NEON)
2161
                && simd_level >= SIXEL_SIMD_LEVEL_NEON
2162
# endif
2163
                ) {
2164
#if defined(SIXEL_USE_SSE2)
2165
                acc128 = _mm_setzero_ps();
2166
                minv128 = _mm_set1_ps(0.0f);
2167
                maxv128 = _mm_set1_ps(1.0f);
2168
#elif defined(SIXEL_USE_NEON)
2169
                acc_neon = vdupq_n_f32(0.0f);
2170
                minv_neon = vdupq_n_f32(0.0f);
2171
                maxv_neon = vdupq_n_f32(1.0f);
2172
#endif
2173
                for (y = y_first; y <= y_last; y++) {
13,844,408✔
2174
                    diff_y = (dsth >= srch)
24,312,248✔
2175
                                 ? (y + 0.5) - center_y
2,230,932✔
2176
                                 : (y + 0.5) * dsth / srch - center_y;
12,156,124✔
2177
                    weight = f_resample(fabs(diff_y));
12,156,124✔
2178
                    pos = (y * dstw + w) * depth;
12,156,124✔
2179
                    const float *psrc = tmp + pos;
12,156,124✔
2180
#if defined(SIXEL_USE_SSE2)
2181
                    pixf128 = _mm_set_ps(
12,156,124✔
2182
                        0.0f, psrc[2], psrc[1], psrc[0]);
2183
                    wv128 = _mm_set1_ps((float)weight);
12,156,124✔
2184
                    acc128 = _mm_add_ps(acc128,
12,156,124✔
2185
                                        _mm_mul_ps(pixf128, wv128));
2186
#else /* NEON */
2187
                    /*
2188
                     * Expand the RGB triple into a NEON vector without
2189
                     * brace initialization to keep older toolchains
2190
                     * happy.
2191
                     */
2192
                    pixf_neon = vdupq_n_f32(0.0f);
2193
                    pixf_neon = vsetq_lane_f32(psrc[0], pixf_neon, 0);
2194
                    pixf_neon = vsetq_lane_f32(psrc[1], pixf_neon, 1);
2195
                    pixf_neon = vsetq_lane_f32(psrc[2], pixf_neon, 2);
2196
                    wv_neon = vdupq_n_f32((float)weight);
2197
                    acc_neon = vmlaq_f32(acc_neon, pixf_neon, wv_neon);
2198
#endif
2199
                    total += weight;
12,156,124✔
2200
                }
2201
                if (total > 0.0) {
1,688,284!
2202
#if defined(SIXEL_USE_SSE2)
2203
                    scalev128 = _mm_set1_ps((float)(1.0 / total));
1,688,284✔
2204
                    acc128 = _mm_mul_ps(acc128, scalev128);
1,688,284✔
2205
                    acc128 = _mm_max_ps(minv128,
1,688,284✔
2206
                                        _mm_min_ps(acc128, maxv128));
2207
                    _mm_storeu_ps(vecbuf, acc128);
1,688,284✔
2208
#else /* NEON */
2209
                    scalev_neon = vdupq_n_f32(
2210
                        (float)(1.0 / total));
2211
                    acc_neon = vmulq_f32(acc_neon, scalev_neon);
2212
                    acc_neon = vmaxq_f32(minv_neon,
2213
                                         vminq_f32(acc_neon, maxv_neon));
2214
                    vst1q_f32(vecbuf, acc_neon);
2215
#endif
2216
                    pos = (h * dstw + w) * depth;
1,688,284✔
2217
                    dst[pos + 0] = vecbuf[0];
1,688,284✔
2218
                    dst[pos + 1] = vecbuf[1];
1,688,284✔
2219
                    dst[pos + 2] = vecbuf[2];
1,688,284✔
2220
                }
2221
            } else
2222
#endif
2223
            {
2224
                for (y = y_first; y <= y_last; y++) {
10,151,824!
2225
                    diff_y = (dsth >= srch)
17,826,480✔
2226
                                 ? (y + 0.5) - center_y
1,752,528✔
2227
                                 : (y + 0.5) * dsth / srch - center_y;
8,913,240!
2228
                    weight = f_resample(fabs(diff_y));
8,913,240✔
2229
                    for (i = 0; i < depth; i++) {
44,566,200!
2230
                        pos = (y * dstw + w) * depth + i;
26,739,720✔
2231
                        offsets[i] += tmp[pos] * weight;
26,739,720✔
2232
                    }
2233
                    total += weight;
8,913,240✔
2234
                }
2235

2236
                if (total > 0.0) {
1,238,584!
2237
                    for (i = 0; i < depth; i++) {
4,954,336!
2238
                        pos = (h * dstw + w) * depth + i;
3,715,752✔
2239
                        dst[pos] = sixel_clamp_unit_f32(
3,715,752✔
2240
                            (float)(offsets[i] / total));
3,715,752!
2241
                    }
2242
                }
2243
            }
2244
        }
2245
    }
2246

2247
    sixel_allocator_free(allocator, tmp);
83✔
2248
}
2249
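/*
 * Illustrative sketch (not compiled into libsixel): the per-sample weighting
 * and normalization performed by the scalar fallback above, reduced to one
 * axis and one output sample for the upscale case.  The inline triangle
 * kernel stands in for f_resample with n = 1.0, and the helper name is
 * invented for this example.
 */
#if 0
static float
example_filter_one_sample(float const *line, int srcn, int dstn, int w)
{
    double center = (w + 0.5) * srcn / dstn;   /* output center in source space */
    int x_first = MAX((int)(center - 1.0), 0);
    int x_last = MIN((int)(center + 1.0), srcn - 1);
    double acc = 0.0;
    double total = 0.0;
    int x;

    for (x = x_first; x <= x_last; x++) {
        double d = fabs((x + 0.5) - center);
        double weight = d < 1.0 ? 1.0 - d : 0.0;   /* triangle / bilinear */
        acc += line[x] * weight;
        total += weight;
    }
    return total > 0.0 ? (float)(acc / total) : 0.0f;   /* normalize by weight sum */
}
#endif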


SIXELAPI int
sixel_helper_scale_image(
    unsigned char       /* out */ *dst,
    unsigned char const /* in */  *src,                   /* source image data */
    int                 /* in */  srcw,                   /* source image width */
    int                 /* in */  srch,                   /* source image height */
    int                 /* in */  pixelformat,            /* one of enum pixelFormat */
    int                 /* in */  dstw,                   /* destination image width */
    int                 /* in */  dsth,                   /* destination image height */
    int                 /* in */  method_for_resampling,  /* one of methodForResampling */
    sixel_allocator_t   /* in */  *allocator)             /* allocator object */
{
    /*
     * Convert the source image to RGB24 if necessary and scale it to the
     * requested destination size.  The caller supplies an allocator used
     * for any temporary buffers required during conversion or filtering.
     */
    int const depth = sixel_helper_compute_depth(pixelformat);
    unsigned char *new_src = NULL;  /* optional converted source buffer */
    int nret;
    int new_pixelformat;

    /* ensure the scaler operates on RGB triples */
    if (depth != 3) {
        new_src = (unsigned char *)sixel_allocator_malloc(allocator,
                                                          (size_t)(srcw * srch * 3));
        if (new_src == NULL) {
            return (-1);
        }
        nret = sixel_helper_normalize_pixelformat(new_src,
                                                  &new_pixelformat,
                                                  src, pixelformat,
                                                  srcw, srch);
        if (nret != 0) {
            sixel_allocator_free(allocator, new_src);
            return (-1);
        }

        src = new_src;  /* use converted buffer from here on */
    } else {
        new_pixelformat = pixelformat;
    }

    /* choose re-sampling strategy */
    switch (method_for_resampling) {
    case SIXEL_RES_NEAREST:
        scale_without_resampling(dst, src, srcw, srch, dstw, dsth, depth);
        break;
    case SIXEL_RES_GAUSSIAN:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              gaussian, 1.0, allocator);
        break;
    case SIXEL_RES_HANNING:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              hanning, 1.0, allocator);
        break;
    case SIXEL_RES_HAMMING:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              hamming, 1.0, allocator);
        break;
    case SIXEL_RES_WELSH:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              welsh, 1.0, allocator);
        break;
    case SIXEL_RES_BICUBIC:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              bicubic, 2.0, allocator);
        break;
    case SIXEL_RES_LANCZOS2:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              lanczos2, 2.0, allocator);
        break;
    case SIXEL_RES_LANCZOS3:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              lanczos3, 3.0, allocator);
        break;
    case SIXEL_RES_LANCZOS4:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              lanczos4, 4.0, allocator);
        break;
    case SIXEL_RES_BILINEAR:
    default:
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
                              bilinear, 1.0, allocator);
        break;
    }

    /* release temporary copy created for pixel-format normalization */
    sixel_allocator_free(allocator, new_src);
    return 0;
}
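/*
 * Illustrative usage sketch (not compiled into libsixel): shrinking an
 * RGB888 buffer to half size with the default bilinear filter.  The
 * allocator setup via sixel_allocator_new() follows the public sixel.h API;
 * buffer allocation and error handling are trimmed for brevity.
 */
#if 0
static int
example_scale_half(unsigned char *dst, unsigned char const *src,
                   int width, int height)
{
    sixel_allocator_t *allocator = NULL;
    int status;

    if (SIXEL_FAILED(sixel_allocator_new(&allocator, malloc, calloc,
                                         realloc, free))) {
        return (-1);
    }
    /* dst must hold (width / 2) * (height / 2) * 3 bytes */
    status = sixel_helper_scale_image(dst, src, width, height,
                                      SIXEL_PIXELFORMAT_RGB888,
                                      width / 2, height / 2,
                                      SIXEL_RES_BILINEAR, allocator);
    sixel_allocator_unref(allocator);
    return status;
}
#endif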

SIXELAPI int
sixel_helper_scale_image_float32(
    float             /* out */ *dst,
    float const       /* in */  *src,
    int               /* in */  srcw,
    int               /* in */  srch,
    int               /* in */  pixelformat,
    int               /* in */  dstw,
    int               /* in */  dsth,
    int               /* in */  method_for_resampling,
    sixel_allocator_t /* in */  *allocator)
{
    int depth;
    int depth_bytes;

    depth_bytes = sixel_helper_compute_depth(pixelformat);
    if (depth_bytes <= 0) {
        return (-1);
    }

    depth = depth_bytes / (int)sizeof(float);
    if (depth * (int)sizeof(float) != depth_bytes) {
        return (-1);
    }

    switch (method_for_resampling) {
    case SIXEL_RES_NEAREST:
        scale_without_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth);
        break;
    case SIXEL_RES_GAUSSIAN:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            gaussian, 1.0, allocator);
        break;
    case SIXEL_RES_HANNING:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            hanning, 1.0, allocator);
        break;
    case SIXEL_RES_HAMMING:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            hamming, 1.0, allocator);
        break;
    case SIXEL_RES_WELSH:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            welsh, 1.0, allocator);
        break;
    case SIXEL_RES_BICUBIC:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            bicubic, 2.0, allocator);
        break;
    case SIXEL_RES_LANCZOS2:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            lanczos2, 2.0, allocator);
        break;
    case SIXEL_RES_LANCZOS3:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            lanczos3, 3.0, allocator);
        break;
    case SIXEL_RES_LANCZOS4:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            lanczos4, 4.0, allocator);
        break;
    case SIXEL_RES_BILINEAR:
    default:
        scale_with_resampling_float32(
            dst, src, srcw, srch, dstw, dsth, depth,
            bilinear, 1.0, allocator);
        break;
    }

    return 0;
}
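/*
 * Note on the depth arithmetic above: sixel_helper_compute_depth() reports
 * bytes per pixel, while the float32 scalers count float channels, so the
 * byte count is divided by sizeof(float) and rejected unless it divides
 * evenly.  For a three-channel float format, 12 bytes per pixel yields
 * depth == 3.
 */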


#if defined(__GNUC__) && !defined(__clang__) && !defined(__PCC__)
# pragma GCC diagnostic pop
#endif

/* emacs Local Variables:      */
/* emacs mode: c               */
/* emacs tab-width: 4          */
/* emacs indent-tabs-mode: nil */
/* emacs c-basic-offset: 4     */
/* emacs End:                  */
/* vim: set expandtab ts=4 sts=4 sw=4 : */
/* EOF */