• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

saitoha / libsixel / 19918707358

04 Dec 2025 05:12AM UTC coverage: 38.402% (-4.0%) from 42.395%
19918707358

push

github

saitoha
tests: fix meson msys dll lookup

9738 of 38220 branches covered (25.48%)

12841 of 33438 relevant lines covered (38.4%)

782420.02 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

36.59
/src/scale.c
1
/*
2
 * SPDX-License-Identifier: MIT
3
 *
4
 * Copyright (c) 2021-2025 libsixel developers. See `AUTHORS`.
5
 * Copyright (c) 2014-2016 Hayaki Saito
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
8
 * this software and associated documentation files (the "Software"), to deal in
9
 * the Software without restriction, including without limitation the rights to
10
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
11
 * the Software, and to permit persons to whom the Software is furnished to do so,
12
 * subject to the following conditions:
13
 *
14
 * The above copyright notice and this permission notice shall be included in all
15
 * copies or substantial portions of the Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
19
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
20
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
21
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
 */
24

25
#include "config.h"
26

27
/* STDC_HEADERS */
28
#include <stdlib.h>
29

30
#if HAVE_ERRNO_H
31
# include <errno.h>
32
#endif  /* HAVE_ERRNO_H */
33
#if HAVE_LIMITS_H
34
# include <limits.h>
35
#endif  /* HAVE_LIMITS_H */
36
#if HAVE_STRING_H
37
# include <string.h>
38
#endif  /* HAVE_STRING_H */
39
#if HAVE_STDINT_H
40
# include <stdint.h>
41
#endif  /* HAVE_STDINT_H */
42

43
#if HAVE_MATH_H
44
# define _USE_MATH_DEFINES  /* for MSVC */
45
# include <math.h>
46
#endif  /* HAVE_MATH_H */
47
#ifndef M_PI
48
# define M_PI 3.14159265358979323846
49
#endif
50

51
#include <sixel.h>
52

53
#include "cpu.h"
54
#include "logger.h"
55
#include "compat_stub.h"
56

57
#if SIXEL_ENABLE_THREADS
58
# include "sixel_threads_config.h"
59
# include "threadpool.h"
60
#endif
61

62
#if defined(__GNUC__) && defined(__i386__)
63
/*
64
 * i386 callers may enter with only 4- or 8-byte stack alignment. Force
65
 * realignment for SSE2-heavy routines to avoid movaps spills to unaligned
66
 * stack slots when SIMD is enabled via SIXEL_SIMD_LEVEL. Mark affected
67
 * functions noinline so the prologue that performs realignment is not
68
 * dropped by inlining.
69
 */
70
# define SIXEL_ALIGN_STACK __attribute__((force_align_arg_pointer))
71
# define SIXEL_NO_INLINE __attribute__((noinline))
72
#else
73
# define SIXEL_ALIGN_STACK
74
# define SIXEL_NO_INLINE
75
#endif
76

77
#if defined(HAVE_IMMINTRIN_H) && \
78
    (defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \
79
     defined(_M_IX86))
80
# define SIXEL_HAS_X86_INTRIN 1
81
# include <immintrin.h>
82
#endif
83

84
#if defined(HAVE_SSE2)
85
# if defined(__SSE2__)
86
#  if defined(HAVE_EMMINTRIN_H)
87
#   include <emmintrin.h>
88
#   define SIXEL_USE_SSE2 1
89
#  endif
90
# endif
91
#endif
92

93
#if defined(SIXEL_HAS_X86_INTRIN)
94
# if defined(__GNUC__)
95
#  if !defined(__clang__)
96
#   define SIXEL_TARGET_AVX __attribute__((target("avx")))
97
#   define SIXEL_TARGET_AVX2 __attribute__((target("avx2")))
98
#   define SIXEL_TARGET_AVX512 __attribute__((target("avx512f")))
99
#   define SIXEL_USE_AVX 1
100
#   define SIXEL_USE_AVX2 1
101
#   define SIXEL_USE_AVX512 1
102
#  else
103
/*
104
 * clang rejects returning AVX vectors when the translation unit target
105
 * does not already include the corresponding ISA.  Guard runtime AVX
106
 * helpers with compile-time ISA availability to keep non-AVX builds
107
 * warning-free while still using AVX when the compiler enables it.
108
 */
109
#   define SIXEL_TARGET_AVX
110
#   define SIXEL_TARGET_AVX2
111
#   define SIXEL_TARGET_AVX512
112
#   if defined(__AVX__)
113
#    define SIXEL_USE_AVX 1
114
#   endif
115
#   if defined(__AVX2__)
116
#    define SIXEL_USE_AVX2 1
117
#   endif
118
#   if defined(__AVX512F__)
119
#    define SIXEL_USE_AVX512 1
120
#   endif
121
#  endif
122
# else
123
#  define SIXEL_TARGET_AVX
124
#  define SIXEL_TARGET_AVX2
125
#  define SIXEL_TARGET_AVX512
126
#  if defined(__AVX__)
127
#   define SIXEL_USE_AVX 1
128
#  endif
129
#  if defined(__AVX2__)
130
#   define SIXEL_USE_AVX2 1
131
#  endif
132
#  if defined(__AVX512F__)
133
#   define SIXEL_USE_AVX512 1
134
#  endif
135
# endif
136
#endif
137

138
#if defined(__GNUC__) && !defined(__clang__)
139
# pragma GCC diagnostic push
140
# pragma GCC diagnostic ignored "-Wpsabi"
141
#endif
142

143
#if defined(HAVE_NEON)
144
# if (defined(__ARM_NEON) || defined(__ARM_NEON__))
145
#  if defined(HAVE_ARM_NEON_H)
146
#   include <arm_neon.h>
147
#   define SIXEL_USE_NEON 1
148
#  endif
149
# endif
150
#endif
151

152
#if !defined(MAX)
153
# define MAX(l, r) ((l) > (r) ? (l) : (r))
154
#endif
155
#if !defined(MIN)
156
#define MIN(l, r) ((l) < (r) ? (l) : (r))
157
#endif
158

159

160
#if 0
161
/* function Nearest Neighbor */
162
static double
163
nearest_neighbor(double const d)
164
{
165
    if (d <= 0.5) {
166
        return 1.0;
167
    }
168
    return 0.0;
169
}
170
#endif
171

172

173
/* function Bi-linear */
/*
 * Triangle (bilinear) kernel: weight decreases linearly from 1.0 at the
 * center to 0.0 at a distance of one pixel; zero beyond that.
 */
static double
bilinear(double const d)
{
    return (d < 1.0) ? 1.0 - d : 0.0;
}
182

183

184
/* function Welsh */
/*
 * Welch (parabolic) window: 1 - d^2 inside the unit radius, zero outside.
 */
static double
welsh(double const d)
{
    if (d >= 1.0) {
        return 0.0;
    }
    return 1.0 - d * d;
}
193

194

195
/* function Bi-cubic */
/*
 * Cubic convolution kernel, nonzero on [0, 2]: one polynomial piece for
 * the central lobe (d <= 1) and one for the negative side lobe (d <= 2).
 */
static double
bicubic(double const d)
{
    double w;

    if (d <= 1.0) {
        w = 1.0 + (d - 2.0) * d * d;
    } else if (d <= 2.0) {
        w = 4.0 + d * (-8.0 + d * (5.0 - d));
    } else {
        w = 0.0;
    }
    return w;
}
207

208

209
/* function sinc
210
 * sinc(x) = sin(PI * x) / (PI * x)
211
 */
212
static double
213
sinc(double const x)
22,842,858✔
214
{
215
    return sin(M_PI * x) / (M_PI * x);
22,842,858✔
216
}
217

218

219
/* function Lanczos-2
 * Lanczos(x) = sinc(x) * sinc(x / 2) , |x| <= 2
 *            = 0, |x| > 2
 */
static double
lanczos2(double const d)
{
    if (d == 0.0) {
        return 1.0;
    }
    if (d >= 2.0) {
        return 0.0;
    }
    return sinc(d) * sinc(d / 2.0);
}
234

235

236
/* function Lanczos-3
 * Lanczos(x) = sinc(x) * sinc(x / 3) , |x| <= 3
 *            = 0, |x| > 3
 */
static double
lanczos3(double const d)
{
    if (d == 0.0) {
        return 1.0;
    }
    if (d >= 3.0) {
        return 0.0;
    }
    return sinc(d) * sinc(d / 3.0);
}
251

252
/* function Lanczos-4
 * Lanczos(x) = sinc(x) * sinc(x / 4) , |x| <= 4
 *            = 0, |x| > 4
 */
static double
lanczos4(double const d)
{
    if (d == 0.0) {
        return 1.0;
    }
    if (d >= 4.0) {
        return 0.0;
    }
    return sinc(d) * sinc(d / 4.0);
}
267

268

269
static double
270
gaussian(double const d)
2,035,950✔
271
{
272
    return exp(-2.0 * d * d) * sqrt(2.0 / M_PI);
2,035,950✔
273
}
274

275

276
static double
277
hanning(double const d)
2,188,056✔
278
{
279
    return 0.5 + 0.5 * cos(d * M_PI);
2,188,056✔
280
}
281

282

283
static double
284
hamming(const double d)
2,512,200✔
285
{
286
    return 0.54 + 0.46 * cos(d * M_PI);
2,512,200✔
287
}
288

289

290
static unsigned char
291
normalize(double x, double total)
×
292
{
293
    int result;
×
294

295
    result = floor(x / total);
×
296
    if (result > 255) {
×
297
        return 0xff;
298
    }
299
    if (result < 0) {
×
300
        return 0x00;
301
    }
302
    return (unsigned char)result;
×
303
}
304

305
/*
 * Resolve the effective SIMD level for the scaler, probing the CPU via
 * sixel_cpu_simd_level() once and caching the answer for later calls.
 * -2 is the "not yet probed" sentinel.
 * NOTE(review): the lazy init is not synchronized; concurrent first calls
 * would both probe but store the same value — confirm callers are
 * single-threaded at first use.
 */
static int
sixel_scale_simd_level(void)
{
    static int simd_level = -2;  /* -2: detection not performed yet */

    if (simd_level == -2) {
        simd_level = sixel_cpu_simd_level();
#if defined(__i386__)
        /*
         * AVX and later widen the alignment requirement for stack spills to
         * 32 bytes. i386 stack realignment from force_align_arg_pointer only
         * guarantees 16-byte boundaries, so keep the runtime level capped at
         * SSE2 to avoid vmovaps faults when YMM locals spill.
         */
        if (simd_level > SIXEL_SIMD_LEVEL_SSE2) {
            simd_level = SIXEL_SIMD_LEVEL_SSE2;
        }
#endif
    }

    return simd_level;
}
327

328
/*
 * Clamp a linear RGB sample to the unit interval [0.0, 1.0].
 *
 * Resampling kernels with negative lobes (bicubic, Lanczos) can push
 * values outside the unit range; clamping keeps downstream conversions
 * from collapsing to black.
 */
static float
sixel_clamp_unit_f32(float value)
{
    if (value > 1.0f) {
        return 1.0f;
    }
    if (value < 0.0f) {
        return 0.0f;
    }

    return value;
}
345

346
#if defined(HAVE_IMMINTRIN_H)
347
#if defined(SIXEL_USE_AVX)
/*
 * AVX (pre-AVX2) helpers for the separable scaler.  Channel layout
 * convention shared by every load/store helper in this file: float lane
 * 0 = R, lane 1 = G, lane 2 = B, all remaining lanes zero.
 */

/* Load three u8 channels from psrc and widen them into float lanes 0..2. */
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX __m256
sixel_avx_load_rgb_ps(unsigned char const *psrc)
{
    __m128i pixi128;
    __m128 pixf128;
    __m256 pixf256;

    /*
     * Widen each byte into its own 32-bit lane (the SSE2 unpack sequence
     * is always available underneath AVX) before the int->float
     * conversion.  The previous implementation fed the packed byte vector
     * straight into _mm_cvtepi32_ps, which interpreted R | G<<8 | B<<16
     * as a single int32 lane and produced garbage channel values.
     */
    pixi128 = _mm_cvtsi32_si128((int)((unsigned int)psrc[0]
                                      | ((unsigned int)psrc[1] << 8)
                                      | ((unsigned int)psrc[2] << 16)));
    pixi128 = _mm_unpacklo_epi8(pixi128, _mm_setzero_si128());
    pixi128 = _mm_unpacklo_epi16(pixi128, _mm_setzero_si128());
    pixf128 = _mm_cvtepi32_ps(pixi128);
    pixf256 = _mm256_castps128_ps256(pixf128);
    /* The cast leaves the upper 128 bits undefined; zero them explicitly. */
    pixf256 = _mm256_insertf128_ps(pixf256, _mm_setzero_ps(), 1);
    return pixf256;
}

/* Scale acc by 1/total, clamp to [0,255], store lanes 0..2 as u8 RGB. */
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX void
sixel_avx_store_rgb_u8(__m256 acc, double total, unsigned char *dst)
{
    __m256 scalev;
    __m256 minv;
    __m256 maxv;
    __m256i acci;
    int out[8];

    scalev = _mm256_set1_ps((float)(1.0 / total));
    acc = _mm256_mul_ps(acc, scalev);
    minv = _mm256_set1_ps(0.0f);
    maxv = _mm256_set1_ps(255.0f);
    acc = _mm256_max_ps(minv, _mm256_min_ps(acc, maxv));
    acci = _mm256_cvtps_epi32(acc);
    _mm256_storeu_si256((__m256i *)out, acci);
    dst[0] = (unsigned char)out[0];
    dst[1] = (unsigned char)out[1];
    dst[2] = (unsigned char)out[2];
}

/* Zeroed accumulator for the weighted-sum loops. */
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX __m256
sixel_avx_zero_ps(void)
{
    return _mm256_setzero_ps();
}

/* acc += pix * weight, with weight broadcast across all lanes. */
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX __m256
sixel_avx_muladd_ps(__m256 acc, __m256 pix, float weight)
{
    __m256 wv;

    wv = _mm256_set1_ps(weight);
    return _mm256_add_ps(acc, _mm256_mul_ps(pix, wv));
}

/* Load three float channels from psrc into lanes 0..2. */
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX __m256
sixel_avx_load_rgb_f32(float const *psrc)
{
    /*
     * Keep R/G/B in lanes 0..2 so sixel_avx_store_rgb_f32 (which reads
     * lanes 0..2 back out) sees the channels in order, matching the u8
     * path.  The previous layout parked 0.0f in lane 0 and shifted every
     * channel up one lane, so the stored pixel came out as (0, R, G).
     */
    return _mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f,
                         0.0f, psrc[2], psrc[1], psrc[0]);
}

/* Scale acc by 1/total, clamp to [0,1], store lanes 0..2 as floats. */
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX void
sixel_avx_store_rgb_f32(__m256 acc, double total, float *dst)
{
    __m256 scalev;
    __m256 minv;
    __m256 maxv;
    float out[8];

    scalev = _mm256_set1_ps((float)(1.0 / total));
    acc = _mm256_mul_ps(acc, scalev);
    minv = _mm256_set1_ps(0.0f);
    maxv = _mm256_set1_ps(1.0f);
    acc = _mm256_max_ps(minv, _mm256_min_ps(acc, maxv));
    _mm256_storeu_ps(out, acc);
    dst[0] = out[0];
    dst[1] = out[1];
    dst[2] = out[2];
}
#endif  /* SIXEL_USE_AVX */
437

438
#if defined(SIXEL_USE_AVX2)
/*
 * AVX2 helpers for the separable scaler.  Channel layout convention:
 * float lane 0 = R, lane 1 = G, lane 2 = B, remaining lanes zero.
 */

/* Load three u8 channels from psrc and widen them into float lanes 0..2. */
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 __m256
sixel_avx2_load_rgb_ps(unsigned char const *psrc)
{
    __m128i pixi128;
    __m256i pixi256;

    /*
     * Keep the unused bytes zeroed so widening to epi32 does not pull in
     * stack junk and bias every output channel toward white.
     */
    pixi128 = _mm_setr_epi8((char)psrc[0],
                            (char)psrc[1],
                            (char)psrc[2],
                            0,
                            0, 0, 0, 0,
                            0, 0, 0, 0,
                            0, 0, 0, 0);
    pixi256 = _mm256_cvtepu8_epi32(pixi128);
    return _mm256_cvtepi32_ps(pixi256);
}

/* Scale acc by 1/total, clamp to [0,255], store lanes 0..2 as u8 RGB. */
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 void
sixel_avx2_store_rgb_u8(__m256 acc, double total, unsigned char *dst)
{
    __m256 scalev;
    __m256 minv;
    __m256 maxv;
    __m256i acci;
    int out[8];

    scalev = _mm256_set1_ps((float)(1.0 / total));
    acc = _mm256_mul_ps(acc, scalev);
    minv = _mm256_set1_ps(0.0f);
    maxv = _mm256_set1_ps(255.0f);
    acc = _mm256_max_ps(minv, _mm256_min_ps(acc, maxv));
    acci = _mm256_cvtps_epi32(acc);
    _mm256_storeu_si256((__m256i *)out, acci);
    dst[0] = (unsigned char)out[0];
    dst[1] = (unsigned char)out[1];
    dst[2] = (unsigned char)out[2];
}

/* Zeroed accumulator for the weighted-sum loops. */
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 __m256
sixel_avx2_zero_ps(void)
{
    return _mm256_setzero_ps();
}

/* acc += pix * weight, with weight broadcast across all lanes. */
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 __m256
sixel_avx2_muladd_ps(__m256 acc, __m256 pix, float weight)
{
    __m256 wv;

    wv = _mm256_set1_ps(weight);
    return _mm256_add_ps(acc, _mm256_mul_ps(pix, wv));
}

/* Load three float channels from psrc into lanes 0..2. */
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 __m256
sixel_avx2_load_rgb_f32(float const *psrc)
{
    /*
     * Keep R/G/B in lanes 0..2 so sixel_avx2_store_rgb_f32 (which reads
     * lanes 0..2 back out) sees the channels in order, matching the u8
     * path above.  The previous layout parked 0.0f in lane 0 and shifted
     * every channel up one lane, so the stored pixel came out as (0, R, G).
     */
    return _mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f,
                         0.0f, psrc[2], psrc[1], psrc[0]);
}

/* Scale acc by 1/total, clamp to [0,1], store lanes 0..2 as floats. */
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX2 void
sixel_avx2_store_rgb_f32(__m256 acc, double total, float *dst)
{
    __m256 scalev;
    __m256 minv;
    __m256 maxv;
    float out[8];

    scalev = _mm256_set1_ps((float)(1.0 / total));
    acc = _mm256_mul_ps(acc, scalev);
    minv = _mm256_set1_ps(0.0f);
    maxv = _mm256_set1_ps(1.0f);
    acc = _mm256_max_ps(minv, _mm256_min_ps(acc, maxv));
    _mm256_storeu_ps(out, acc);
    dst[0] = out[0];
    dst[1] = out[1];
    dst[2] = out[2];
}
#endif  /* SIXEL_USE_AVX2 */
525

526
#if defined(SIXEL_USE_AVX512)
/*
 * AVX-512F helpers for the separable scaler.  Channel layout convention:
 * float lane 0 = R, lane 1 = G, lane 2 = B, remaining lanes zero.
 */

/* Load three u8 channels from psrc and widen them into float lanes 0..2. */
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 __m512
sixel_avx512_load_rgb_ps(unsigned char const *psrc)
{
    __m128i pixi128;
    __m512i pixi512;

    /* Keep unused bytes zeroed so the epi32 widening stays clean. */
    pixi128 = _mm_setr_epi8((char)psrc[0],
                            (char)psrc[1],
                            (char)psrc[2],
                            0,
                            0, 0, 0, 0,
                            0, 0, 0, 0,
                            0, 0, 0, 0);
    pixi512 = _mm512_cvtepu8_epi32(pixi128);
    return _mm512_cvtepi32_ps(pixi512);
}

/* Scale acc by 1/total, clamp to [0,255], store lanes 0..2 as u8 RGB. */
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 void
sixel_avx512_store_rgb_u8(__m512 acc, double total, unsigned char *dst)
{
    __m512 scalev;
    __m512 minv;
    __m512 maxv;
    __m512i acci;
    int out[16];

    scalev = _mm512_set1_ps((float)(1.0 / total));
    acc = _mm512_mul_ps(acc, scalev);
    minv = _mm512_set1_ps(0.0f);
    maxv = _mm512_set1_ps(255.0f);
    acc = _mm512_max_ps(minv, _mm512_min_ps(acc, maxv));
    acci = _mm512_cvtps_epi32(acc);
    _mm512_storeu_si512((void *)out, acci);
    dst[0] = (unsigned char)out[0];
    dst[1] = (unsigned char)out[1];
    dst[2] = (unsigned char)out[2];
}

/* Zeroed accumulator for the weighted-sum loops. */
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 __m512
sixel_avx512_zero_ps(void)
{
    return _mm512_setzero_ps();
}

/* acc += pix * weight, with weight broadcast across all lanes. */
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 __m512
sixel_avx512_muladd_ps(__m512 acc, __m512 pix, float weight)
{
    __m512 wv;

    wv = _mm512_set1_ps(weight);
    return _mm512_add_ps(acc, _mm512_mul_ps(pix, wv));
}

/* Load three float channels from psrc into lanes 0..2. */
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 __m512
sixel_avx512_load_rgb_f32(float const *psrc)
{
    /*
     * Keep R/G/B in lanes 0..2 so sixel_avx512_store_rgb_f32 (which reads
     * lanes 0..2 back out) sees the channels in order, matching the u8
     * path above.  The previous layout parked 0.0f in lane 0 and shifted
     * every channel up one lane, so the stored pixel came out as (0, R, G).
     */
    return _mm512_set_ps(0.0f, 0.0f, 0.0f, 0.0f,
                         0.0f, 0.0f, 0.0f, 0.0f,
                         0.0f, 0.0f, 0.0f, 0.0f,
                         0.0f, psrc[2], psrc[1], psrc[0]);
}

/* Scale acc by 1/total, clamp to [0,1], store lanes 0..2 as floats. */
static SIXEL_ALIGN_STACK SIXEL_TARGET_AVX512 void
sixel_avx512_store_rgb_f32(__m512 acc, double total, float *dst)
{
    __m512 scalev;
    __m512 minv;
    __m512 maxv;
    float out[16];

    scalev = _mm512_set1_ps((float)(1.0 / total));
    acc = _mm512_mul_ps(acc, scalev);
    minv = _mm512_set1_ps(0.0f);
    maxv = _mm512_set1_ps(1.0f);
    acc = _mm512_max_ps(minv, _mm512_min_ps(acc, maxv));
    _mm512_storeu_ps(out, acc);
    dst[0] = out[0];
    dst[1] = out[1];
    dst[2] = out[2];
}
#endif  /* SIXEL_USE_AVX512 */
611
#endif /* HAVE_IMMINTRIN_H */
612

613

614
/*
 * Nearest-neighbor scaling: each destination pixel copies the source
 * pixel whose coordinates map proportionally.  No filtering is applied.
 *
 * dst    destination buffer, dstw * dsth * depth bytes
 * src    source buffer, srcw * srch * depth bytes
 * depth  bytes per pixel (channel count)
 *
 * Buffer offsets are computed in size_t: the old int arithmetic could
 * overflow (UB) for images where width * height * depth exceeds INT_MAX.
 */
static void
scale_without_resampling(
    unsigned char *dst,
    unsigned char const *src,
    int const srcw,
    int const srch,
    int const dstw,
    int const dsth,
    int const depth)
{
    int w;
    int h;
    int x;
    int y;
    int i;
    size_t srcpos;
    size_t dstpos;

    dstpos = 0;
    for (h = 0; h < dsth; h++) {
        /* The source row depends only on h; hoist it out of the w loop. */
        y = (int)((long)h * srch / dsth);
        for (w = 0; w < dstw; w++) {
            x = (int)((long)w * srcw / dstw);
            srcpos = ((size_t)y * (size_t)srcw + (size_t)x) * (size_t)depth;
            for (i = 0; i < depth; i++) {
                /* dst is filled sequentially: (h * dstw + w) * depth + i */
                dst[dstpos++] = src[srcpos + (size_t)i];
            }
        }
    }
}
18✔
642

643
/*
 * Nearest-neighbor scaling for float32 pixels; same coordinate mapping
 * as scale_without_resampling(), operating on float samples.
 *
 * dst    destination buffer, dstw * dsth * depth floats
 * src    source buffer, srcw * srch * depth floats
 * depth  channels per pixel
 *
 * Buffer offsets are computed in size_t: the old int arithmetic could
 * overflow (UB) for images where width * height * depth exceeds INT_MAX.
 */
static void
scale_without_resampling_float32(
    float *dst,
    float const *src,
    int const srcw,
    int const srch,
    int const dstw,
    int const dsth,
    int const depth)
{
    int w;
    int h;
    int x;
    int y;
    int i;
    size_t srcpos;
    size_t dstpos;

    dstpos = 0;
    for (h = 0; h < dsth; h++) {
        /* The source row depends only on h; hoist it out of the w loop. */
        y = (int)((long)h * srch / dsth);
        for (w = 0; w < dstw; w++) {
            x = (int)((long)w * srcw / dstw);
            srcpos = ((size_t)y * (size_t)srcw + (size_t)x) * (size_t)depth;
            for (i = 0; i < depth; i++) {
                /* dst is filled sequentially: (h * dstw + w) * depth + i */
                dst[dstpos++] = src[srcpos + (size_t)i];
            }
        }
    }
}
×
671

672

673
typedef double (*resample_fn_t)(double const d);
674

675
/*
676
 * Two-pass separable filter helpers. Each function processes a single row so
677
 * the caller may invoke them serially or from a threadpool worker. On i386 we
678
 * also mark the functions noinline to ensure the stack-realigning prologue
679
 * from SIXEL_ALIGN_STACK is preserved under optimization.
680
 */
681
/*
 * Horizontal pass of the two-pass separable resampler: computes one row
 * (source/destination row index y) of the intermediate buffer `tmp`
 * (dstw wide, `depth` channels) from the source image `src` (srcw wide).
 *
 * tmp         intermediate buffer; row y is written
 * src         source pixels; row y is read
 * f_resample  kernel weight function (bilinear, bicubic, lanczos*, ...)
 * n           kernel radius in destination-space pixels
 * simd_level  runtime SIMD level from sixel_scale_simd_level(); selects
 *             one of the compile-time-available SIMD paths below
 *
 * NOTE(review): offsets[] holds one accumulator per channel and is sized
 * 8, so this assumes depth <= 8 — confirm at the call sites.
 */
static SIXEL_ALIGN_STACK SIXEL_NO_INLINE void
scale_horizontal_row(
    unsigned char *tmp,
    unsigned char const *src,
    int const srcw,
    int const dstw,
    int const depth,
    int const y,
    resample_fn_t const f_resample,
    double const n,
    int const simd_level)
{
    int w;
    int x;
    int i;
    int pos;
    int x_first;
    int x_last;
    double center_x;
    double diff_x;
    double weight;
    double total;
    double offsets[8];
#if defined(SIXEL_USE_AVX512)
    __m512 acc512;
#endif
#if defined(SIXEL_USE_AVX2) || defined(SIXEL_USE_AVX)
    __m256 acc256;
#endif
#if defined(SIXEL_USE_SSE2)
    /*
     * __m128 locals remain on the stack. On i386 callers may arrive with
     * only 4- or 8-byte alignment, so movaps spills can fault when SSE2 is
     * forced. SIXEL_ALIGN_STACK realigns the frame on entry to keep the
     * SSE2 path consistent with the 16-byte guarantee on x86_64.
     */
    __m128 acc128;
    __m128 minv128;
    __m128 maxv128;
    __m128 scalev128;
    __m128 wv128;
    __m128 pixf128;
    __m128i pixi128;
    __m128i acci128;
    __m128i acc16_128;
    unsigned int pixel128;
#endif
#if defined(SIXEL_USE_NEON)
    float32x4_t acc_neon;
    float32x4_t minv_neon;
    float32x4_t maxv_neon;
    float32x4_t scalev_neon;
    float32x4_t wv_neon;
    float32x4_t pixf_neon;
    uint32x4_t pix32_neon;
    uint32x4_t acci_neon;
    uint16x4_t acc16_neon;
    uint8x8_t acc8_neon;
    uint8_t outb_neon[8];
#endif

    for (w = 0; w < dstw; w++) {
        total = 0.0;
        for (i = 0; i < depth; i++) {
            offsets[i] = 0;
        }

        /*
         * Compute the kernel support window [x_first, x_last] in source
         * coordinates.  Upscaling maps the center into source space;
         * downscaling keeps the center in destination space and maps the
         * window edges into source space instead.
         */
        if (dstw >= srcw) {
            center_x = (w + 0.5) * srcw / dstw;
            x_first = MAX(center_x - n, 0);
            x_last = MIN(center_x + n, srcw - 1);
        } else {
            center_x = w + 0.5;
            x_first = MAX(floor((center_x - n) * srcw / dstw), 0);
            x_last = MIN(floor((center_x + n) * srcw / dstw), srcw - 1);
        }

        /*
         * SIMD fast paths (RGB only, depth == 3).  Each path accumulates
         * weighted channel sums in vector lanes, then normalizes, clamps
         * and stores via the matching helper; `continue` skips the scalar
         * fallback below.  Dispatch is widest-first by runtime level.
         */
#if defined(SIXEL_USE_AVX512)
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX512) {
            acc512 = sixel_avx512_zero_ps();

            for (x = x_first; x <= x_last; x++) {
                diff_x = (dstw >= srcw)
                             ? (x + 0.5) - center_x
                             : (x + 0.5) * dstw / srcw - center_x;
                weight = f_resample(fabs(diff_x));
                pos = (y * srcw + x) * depth;
                acc512 = sixel_avx512_muladd_ps(
                    acc512,
                    sixel_avx512_load_rgb_ps(src + pos),
                    (float)weight);
                total += weight;
            }
            if (total > 0.0) {
                pos = (y * dstw + w) * depth;
                sixel_avx512_store_rgb_u8(acc512, total, tmp + pos);
            }
            continue;
        }
#endif
#if defined(SIXEL_USE_AVX2)
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX2) {
            acc256 = sixel_avx2_zero_ps();

            for (x = x_first; x <= x_last; x++) {
                diff_x = (dstw >= srcw)
                             ? (x + 0.5) - center_x
                             : (x + 0.5) * dstw / srcw - center_x;
                weight = f_resample(fabs(diff_x));
                pos = (y * srcw + x) * depth;
                acc256 = sixel_avx2_muladd_ps(
                    acc256,
                    sixel_avx2_load_rgb_ps(src + pos),
                    (float)weight);
                total += weight;
            }
            if (total > 0.0) {
                pos = (y * dstw + w) * depth;
                sixel_avx2_store_rgb_u8(acc256, total, tmp + pos);
            }
            continue;
        }
#endif
#if defined(SIXEL_USE_AVX)
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX) {
            acc256 = sixel_avx_zero_ps();

            for (x = x_first; x <= x_last; x++) {
                diff_x = (dstw >= srcw)
                             ? (x + 0.5) - center_x
                             : (x + 0.5) * dstw / srcw - center_x;
                weight = f_resample(fabs(diff_x));
                pos = (y * srcw + x) * depth;
                acc256 = sixel_avx_muladd_ps(
                    acc256,
                    sixel_avx_load_rgb_ps(src + pos),
                    (float)weight);
                total += weight;
            }
            if (total > 0.0) {
                pos = (y * dstw + w) * depth;
                sixel_avx_store_rgb_u8(acc256, total, tmp + pos);
            }
            continue;
        }
#endif
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
        if (depth == 3
# if defined(SIXEL_USE_SSE2)
            && simd_level >= SIXEL_SIMD_LEVEL_SSE2
# elif defined(SIXEL_USE_NEON)
            && simd_level >= SIXEL_SIMD_LEVEL_NEON
# endif
            ) {
#if defined(SIXEL_USE_SSE2)
            acc128 = _mm_setzero_ps();
#elif defined(SIXEL_USE_NEON)
            acc_neon = vdupq_n_f32(0.0f);
#endif
            for (x = x_first; x <= x_last; x++) {
                diff_x = (dstw >= srcw)
                             ? (x + 0.5) - center_x
                             : (x + 0.5) * dstw / srcw - center_x;
                weight = f_resample(fabs(diff_x));
                pos = (y * srcw + x) * depth;
                const unsigned char *psrc = src + pos;
#if defined(SIXEL_USE_SSE2)
                /* widen packed R|G<<8|B<<16 bytes to int32 lanes, then float */
                pixel128 = psrc[0] | (psrc[1] << 8) | (psrc[2] << 16);
                pixi128 = _mm_cvtsi32_si128((int)pixel128);
                pixi128 = _mm_unpacklo_epi8(pixi128, _mm_setzero_si128());
                pixi128 = _mm_unpacklo_epi16(pixi128, _mm_setzero_si128());
                pixf128 = _mm_cvtepi32_ps(pixi128);
                wv128 = _mm_set1_ps((float)weight);
                acc128 = _mm_add_ps(acc128, _mm_mul_ps(pixf128, wv128));
#else /* NEON */
                pix32_neon = (uint32x4_t){psrc[0], psrc[1], psrc[2], 0};
                pixf_neon = vcvtq_f32_u32(pix32_neon);
                wv_neon = vdupq_n_f32((float)weight);
                acc_neon = vmlaq_f32(acc_neon, pixf_neon, wv_neon);
#endif
                total += weight;
            }
            if (total > 0.0) {
#if defined(SIXEL_USE_SSE2)
                /* normalize by 1/total, clamp to [0,255], pack back to u8 */
                scalev128 = _mm_set1_ps((float)(1.0 / total));
                acc128 = _mm_mul_ps(acc128, scalev128);
                minv128 = _mm_set1_ps(0.0f);
                maxv128 = _mm_set1_ps(255.0f);
                acc128 = _mm_max_ps(minv128, _mm_min_ps(acc128, maxv128));
                acci128 = _mm_cvtps_epi32(acc128);
                acc16_128 = _mm_packs_epi32(acci128, _mm_setzero_si128());
                acc16_128 = _mm_packus_epi16(acc16_128, _mm_setzero_si128());
                pos = (y * dstw + w) * depth;
                pixel128 = (unsigned int)_mm_cvtsi128_si32(acc16_128);
                tmp[pos + 0] = (unsigned char)pixel128;
                tmp[pos + 1] = (unsigned char)(pixel128 >> 8);
                tmp[pos + 2] = (unsigned char)(pixel128 >> 16);
#else /* NEON */
                scalev_neon = vdupq_n_f32((float)(1.0 / total));
                acc_neon = vmulq_f32(acc_neon, scalev_neon);
                minv_neon = vdupq_n_f32(0.0f);
                maxv_neon = vdupq_n_f32(255.0f);
                acc_neon = vmaxq_f32(minv_neon,
                                     vminq_f32(acc_neon, maxv_neon));
                acci_neon = vcvtq_u32_f32(acc_neon);
                acc16_neon = vmovn_u32(acci_neon);
                acc8_neon = vmovn_u16(vcombine_u16(acc16_neon, acc16_neon));

                vst1_u8(outb_neon, acc8_neon);
                pos = (y * dstw + w) * depth;
                tmp[pos + 0] = outb_neon[0];
                tmp[pos + 1] = outb_neon[1];
                tmp[pos + 2] = outb_neon[2];
#endif
            }
            continue;
        }
#endif /* SIMD paths */

        /* Scalar fallback: per-channel double accumulators in offsets[]. */
        for (x = x_first; x <= x_last; x++) {
            diff_x = (dstw >= srcw)
                         ? (x + 0.5) - center_x
                         : (x + 0.5) * dstw / srcw - center_x;
            weight = f_resample(fabs(diff_x));
            for (i = 0; i < depth; i++) {
                pos = (y * srcw + x) * depth + i;
                offsets[i] += src[pos] * weight;
            }
            total += weight;
        }

        if (total > 0.0) {
            for (i = 0; i < depth; i++) {
                pos = (y * dstw + w) * depth + i;
                tmp[pos] = normalize(offsets[i], total);
            }
        }
    }
}
31,950✔
920

921
/*
 * Second (vertical) resampling pass for one destination row `h`.
 *
 * Reads the horizontally pre-scaled intermediate buffer `tmp`
 * (dstw x srch, `depth` bytes per pixel) and writes row `h` of the final
 * dstw x dsth image into `dst`.  Each destination pixel is a weighted sum
 * of the source rows inside a window of half-width `n` around the mapped
 * row center; weights come from `f_resample` applied to the distance from
 * the center, and the sum is normalized by the total weight.
 *
 * When depth == 3 and `simd_level` permits, dedicated AVX-512 / AVX2 /
 * AVX / SSE2 / NEON paths accumulate the RGB triple in vector registers;
 * otherwise the scalar loop at the bottom handles any depth.
 */
static SIXEL_ALIGN_STACK SIXEL_NO_INLINE void
scale_vertical_row(
    unsigned char *dst,
    unsigned char const *tmp,
    int const dstw,
    int const dsth,
    int const depth,
    int const srch,
    int const h,
    resample_fn_t const f_resample,
    double const n,
    int const simd_level)
{
    int w;
    int y;
    int i;
    int pos;
    int y_first;          /* first source row inside the filter window */
    int y_last;           /* last source row inside the filter window */
    double center_y;      /* window center in source-row coordinates */
    double diff_y;        /* distance of one sample from the center */
    double weight;
    double total;         /* weight sum used for normalization */
    double offsets[8];    /* scalar per-channel accumulators;
                           * assumes depth <= 8 -- TODO confirm callers */
#if defined(SIXEL_USE_AVX512)
    __m512 acc512;
#endif
#if defined(SIXEL_USE_AVX2) || defined(SIXEL_USE_AVX)
    __m256 acc256;
#endif
#if defined(SIXEL_USE_SSE2)
    __m128 acc128;
    __m128 minv128;
    __m128 maxv128;
    __m128 scalev128;
    __m128 wv128;
    __m128 pixf128;
    __m128i pixi128;
    __m128i acci128;
    __m128i acc16_128;
    unsigned int pixel128;
#endif
#if defined(SIXEL_USE_NEON)
    float32x4_t acc_neon;
    float32x4_t minv_neon;
    float32x4_t maxv_neon;
    float32x4_t scalev_neon;
    float32x4_t wv_neon;
    float32x4_t pixf_neon;
    uint32x4_t pix32_neon;
    uint32x4_t acci_neon;
    uint16x4_t acc16_neon;
    uint8x8_t acc8_neon;
    uint8_t outb_neon[8];
#endif

    for (w = 0; w < dstw; w++) {
        total = 0.0;
        for (i = 0; i < depth; i++) {
            offsets[i] = 0;
        }

        /*
         * Map destination row h into source-row space.  Upscaling keeps
         * the window in source coordinates; downscaling widens the
         * window so every source row contributes to some output row.
         */
        if (dsth >= srch) {
            center_y = (h + 0.5) * srch / dsth;
            y_first = MAX(center_y - n, 0);
            y_last = MIN(center_y + n, srch - 1);
        } else {
            center_y = h + 0.5;
            y_first = MAX(floor((center_y - n) * srch / dsth), 0);
            y_last = MIN(floor((center_y + n) * srch / dsth), srch - 1);
        }

#if defined(SIXEL_USE_AVX512)
        /* AVX-512 fast path: accumulate the RGB pixel as packed floats. */
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX512) {
            acc512 = sixel_avx512_zero_ps();

            for (y = y_first; y <= y_last; y++) {
                diff_y = (dsth >= srch)
                             ? (y + 0.5) - center_y
                             : (y + 0.5) * dsth / srch - center_y;
                weight = f_resample(fabs(diff_y));
                pos = (y * dstw + w) * depth;
                acc512 = sixel_avx512_muladd_ps(
                    acc512,
                    sixel_avx512_load_rgb_ps(tmp + pos),
                    (float)weight);
                total += weight;
            }
            if (total > 0.0) {
                pos = (h * dstw + w) * depth;
                sixel_avx512_store_rgb_u8(acc512, total, dst + pos);
            }
            continue;
        }
#endif
#if defined(SIXEL_USE_AVX2)
        /* AVX2 fast path: same shape as above with 256-bit helpers. */
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX2) {
            acc256 = sixel_avx2_zero_ps();

            for (y = y_first; y <= y_last; y++) {
                diff_y = (dsth >= srch)
                             ? (y + 0.5) - center_y
                             : (y + 0.5) * dsth / srch - center_y;
                weight = f_resample(fabs(diff_y));
                pos = (y * dstw + w) * depth;
                acc256 = sixel_avx2_muladd_ps(
                    acc256,
                    sixel_avx2_load_rgb_ps(tmp + pos),
                    (float)weight);
                total += weight;
            }
            if (total > 0.0) {
                pos = (h * dstw + w) * depth;
                sixel_avx2_store_rgb_u8(acc256, total, dst + pos);
            }
            continue;
        }
#endif
#if defined(SIXEL_USE_AVX)
        /* AVX (no AVX2) fast path. */
        if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX) {
            acc256 = sixel_avx_zero_ps();

            for (y = y_first; y <= y_last; y++) {
                diff_y = (dsth >= srch)
                             ? (y + 0.5) - center_y
                             : (y + 0.5) * dsth / srch - center_y;
                weight = f_resample(fabs(diff_y));
                pos = (y * dstw + w) * depth;
                acc256 = sixel_avx_muladd_ps(
                    acc256,
                    sixel_avx_load_rgb_ps(tmp + pos),
                    (float)weight);
                total += weight;
            }
            if (total > 0.0) {
                pos = (h * dstw + w) * depth;
                sixel_avx_store_rgb_u8(acc256, total, dst + pos);
            }
            continue;
        }
#endif
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
        /* 128-bit fast path (SSE2 or NEON, whichever was compiled in). */
        if (depth == 3
# if defined(SIXEL_USE_SSE2)
            && simd_level >= SIXEL_SIMD_LEVEL_SSE2
# elif defined(SIXEL_USE_NEON)
            && simd_level >= SIXEL_SIMD_LEVEL_NEON
# endif
            ) {
#if defined(SIXEL_USE_SSE2)
            acc128 = _mm_setzero_ps();
#elif defined(SIXEL_USE_NEON)
            acc_neon = vdupq_n_f32(0.0f);
#endif
            for (y = y_first; y <= y_last; y++) {
                diff_y = (dsth >= srch)
                             ? (y + 0.5) - center_y
                             : (y + 0.5) * dsth / srch - center_y;
                weight = f_resample(fabs(diff_y));
                pos = (y * dstw + w) * depth;
                const unsigned char *psrc = tmp + pos;
#if defined(SIXEL_USE_SSE2)
                /* Pack B/G/R bytes into one dword, then widen
                 * u8 -> u16 -> u32 -> float for the weighted add. */
                pixel128 = psrc[0] | (psrc[1] << 8) | (psrc[2] << 16);
                pixi128 = _mm_cvtsi32_si128((int)pixel128);
                pixi128 = _mm_unpacklo_epi8(pixi128, _mm_setzero_si128());
                pixi128 = _mm_unpacklo_epi16(pixi128, _mm_setzero_si128());
                pixf128 = _mm_cvtepi32_ps(pixi128);
                wv128 = _mm_set1_ps((float)weight);
                acc128 = _mm_add_ps(acc128, _mm_mul_ps(pixf128, wv128));
#else /* NEON */
                pix32_neon = (uint32x4_t){psrc[0], psrc[1], psrc[2], 0};
                pixf_neon = vcvtq_f32_u32(pix32_neon);
                wv_neon = vdupq_n_f32((float)weight);
                acc_neon = vmlaq_f32(acc_neon, pixf_neon, wv_neon);
#endif
                total += weight;
            }
            if (total > 0.0) {
#if defined(SIXEL_USE_SSE2)
                /* Normalize, clamp to [0,255], narrow back to bytes. */
                scalev128 = _mm_set1_ps((float)(1.0 / total));
                acc128 = _mm_mul_ps(acc128, scalev128);
                minv128 = _mm_set1_ps(0.0f);
                maxv128 = _mm_set1_ps(255.0f);
                acc128 = _mm_max_ps(minv128, _mm_min_ps(acc128, maxv128));
                acci128 = _mm_cvtps_epi32(acc128);
                acc16_128 = _mm_packs_epi32(acci128, _mm_setzero_si128());
                acc16_128 = _mm_packus_epi16(acc16_128, _mm_setzero_si128());
                pos = (h * dstw + w) * depth;
                pixel128 = (unsigned int)_mm_cvtsi128_si32(acc16_128);
                dst[pos + 0] = (unsigned char)pixel128;
                dst[pos + 1] = (unsigned char)(pixel128 >> 8);
                dst[pos + 2] = (unsigned char)(pixel128 >> 16);
#else /* NEON */
                /* Normalize, clamp to [0,255], narrow back to bytes. */
                scalev_neon = vdupq_n_f32((float)(1.0 / total));
                acc_neon = vmulq_f32(acc_neon, scalev_neon);
                minv_neon = vdupq_n_f32(0.0f);
                maxv_neon = vdupq_n_f32(255.0f);
                acc_neon = vmaxq_f32(minv_neon,
                                     vminq_f32(acc_neon, maxv_neon));
                acci_neon = vcvtq_u32_f32(acc_neon);
                acc16_neon = vmovn_u32(acci_neon);
                acc8_neon = vmovn_u16(vcombine_u16(acc16_neon, acc16_neon));

                vst1_u8(outb_neon, acc8_neon);
                pos = (h * dstw + w) * depth;
                dst[pos + 0] = outb_neon[0];
                dst[pos + 1] = outb_neon[1];
                dst[pos + 2] = outb_neon[2];
#endif
            }
            continue;
        }
#endif /* SIMD paths */
        /* Portable scalar fallback; also the only path for depth != 3. */
        for (y = y_first; y <= y_last; y++) {
            diff_y = (dsth >= srch)
                         ? (y + 0.5) - center_y
                         : (y + 0.5) * dsth / srch - center_y;
            weight = f_resample(fabs(diff_y));
            for (i = 0; i < depth; i++) {
                pos = (y * dstw + w) * depth + i;
                offsets[i] += tmp[pos] * weight;
            }
            total += weight;
        }

        /* Zero total weight leaves the destination pixel untouched. */
        if (total > 0.0) {
            for (i = 0; i < depth; i++) {
                pos = (h * dstw + w) * depth + i;
                dst[pos] = normalize(offsets[i], total);
            }
        }
    }
}
1154

1155
static void
1156
scale_with_resampling_serial(
75✔
1157
    unsigned char *dst,
1158
    unsigned char const *src,
1159
    int const srcw,
1160
    int const srch,
1161
    int const dstw,
1162
    int const dsth,
1163
    int const depth,
1164
    resample_fn_t const f_resample,
1165
    double const n,
1166
    unsigned char *tmp)
1167
{
1168
    int y;
75✔
1169
    int h;
75✔
1170
    int simd_level;
75✔
1171

1172
    simd_level = sixel_scale_simd_level();
75✔
1173

1174
    for (y = 0; y < srch; y++) {
32,100✔
1175
        scale_horizontal_row(tmp,
31,950✔
1176
                             src,
1177
                             srcw,
1178
                             dstw,
1179
                             depth,
1180
                             y,
1181
                             f_resample,
1182
                             n,
1183
                             simd_level);
1184
    }
1185

1186
    for (h = 0; h < dsth; h++) {
12,042✔
1187
        scale_vertical_row(dst,
11,967✔
1188
                           tmp,
1189
                           dstw,
1190
                           dsth,
1191
                           depth,
1192
                           srch,
1193
                           h,
1194
                           f_resample,
1195
                           n,
1196
                           simd_level);
1197
    }
1198
}
75✔
1199

1200
#if SIXEL_ENABLE_THREADS
1201
/* Identifies which of the two separable resampling passes a worker
 * job belongs to. */
typedef enum scale_parallel_pass {
    SCALE_PASS_HORIZONTAL = 0,
    SCALE_PASS_VERTICAL = 1
} scale_parallel_pass_t;

/* Shared state handed to every threadpool worker; the controller fills
 * it in before queueing the jobs of each pass. */
typedef struct scale_parallel_context {
    unsigned char *dst;          /* final dstw x dsth output buffer */
    unsigned char const *src;    /* srcw x srch input buffer */
    unsigned char *tmp;          /* dstw x srch intermediate buffer */
    int srcw;
    int srch;
    int dstw;
    int dsth;
    int depth;                   /* per-pixel stride in bytes */
    resample_fn_t f_resample;    /* filter weight function */
    double n;                    /* sampling window half-width */
    scale_parallel_pass_t pass;  /* pass workers should execute */
    int simd_level;              /* cached sixel_scale_simd_level() */
    int band_span;               /* rows handled per queued job */
    sixel_logger_t *logger;      /* optional timeline logger, may be NULL */
} scale_parallel_context_t;
1222

1223
/*
1224
 * Emit timeline entries for every band so downstream aggregation can compute
1225
 * first/last activity windows per thread without losing information.
1226
 */
1227
static int
1228
scale_parallel_should_log(scale_parallel_context_t const *ctx, int index)
1229
{
1230
    int span;
1231

1232
    if (ctx == NULL || ctx->logger == NULL || !ctx->logger->active) {
×
1233
        return 0;
1234
    }
1235

1236
    if (index < 0) {
×
1237
        return 0;
1238
    }
1239

1240
    if (ctx->pass == SCALE_PASS_HORIZONTAL) {
×
1241
        span = ctx->srch;
1242
    } else {
1243
        span = ctx->dsth;
1244
    }
1245

1246
    if (span <= 0 || index >= span) {
×
1247
        return 0;
1248
    }
1249

1250
    return 1;
1251
}
1252

1253
/*
1254
 * Allow callers to raise the floor for parallel execution using
1255
 * SIXEL_SCALE_PARALLEL_MIN_BYTES. The default of zero preserves the previous
1256
 * eager behavior while permitting deployments to defer threading on tiny
1257
 * inputs.
1258
 */
1259
static size_t
scale_parallel_min_bytes(void)
{
    static int cached = 0;          /* one-shot env lookup guard */
    static size_t min_bytes = 0;    /* default 0: always parallelize */
    char const *value;
    char *end;
    unsigned long long raw;

    if (cached) {
        return min_bytes;
    }
    cached = 1;

    value = sixel_compat_getenv("SIXEL_SCALE_PARALLEL_MIN_BYTES");
    if (value == NULL || *value == '\0') {
        return min_bytes;
    }

    /* Reject trailing garbage, empty parses, and out-of-range values;
     * any of those leaves the default threshold in place. */
    errno = 0;
    raw = strtoull(value, &end, 10);
    if (end == value || *end != '\0' || errno == ERANGE) {
        return min_bytes;
    }

    /* Saturate rather than truncate when the value exceeds size_t. */
    min_bytes = (raw > (unsigned long long)SIZE_MAX)
                    ? SIZE_MAX
                    : (size_t)raw;

    return min_bytes;
}
1292

1293
/*
1294
 * Choose the number of rows handled per threadpool job. We prefer an
1295
 * environment override via SIXEL_PARALLEL_FACTOR so deployments can tune
1296
 * queueing overhead. Otherwise derive a span from rows/threads and clamp to
1297
 * [1, rows]. The value is cached after the first lookup.
1298
 */
1299
static int
scale_parallel_band_span(int rows, int threads)
{
    static int initialized = 0;   /* one-shot env lookup guard */
    static int env_span = 0;      /* parsed override; 0 means "unset" */
    char const *text;
    char *endptr;
    long parsed;
    int span;

    if (rows <= 0) {
        return 1;
    }

    /*
     * Robustness fix: current callers always pass threads >= 2, but a
     * zero or negative count would make `rows / threads` below divide
     * by zero (undefined behavior).  Clamp defensively.
     */
    if (threads < 1) {
        threads = 1;
    }

    if (!initialized) {
        initialized = 1;
        text = sixel_compat_getenv("SIXEL_PARALLEL_FACTOR");
        if (text != NULL && text[0] != '\0') {
            /* Accept only a fully-consumed, positive, in-range value. */
            errno = 0;
            parsed = strtol(text, &endptr, 10);
            if (endptr != text && *endptr == '\0' && errno != ERANGE &&
                parsed > 0 && parsed <= INT_MAX) {
                env_span = (int)parsed;
            }
        }
    }

    if (env_span > 0) {
        span = env_span;
    } else {
        span = rows / threads;
    }

    /* Clamp the span to [1, rows]. */
    if (span < 1) {
        span = 1;
    }
    if (span > rows) {
        span = rows;
    }

    return span;
}
1341

1342
/*
 * Threadpool worker: runs one band of rows for the pass currently
 * selected in the shared context.  `job.band_index` names the first row
 * of the band; the band covers [band_index, band_index + band_span)
 * clamped to the row count of the pass.  Returns SIXEL_OK, or
 * SIXEL_BAD_ARGUMENT for a NULL context or out-of-range band index.
 */
static int
scale_parallel_worker(tp_job_t job, void *userdata, void *workspace)
{
    scale_parallel_context_t *ctx;
    int index;           /* first row of this band */
    char const *role;    /* logger role: "horizontal" or "vertical" */
    int y0;              /* inclusive start row */
    int y1;              /* exclusive end row */
    int in0;             /* logged input-range start (always 0) */
    int in1;             /* logged input-range end */
    int limit;           /* row count of the current pass */
    int y;

    (void)workspace;     /* per-thread scratch is unused here */
    ctx = (scale_parallel_context_t *)userdata;
    if (ctx == NULL) {
        return SIXEL_BAD_ARGUMENT;
    }

    role = "horizontal";
    y0 = 0;
    y1 = 0;
    in0 = 0;
    in1 = 0;
    index = job.band_index;
    limit = ctx->srch;
    /* Horizontal bands iterate source rows; vertical bands iterate
     * destination rows. */
    if (ctx->pass == SCALE_PASS_HORIZONTAL) {
        limit = ctx->srch;
    } else {
        limit = ctx->dsth;
    }

    if (index < 0 || index >= limit) {
        return SIXEL_BAD_ARGUMENT;
    }

    /* Clamp the band's end so the final band never overruns. */
    y0 = index;
    y1 = index + ctx->band_span;
    if (y1 > limit) {
        y1 = limit;
    }

    if (ctx->pass == SCALE_PASS_HORIZONTAL) {
        in1 = ctx->dstw;
        if (scale_parallel_should_log(ctx, index)) {
            sixel_logger_logf(ctx->logger,
                              role,
                              "scale",
                              "start",
                              index,
                              y1 - 1,
                              y0,
                              y1,
                              in0,
                              in1,
                              "horizontal pass");
        }
        for (y = y0; y < y1; y++) {
            scale_horizontal_row(ctx->tmp,
                                 ctx->src,
                                 ctx->srcw,
                                 ctx->dstw,
                                 ctx->depth,
                                 y,
                                 ctx->f_resample,
                                 ctx->n,
                                 ctx->simd_level);
        }
    } else {
        role = "vertical";
        in1 = ctx->srch;
        if (scale_parallel_should_log(ctx, index)) {
            sixel_logger_logf(ctx->logger,
                              role,
                              "scale",
                              "start",
                              index,
                              y1 - 1,
                              y0,
                              y1,
                              in0,
                              in1,
                              "vertical pass");
        }
        for (y = y0; y < y1; y++) {
            scale_vertical_row(ctx->dst,
                               ctx->tmp,
                               ctx->dstw,
                               ctx->dsth,
                               ctx->depth,
                               ctx->srch,
                               y,
                               ctx->f_resample,
                               ctx->n,
                               ctx->simd_level);
        }
    }

    if (scale_parallel_should_log(ctx, index)) {
        sixel_logger_logf(ctx->logger,
                          role,
                          "scale",
                          "finish",
                          index,
                          y1 - 1,
                          y0,
                          y1,
                          in0,
                          in1,
                          "pass complete");
    }

    return SIXEL_OK;
}
1456

1457
/*
1458
 * Parallel path mirrors the encoder and dither thread selection through
1459
 * sixel_threads_resolve(). Rows are batched into jobs for both passes so the
1460
 * caller can saturate the threadpool without altering the filtering math while
1461
 * reducing queue overhead.
1462
 */
1463
static int
scale_with_resampling_parallel(
    unsigned char *dst,
    unsigned char const *src,
    int const srcw,
    int const srch,
    int const dstw,
    int const dsth,
    int const depth,
    resample_fn_t const f_resample,
    double const n,
    unsigned char *tmp,
    sixel_logger_t *logger)
{
    scale_parallel_context_t ctx;
    threadpool_t *pool;
    tp_job_t job;           /* only band_index is populated below;
                             * NOTE(review): confirm tp_job_t has no
                             * other fields workers read */
    size_t image_bytes;
    int threads;
    int queue_depth;
    int y;
    int rc;
    int logger_ready;
    int horizontal_span;    /* rows per job, horizontal pass */
    int vertical_span;      /* rows per job, vertical pass */

    /* Refuse to parallelize tiny inputs; the caller falls back to the
     * serial path when we return SIXEL_BAD_ARGUMENT. */
    image_bytes = (size_t)srcw * (size_t)srch * (size_t)depth;
    if (image_bytes < scale_parallel_min_bytes()) {
        if (logger != NULL) {
            sixel_logger_logf(logger,
                              "controller",
                              "scale",
                              "skip",
                              -1,
                              -1,
                              0,
                              0,
                              0,
                              0,
                              "below threshold bytes=%zu",
                              image_bytes);
        }
        return SIXEL_BAD_ARGUMENT;
    }

    /* A single worker thread gains nothing over the serial path. */
    threads = sixel_threads_resolve();
    if (threads < 2) {
        if (logger != NULL) {
            sixel_logger_logf(logger,
                              "controller",
                              "scale",
                              "skip",
                              -1,
                              -1,
                              0,
                              0,
                              0,
                              0,
                              "threads=%d",
                              threads);
        }
        return SIXEL_BAD_ARGUMENT;
    }

    logger_ready = logger != NULL && logger->active;
    if (logger_ready) {
        sixel_logger_logf(logger,
                          "controller",
                          "scale",
                          "start",
                          -1,
                          -1,
                          0,
                          srch,
                          0,
                          dsth,
                          "parallel scale src=%dx%d dst=%dx%d",
                          srcw,
                          srch,
                          dstw,
                          dsth);
    }

    /* Fill in the worker context shared by both passes. */
    ctx.dst = dst;
    ctx.src = src;
    ctx.tmp = tmp;
    ctx.srcw = srcw;
    ctx.srch = srch;
    ctx.dstw = dstw;
    ctx.dsth = dsth;
    ctx.depth = depth;
    ctx.f_resample = f_resample;
    ctx.n = n;
    ctx.simd_level = sixel_scale_simd_level();
    ctx.logger = logger_ready ? logger : NULL;

    /*
     * Batch rows to reduce queue churn. Prefer the environment override so
     * deployments can pin a consistent span; otherwise derive a default from
     * rows per thread.
     */
    horizontal_span = scale_parallel_band_span(srch, threads);
    vertical_span = scale_parallel_band_span(dsth, threads);

    /* Queue a few jobs per thread, clamped to [1, srch]. */
    queue_depth = threads * 3;
    if (queue_depth > srch) {
        queue_depth = srch;
    }
    if (queue_depth < 1) {
        queue_depth = 1;
    }

    /* --- Pass 1: horizontal (src rows -> tmp) --- */
    ctx.pass = SCALE_PASS_HORIZONTAL;
    ctx.band_span = horizontal_span;
    if (logger_ready) {
        sixel_logger_logf(logger,
                          "controller",
                          "scale",
                          "pass_start",
                          -1,
                          0,
                          0,
                          srch,
                          0,
                          ctx.dstw,
                          "horizontal queue=%d threads=%d",
                          queue_depth,
                          threads);
    }
    pool = threadpool_create(threads,
                             queue_depth,
                             0,
                             scale_parallel_worker,
                             &ctx);
    if (pool == NULL) {
        return SIXEL_BAD_ALLOCATION;
    }

    for (y = 0; y < srch; y += horizontal_span) {
        job.band_index = y;
        threadpool_push(pool, job);
    }
    /* Drain the queue and collect the first worker error, if any. */
    threadpool_finish(pool);
    rc = threadpool_get_error(pool);
    threadpool_destroy(pool);
    if (rc != SIXEL_OK) {
        return rc;
    }

    if (logger_ready) {
        sixel_logger_logf(logger,
                          "controller",
                          "scale",
                          "pass_finish",
                          -1,
                          srch - 1,
                          0,
                          srch,
                          0,
                          ctx.dstw,
                          "horizontal complete");
    }

    /* Recompute the queue depth for the vertical row count. */
    queue_depth = threads * 3;
    if (queue_depth > dsth) {
        queue_depth = dsth;
    }
    if (queue_depth < 1) {
        queue_depth = 1;
    }

    /* --- Pass 2: vertical (tmp -> dst) --- */
    ctx.pass = SCALE_PASS_VERTICAL;
    ctx.band_span = vertical_span;
    if (logger_ready) {
        sixel_logger_logf(logger,
                          "controller",
                          "scale",
                          "pass_start",
                          -1,
                          0,
                          0,
                          dsth,
                          0,
                          ctx.srch,
                          "vertical queue=%d threads=%d",
                          queue_depth,
                          threads);
    }
    pool = threadpool_create(threads,
                             queue_depth,
                             0,
                             scale_parallel_worker,
                             &ctx);
    if (pool == NULL) {
        return SIXEL_BAD_ALLOCATION;
    }

    for (y = 0; y < dsth; y += vertical_span) {
        job.band_index = y;
        threadpool_push(pool, job);
    }
    threadpool_finish(pool);
    rc = threadpool_get_error(pool);
    threadpool_destroy(pool);

    if (logger_ready) {
        sixel_logger_logf(logger,
                          "controller",
                          "scale",
                          "pass_finish",
                          -1,
                          dsth - 1,
                          0,
                          dsth,
                          0,
                          ctx.srch,
                          "vertical complete rc=%d",
                          rc);
        sixel_logger_logf(logger,
                          "controller",
                          "scale",
                          "finish",
                          -1,
                          dsth - 1,
                          0,
                          dsth,
                          0,
                          ctx.srch,
                          "parallel scale status=%d",
                          rc);
    }

    return rc;
}
1697
#endif /* SIXEL_ENABLE_THREADS */
1698

1699
/*
1700
 * Allocate shared scratch storage and attempt the parallel pipeline first so
1701
 * larger inputs benefit from threading while smaller ones retain the serial
1702
 * behavior.
1703
 */
1704
static void
scale_with_resampling(
    unsigned char *dst,
    unsigned char const *src,
    int const srcw,
    int const srch,
    int const dstw,
    int const dsth,
    int const depth,
    resample_fn_t const f_resample,
    double n,
    sixel_allocator_t *allocator)
{
    unsigned char *tmp;     /* shared dstw x srch intermediate buffer */
    size_t tmp_size;
#if SIXEL_ENABLE_THREADS
    int rc;
    sixel_logger_t logger;
    int logger_prepared;    /* nonzero once the env-driven logger is live */
#endif

#if SIXEL_ENABLE_THREADS
    /* Prepare the optional timeline logger; it stays inactive unless
     * the environment enables it. */
    sixel_logger_init(&logger);
    logger_prepared = 0;
    (void)sixel_logger_prepare_env(&logger);
    logger_prepared = logger.active;
#endif

    /* Scratch buffer shared by the parallel and serial pipelines. */
    tmp_size = (size_t)dstw * (size_t)srch * (size_t)depth;
    tmp = (unsigned char *)sixel_allocator_malloc(allocator, tmp_size);
    if (tmp == NULL) {
        /* Allocation failure: silently leave dst untouched. */
#if SIXEL_ENABLE_THREADS
        if (logger_prepared) {
            sixel_logger_close(&logger);
        }
#endif
        return;
    }

#if SIXEL_ENABLE_THREADS
    /* Try the threaded pipeline first; it declines (non-OK rc) for
     * small inputs or when fewer than two threads are available. */
    rc = scale_with_resampling_parallel(dst,
                                        src,
                                        srcw,
                                        srch,
                                        dstw,
                                        dsth,
                                        depth,
                                        f_resample,
                                        n,
                                        tmp,
                                        logger_prepared
                                            ? &logger
                                            : NULL);
    if (rc == SIXEL_OK) {
        sixel_allocator_free(allocator, tmp);
        if (logger_prepared) {
            sixel_logger_close(&logger);
        }
        return;
    }

    if (logger_prepared) {
        sixel_logger_logf(&logger,
                          "controller",
                          "scale",
                          "fallback",
                          -1,
                          -1,
                          0,
                          dsth,
                          0,
                          srch,
                          "parallel rc=%d",
                          rc);
    }
#endif

    /* Serial fallback (also the only path without thread support). */
    scale_with_resampling_serial(dst,
                                 src,
                                 srcw,
                                 srch,
                                 dstw,
                                 dsth,
                                 depth,
                                 f_resample,
                                 n,
                                 tmp);

    sixel_allocator_free(allocator, tmp);
#if SIXEL_ENABLE_THREADS
    if (logger_prepared) {
        sixel_logger_close(&logger);
    }
#endif
}
1799

1800
/*
1801
 * Floating-point scaler mirrors the byte-path SSE2 usage. Keep it noinline
1802
 * on i386 so the SIXEL_ALIGN_STACK prologue stays in place when SSE2 locals
1803
 * need to spill to the stack.
1804
 */
1805
static SIXEL_ALIGN_STACK SIXEL_NO_INLINE void
1806
scale_with_resampling_float32(
×
1807
    float *dst,
1808
    float const *src,
1809
    int const srcw,
1810
    int const srch,
1811
    int const dstw,
1812
    int const dsth,
1813
    int const depth,
1814
    resample_fn_t const f_resample,
1815
    double n,
1816
    sixel_allocator_t *allocator)
1817
{
1818
    int w;
×
1819
    int h;
×
1820
    int x;
×
1821
    int y;
×
1822
    int i;
×
1823
    int pos;
×
1824
    int x_first;
×
1825
    int x_last;
×
1826
    int y_first;
×
1827
    int y_last;
×
1828
    double center_x;
×
1829
    double center_y;
×
1830
    double diff_x;
×
1831
    double diff_y;
×
1832
    double weight;
×
1833
    double total;
×
1834
    double offsets[8];
×
1835
    float *tmp;
×
1836
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
1837
    float vecbuf[4];
1838
#endif
1839
    int simd_level;
×
1840
#if defined(SIXEL_USE_AVX512)
1841
    __m512 acc512;
×
1842
#endif
1843
#if defined(SIXEL_USE_AVX2) || defined(SIXEL_USE_AVX)
1844
    __m256 acc256;
×
1845
#endif
1846
#if defined(SIXEL_USE_SSE2)
1847
    __m128 acc128;
1848
    __m128 pixf128;
1849
    __m128 wv128;
1850
    __m128 scalev128;
1851
    __m128 minv128;
1852
    __m128 maxv128;
1853
#elif defined(SIXEL_USE_NEON)
1854
    float32x4_t acc_neon;
1855
    float32x4_t pixf_neon;
1856
    float32x4_t wv_neon;
1857
    float32x4_t scalev_neon;
1858
    float32x4_t minv_neon;
1859
    float32x4_t maxv_neon;
1860
#endif
1861

1862
    tmp = (float *)sixel_allocator_malloc(
×
1863
        allocator,
1864
        (size_t)(dstw * srch * depth * (int)sizeof(float)));
×
1865
    if (tmp == NULL) {
×
1866
        return;
×
1867
    }
1868

1869
    simd_level = sixel_scale_simd_level();
×
1870

1871
    for (y = 0; y < srch; y++) {
×
1872
        for (w = 0; w < dstw; w++) {
×
1873
            total = 0.0;
×
1874
            for (i = 0; i < depth; i++) {
×
1875
                offsets[i] = 0.0;
×
1876
            }
1877

1878
            if (dstw >= srcw) {
×
1879
                center_x = (w + 0.5) * srcw / dstw;
×
1880
                x_first = MAX(center_x - n, 0);
×
1881
                x_last = MIN(center_x + n, srcw - 1);
×
1882
            } else {
1883
                center_x = w + 0.5;
×
1884
                x_first = MAX(floor((center_x - n) * srcw / dstw), 0);
×
1885
                x_last = MIN(floor((center_x + n) * srcw / dstw),
×
1886
                             srcw - 1);
1887
            }
1888

1889
#if defined(SIXEL_USE_AVX512)
1890
            if (depth == 3 &&
×
1891
                simd_level >= SIXEL_SIMD_LEVEL_AVX512) {
×
1892
                acc512 = sixel_avx512_zero_ps();
×
1893

1894
                for (x = x_first; x <= x_last; x++) {
×
1895
                    diff_x = (dstw >= srcw)
×
1896
                                 ? (x + 0.5) - center_x
×
1897
                                 : (x + 0.5) * srcw / dstw - center_x;
×
1898
                    weight = f_resample(fabs(diff_x));
×
1899
                    pos = (y * srcw + x) * depth;
×
1900
                    acc512 = sixel_avx512_muladd_ps(
×
1901
                        acc512,
1902
                        sixel_avx512_load_rgb_f32(src + pos),
×
1903
                        (float)weight);
1904
                    total += weight;
×
1905
                }
1906
                if (total > 0.0) {
×
1907
                    pos = (y * dstw + w) * depth;
×
1908
                    sixel_avx512_store_rgb_f32(acc512, total, tmp + pos);
×
1909
                }
1910
            } else
1911
#endif
1912
#if defined(SIXEL_USE_AVX2)
1913
            if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX2) {
×
1914
                acc256 = sixel_avx2_zero_ps();
×
1915

1916
                for (x = x_first; x <= x_last; x++) {
×
1917
                    diff_x = (dstw >= srcw)
×
1918
                                 ? (x + 0.5) - center_x
×
1919
                                 : (x + 0.5) * srcw / dstw - center_x;
×
1920
                    weight = f_resample(fabs(diff_x));
×
1921
                    pos = (y * srcw + x) * depth;
×
1922
                    acc256 = sixel_avx2_muladd_ps(
×
1923
                        acc256,
1924
                        sixel_avx2_load_rgb_f32(src + pos),
×
1925
                        (float)weight);
1926
                    total += weight;
×
1927
                }
1928
                if (total > 0.0) {
×
1929
                    pos = (y * dstw + w) * depth;
×
1930
                    sixel_avx2_store_rgb_f32(acc256, total, tmp + pos);
×
1931
                }
1932
            } else
1933
#endif
1934
#if defined(SIXEL_USE_AVX)
1935
            if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX) {
×
1936
                acc256 = sixel_avx_zero_ps();
×
1937

1938
                for (x = x_first; x <= x_last; x++) {
×
1939
                    diff_x = (dstw >= srcw)
×
1940
                                 ? (x + 0.5) - center_x
×
1941
                                 : (x + 0.5) * srcw / dstw - center_x;
×
1942
                    weight = f_resample(fabs(diff_x));
×
1943
                    pos = (y * srcw + x) * depth;
×
1944
                    acc256 = sixel_avx_muladd_ps(
×
1945
                        acc256,
1946
                        sixel_avx_load_rgb_f32(src + pos),
×
1947
                        (float)weight);
1948
                    total += weight;
×
1949
                }
1950
                if (total > 0.0) {
×
1951
                    pos = (y * dstw + w) * depth;
×
1952
                    sixel_avx_store_rgb_f32(acc256, total, tmp + pos);
×
1953
                }
1954
            } else
1955
#endif
1956
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
1957
            if (depth == 3
×
1958
# if defined(SIXEL_USE_SSE2)
1959
                && simd_level >= SIXEL_SIMD_LEVEL_SSE2
×
1960
# elif defined(SIXEL_USE_NEON)
1961
                && simd_level >= SIXEL_SIMD_LEVEL_NEON
×
1962
# endif
1963
                ) {
1964
#if defined(SIXEL_USE_SSE2)
1965
                acc128 = _mm_setzero_ps();
1966
                minv128 = _mm_set1_ps(0.0f);
1967
                maxv128 = _mm_set1_ps(1.0f);
1968
#elif defined(SIXEL_USE_NEON)
1969
                acc_neon = vdupq_n_f32(0.0f);
1970
                minv_neon = vdupq_n_f32(0.0f);
1971
                maxv_neon = vdupq_n_f32(1.0f);
1972
#endif
1973
                for (x = x_first; x <= x_last; x++) {
×
1974
                    diff_x = (dstw >= srcw)
×
1975
                                 ? (x + 0.5) - center_x
1976
                                 : (x + 0.5) * srcw / dstw - center_x;
×
1977
                    weight = f_resample(fabs(diff_x));
1978
                    pos = (y * srcw + x) * depth;
1979
                    const float *psrc = src + pos;
1980
#if defined(SIXEL_USE_SSE2)
1981
                    pixf128 = _mm_set_ps(
1982
                        0.0f, psrc[2], psrc[1], psrc[0]);
1983
                    wv128 = _mm_set1_ps((float)weight);
1984
                    acc128 = _mm_add_ps(acc128,
1985
                                        _mm_mul_ps(pixf128, wv128));
1986
#else /* NEON */
1987
                    /*
1988
                     * Expand the RGB triple into a NEON vector without
1989
                     * brace initialization to keep older toolchains
1990
                     * happy.
1991
                     */
1992
                    pixf_neon = vdupq_n_f32(0.0f);
1993
                    pixf_neon = vsetq_lane_f32(psrc[0], pixf_neon, 0);
1994
                    pixf_neon = vsetq_lane_f32(psrc[1], pixf_neon, 1);
1995
                    pixf_neon = vsetq_lane_f32(psrc[2], pixf_neon, 2);
1996
                    wv_neon = vdupq_n_f32((float)weight);
1997
                    acc_neon = vmlaq_f32(acc_neon, pixf_neon, wv_neon);
1998
#endif
1999
                    total += weight;
2000
                }
2001
                if (total > 0.0) {
×
2002
#if defined(SIXEL_USE_SSE2)
2003
                    scalev128 = _mm_set1_ps((float)(1.0 / total));
2004
                    acc128 = _mm_mul_ps(acc128, scalev128);
2005
                    acc128 = _mm_max_ps(minv128,
2006
                                        _mm_min_ps(acc128, maxv128));
2007
                    _mm_storeu_ps(vecbuf, acc128);
2008
#else /* NEON */
2009
                    scalev_neon = vdupq_n_f32(
2010
                        (float)(1.0 / total));
2011
                    acc_neon = vmulq_f32(acc_neon, scalev_neon);
2012
                    acc_neon = vmaxq_f32(minv_neon,
2013
                                         vminq_f32(acc_neon, maxv_neon));
2014
                    vst1q_f32(vecbuf, acc_neon);
2015
#endif
2016
                    pos = (y * dstw + w) * depth;
2017
                    tmp[pos + 0] = vecbuf[0];
2018
                    tmp[pos + 1] = vecbuf[1];
2019
                    tmp[pos + 2] = vecbuf[2];
2020
                }
2021
            } else
2022
#endif
2023
            {
2024
                for (x = x_first; x <= x_last; x++) {
×
2025
                    diff_x = (dstw >= srcw)
×
2026
                                 ? (x + 0.5) - center_x
×
2027
                                 : (x + 0.5) * srcw / dstw - center_x;
×
2028
                    weight = f_resample(fabs(diff_x));
×
2029
                    for (i = 0; i < depth; i++) {
×
2030
                        pos = (y * srcw + x) * depth + i;
×
2031
                        offsets[i] += src[pos] * weight;
×
2032
                    }
2033
                    total += weight;
×
2034
                }
2035

2036
                if (total > 0.0) {
×
2037
                    for (i = 0; i < depth; i++) {
×
2038
                        pos = (y * dstw + w) * depth + i;
×
2039
                        tmp[pos] = sixel_clamp_unit_f32(
×
2040
                            (float)(offsets[i] / total));
×
2041
                    }
2042
                }
2043
            }
2044
        }
2045
    }
2046

2047
    for (h = 0; h < dsth; h++) {
×
2048
        for (w = 0; w < dstw; w++) {
×
2049
            total = 0.0;
×
2050
            for (i = 0; i < depth; i++) {
×
2051
                offsets[i] = 0.0;
×
2052
            }
2053

2054
            if (dsth >= srch) {
×
2055
                center_y = (h + 0.5) * srch / dsth;
×
2056
                y_first = MAX(center_y - n, 0);
×
2057
                y_last = MIN(center_y + n, srch - 1);
×
2058
            } else {
2059
                center_y = h + 0.5;
×
2060
                y_first = MAX(floor((center_y - n) * srch / dsth), 0);
×
2061
                y_last = MIN(floor((center_y + n) * srch / dsth),
×
2062
                             srch - 1);
2063
            }
2064

2065
#if defined(SIXEL_USE_AVX512)
2066
            if (depth == 3 &&
×
2067
                simd_level >= SIXEL_SIMD_LEVEL_AVX512) {
×
2068
                acc512 = sixel_avx512_zero_ps();
×
2069

2070
                for (y = y_first; y <= y_last; y++) {
×
2071
                    diff_y = (dsth >= srch)
×
2072
                                 ? (y + 0.5) - center_y
×
2073
                                 : (y + 0.5) * dsth / srch - center_y;
×
2074
                    weight = f_resample(fabs(diff_y));
×
2075
                    pos = (y * dstw + w) * depth;
×
2076
                    acc512 = sixel_avx512_muladd_ps(
×
2077
                        acc512,
2078
                        sixel_avx512_load_rgb_f32(tmp + pos),
×
2079
                        (float)weight);
2080
                    total += weight;
×
2081
                }
2082
                if (total > 0.0) {
×
2083
                    pos = (h * dstw + w) * depth;
×
2084
                    sixel_avx512_store_rgb_f32(acc512, total, dst + pos);
×
2085
                }
2086
            } else
2087
#endif
2088
#if defined(SIXEL_USE_AVX2)
2089
            if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX2) {
×
2090
                acc256 = sixel_avx2_zero_ps();
×
2091

2092
                for (y = y_first; y <= y_last; y++) {
×
2093
                    diff_y = (dsth >= srch)
×
2094
                                 ? (y + 0.5) - center_y
×
2095
                                 : (y + 0.5) * dsth / srch - center_y;
×
2096
                    weight = f_resample(fabs(diff_y));
×
2097
                    pos = (y * dstw + w) * depth;
×
2098
                    acc256 = sixel_avx2_muladd_ps(
×
2099
                        acc256,
2100
                        sixel_avx2_load_rgb_f32(tmp + pos),
×
2101
                        (float)weight);
2102
                    total += weight;
×
2103
                }
2104
                if (total > 0.0) {
×
2105
                    pos = (h * dstw + w) * depth;
×
2106
                    sixel_avx2_store_rgb_f32(acc256, total, dst + pos);
×
2107
                }
2108
            } else
2109
#endif
2110
#if defined(SIXEL_USE_AVX)
2111
            if (depth == 3 && simd_level >= SIXEL_SIMD_LEVEL_AVX) {
×
2112
                acc256 = sixel_avx_zero_ps();
×
2113

2114
                for (y = y_first; y <= y_last; y++) {
×
2115
                    diff_y = (dsth >= srch)
×
2116
                                 ? (y + 0.5) - center_y
×
2117
                                 : (y + 0.5) * dsth / srch - center_y;
×
2118
                    weight = f_resample(fabs(diff_y));
×
2119
                    pos = (y * dstw + w) * depth;
×
2120
                    acc256 = sixel_avx_muladd_ps(
×
2121
                        acc256,
2122
                        sixel_avx_load_rgb_f32(tmp + pos),
×
2123
                        (float)weight);
2124
                    total += weight;
×
2125
                }
2126
                if (total > 0.0) {
×
2127
                    pos = (h * dstw + w) * depth;
×
2128
                    sixel_avx_store_rgb_f32(acc256, total, dst + pos);
×
2129
                }
2130
            } else
2131
#endif
2132
#if defined(SIXEL_USE_SSE2) || defined(SIXEL_USE_NEON)
2133
            if (depth == 3
×
2134
# if defined(SIXEL_USE_SSE2)
2135
                && simd_level >= SIXEL_SIMD_LEVEL_SSE2
×
2136
# elif defined(SIXEL_USE_NEON)
2137
                && simd_level >= SIXEL_SIMD_LEVEL_NEON
×
2138
# endif
2139
                ) {
2140
#if defined(SIXEL_USE_SSE2)
2141
                acc128 = _mm_setzero_ps();
2142
                minv128 = _mm_set1_ps(0.0f);
2143
                maxv128 = _mm_set1_ps(1.0f);
2144
#elif defined(SIXEL_USE_NEON)
2145
                acc_neon = vdupq_n_f32(0.0f);
2146
                minv_neon = vdupq_n_f32(0.0f);
2147
                maxv_neon = vdupq_n_f32(1.0f);
2148
#endif
2149
                for (y = y_first; y <= y_last; y++) {
×
2150
                    diff_y = (dsth >= srch)
×
2151
                                 ? (y + 0.5) - center_y
2152
                                 : (y + 0.5) * dsth / srch - center_y;
×
2153
                    weight = f_resample(fabs(diff_y));
2154
                    pos = (y * dstw + w) * depth;
2155
                    const float *psrc = tmp + pos;
2156
#if defined(SIXEL_USE_SSE2)
2157
                    pixf128 = _mm_set_ps(
2158
                        0.0f, psrc[2], psrc[1], psrc[0]);
2159
                    wv128 = _mm_set1_ps((float)weight);
2160
                    acc128 = _mm_add_ps(acc128,
2161
                                        _mm_mul_ps(pixf128, wv128));
2162
#else /* NEON */
2163
                    /*
2164
                     * Expand the RGB triple into a NEON vector without
2165
                     * brace initialization to keep older toolchains
2166
                     * happy.
2167
                     */
2168
                    pixf_neon = vdupq_n_f32(0.0f);
2169
                    pixf_neon = vsetq_lane_f32(psrc[0], pixf_neon, 0);
2170
                    pixf_neon = vsetq_lane_f32(psrc[1], pixf_neon, 1);
2171
                    pixf_neon = vsetq_lane_f32(psrc[2], pixf_neon, 2);
2172
                    wv_neon = vdupq_n_f32((float)weight);
2173
                    acc_neon = vmlaq_f32(acc_neon, pixf_neon, wv_neon);
2174
#endif
2175
                    total += weight;
2176
                }
2177
                if (total > 0.0) {
×
2178
#if defined(SIXEL_USE_SSE2)
2179
                    scalev128 = _mm_set1_ps((float)(1.0 / total));
2180
                    acc128 = _mm_mul_ps(acc128, scalev128);
2181
                    acc128 = _mm_max_ps(minv128,
2182
                                        _mm_min_ps(acc128, maxv128));
2183
                    _mm_storeu_ps(vecbuf, acc128);
2184
#else /* NEON */
2185
                    scalev_neon = vdupq_n_f32(
2186
                        (float)(1.0 / total));
2187
                    acc_neon = vmulq_f32(acc_neon, scalev_neon);
2188
                    acc_neon = vmaxq_f32(minv_neon,
2189
                                         vminq_f32(acc_neon, maxv_neon));
2190
                    vst1q_f32(vecbuf, acc_neon);
2191
#endif
2192
                    pos = (h * dstw + w) * depth;
2193
                    dst[pos + 0] = vecbuf[0];
2194
                    dst[pos + 1] = vecbuf[1];
2195
                    dst[pos + 2] = vecbuf[2];
2196
                }
2197
            } else
2198
#endif
2199
            {
2200
                for (y = y_first; y <= y_last; y++) {
×
2201
                    diff_y = (dsth >= srch)
×
2202
                                 ? (y + 0.5) - center_y
×
2203
                                 : (y + 0.5) * dsth / srch - center_y;
×
2204
                    weight = f_resample(fabs(diff_y));
×
2205
                    for (i = 0; i < depth; i++) {
×
2206
                        pos = (y * dstw + w) * depth + i;
×
2207
                        offsets[i] += tmp[pos] * weight;
×
2208
                    }
2209
                    total += weight;
×
2210
                }
2211

2212
                if (total > 0.0) {
×
2213
                    for (i = 0; i < depth; i++) {
×
2214
                        pos = (h * dstw + w) * depth + i;
×
2215
                        dst[pos] = sixel_clamp_unit_f32(
×
2216
                            (float)(offsets[i] / total));
×
2217
                    }
2218
                }
2219
            }
2220
        }
2221
    }
2222

2223
    sixel_allocator_free(allocator, tmp);
×
2224
}
×
2225

2226

2227
SIXELAPI int
2228
sixel_helper_scale_image(
93✔
2229
    unsigned char       /* out */ *dst,
2230
    unsigned char const /* in */  *src,                   /* source image data */
2231
    int                 /* in */  srcw,                   /* source image width */
2232
    int                 /* in */  srch,                   /* source image height */
2233
    int                 /* in */  pixelformat,            /* one of enum pixelFormat */
2234
    int                 /* in */  dstw,                   /* destination image width */
2235
    int                 /* in */  dsth,                   /* destination image height */
2236
    int                 /* in */  method_for_resampling,  /* one of methodForResampling */
2237
    sixel_allocator_t   /* in */  *allocator)             /* allocator object */
2238
{
2239
    /*
2240
     * Convert the source image to RGB24 if necessary and scale it to the
2241
     * requested destination size.  The caller supplies an allocator used
2242
     * for any temporary buffers required during conversion or filtering.
2243
     */
2244
    int const depth = sixel_helper_compute_depth(pixelformat);
93✔
2245
    unsigned char *new_src = NULL;  /* optional converted source buffer */
93✔
2246
    int nret;
93✔
2247
    int new_pixelformat;
93✔
2248

2249
    /* ensure the scaler operates on RGB triples */
2250
    if (depth != 3) {
93!
2251
        new_src = (unsigned char *)sixel_allocator_malloc(allocator,
×
2252
                                                          (size_t)(srcw * srch * 3));
×
2253
        if (new_src == NULL) {
×
2254
            return (-1);
2255
        }
2256
        nret = sixel_helper_normalize_pixelformat(new_src,
×
2257
                                                  &new_pixelformat,
2258
                                                  src, pixelformat,
2259
                                                  srcw, srch);
2260
        if (nret != 0) {
×
2261
            sixel_allocator_free(allocator, new_src);
×
2262
            return (-1);
×
2263
        }
2264

2265
        src = new_src;  /* use converted buffer from here on */
2266
    } else {
2267
        new_pixelformat = pixelformat;
93✔
2268
    }
2269

2270
    /* choose re-sampling strategy */
2271
    switch (method_for_resampling) {
93!
2272
    case SIXEL_RES_NEAREST:
18✔
2273
        scale_without_resampling(dst, src, srcw, srch, dstw, dsth, depth);
18✔
2274
        break;
18✔
2275
    case SIXEL_RES_GAUSSIAN:
3✔
2276
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
3✔
2277
                              gaussian, 1.0, allocator);
2278
        break;
3✔
2279
    case SIXEL_RES_HANNING:
3✔
2280
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
3✔
2281
                              hanning, 1.0, allocator);
2282
        break;
3✔
2283
    case SIXEL_RES_HAMMING:
3✔
2284
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
3✔
2285
                              hamming, 1.0, allocator);
2286
        break;
3✔
2287
    case SIXEL_RES_WELSH:
3✔
2288
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
3✔
2289
                              welsh, 1.0, allocator);
2290
        break;
3✔
2291
    case SIXEL_RES_BICUBIC:
3✔
2292
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
3✔
2293
                              bicubic, 2.0, allocator);
2294
        break;
3✔
2295
    case SIXEL_RES_LANCZOS2:
6✔
2296
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
6✔
2297
                              lanczos2, 2.0, allocator);
2298
        break;
6✔
2299
    case SIXEL_RES_LANCZOS3:
3✔
2300
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
3✔
2301
                              lanczos3, 3.0, allocator);
2302
        break;
3✔
2303
    case SIXEL_RES_LANCZOS4:
3✔
2304
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
3✔
2305
                              lanczos4, 4.0, allocator);
2306
        break;
3✔
2307
    case SIXEL_RES_BILINEAR:
48✔
2308
    default:
2309
        scale_with_resampling(dst, src, srcw, srch, dstw, dsth, depth,
48✔
2310
                              bilinear, 1.0, allocator);
2311
        break;
48✔
2312
    }
2313

2314
    /* release temporary copy created for pixel-format normalization */
2315
    sixel_allocator_free(allocator, new_src);
93✔
2316
    return 0;
93✔
2317
}
2318

2319
SIXELAPI int
2320
sixel_helper_scale_image_float32(
×
2321
    float             /* out */ *dst,
2322
    float const       /* in */  *src,
2323
    int               /* in */  srcw,
2324
    int               /* in */  srch,
2325
    int               /* in */  pixelformat,
2326
    int               /* in */  dstw,
2327
    int               /* in */  dsth,
2328
    int               /* in */  method_for_resampling,
2329
    sixel_allocator_t /* in */  *allocator)
2330
{
2331
    int depth;
×
2332
    int depth_bytes;
×
2333

2334
    depth_bytes = sixel_helper_compute_depth(pixelformat);
×
2335
    if (depth_bytes <= 0) {
×
2336
        return (-1);
2337
    }
2338

2339
    depth = depth_bytes / (int)sizeof(float);
×
2340
    if (depth * (int)sizeof(float) != depth_bytes) {
×
2341
        return (-1);
2342
    }
2343

2344
    switch (method_for_resampling) {
×
2345
    case SIXEL_RES_NEAREST:
×
2346
        scale_without_resampling_float32(
×
2347
            dst, src, srcw, srch, dstw, dsth, depth);
2348
        break;
×
2349
    case SIXEL_RES_GAUSSIAN:
×
2350
        scale_with_resampling_float32(
×
2351
            dst, src, srcw, srch, dstw, dsth, depth,
2352
            gaussian, 1.0, allocator);
2353
        break;
×
2354
    case SIXEL_RES_HANNING:
×
2355
        scale_with_resampling_float32(
×
2356
            dst, src, srcw, srch, dstw, dsth, depth,
2357
            hanning, 1.0, allocator);
2358
        break;
×
2359
    case SIXEL_RES_HAMMING:
×
2360
        scale_with_resampling_float32(
×
2361
            dst, src, srcw, srch, dstw, dsth, depth,
2362
            hamming, 1.0, allocator);
2363
        break;
×
2364
    case SIXEL_RES_WELSH:
×
2365
        scale_with_resampling_float32(
×
2366
            dst, src, srcw, srch, dstw, dsth, depth,
2367
            welsh, 1.0, allocator);
2368
        break;
×
2369
    case SIXEL_RES_BICUBIC:
×
2370
        scale_with_resampling_float32(
×
2371
            dst, src, srcw, srch, dstw, dsth, depth,
2372
            bicubic, 2.0, allocator);
2373
        break;
×
2374
    case SIXEL_RES_LANCZOS2:
×
2375
        scale_with_resampling_float32(
×
2376
            dst, src, srcw, srch, dstw, dsth, depth,
2377
            lanczos2, 2.0, allocator);
2378
        break;
×
2379
    case SIXEL_RES_LANCZOS3:
×
2380
        scale_with_resampling_float32(
×
2381
            dst, src, srcw, srch, dstw, dsth, depth,
2382
            lanczos3, 3.0, allocator);
2383
        break;
×
2384
    case SIXEL_RES_LANCZOS4:
×
2385
        scale_with_resampling_float32(
×
2386
            dst, src, srcw, srch, dstw, dsth, depth,
2387
            lanczos4, 4.0, allocator);
2388
        break;
×
2389
    case SIXEL_RES_BILINEAR:
×
2390
    default:
2391
        scale_with_resampling_float32(
×
2392
            dst, src, srcw, srch, dstw, dsth, depth,
2393
            bilinear, 1.0, allocator);
2394
        break;
×
2395
    }
2396

2397
    return 0;
2398
}
2399

2400
#if HAVE_TESTS
2401

2402
static void
reference_scale(
    unsigned char *dst,
    unsigned char const *src,
    int const srcw,
    int const srch,
    int const dstw,
    int const dsth,
    int const depth)
{
    /*
     * Nearest-neighbor reference scaler used to validate the optimized
     * implementation.  Each destination pixel copies the source pixel
     * selected by integer truncation of the coordinate ratio.
     */
    int dy;
    int dx;
    int c;

    for (dy = 0; dy < dsth; dy++) {
        /* map the destination row back onto the source grid */
        int const sy = (int)((long)dy * srch / dsth);
        for (dx = 0; dx < dstw; dx++) {
            int const sx = (int)((long)dx * srcw / dstw);
            unsigned char const *sp = src + (sy * srcw + sx) * depth;
            unsigned char *dp = dst + (dy * dstw + dx) * depth;
            for (c = 0; c < depth; c++) {
                dp[c] = sp[c];
            }
        }
    }
}
×
2430

2431
static int
test_without_resampling_case(
    int srcw,
    int srch,
    int dstw,
    int dsth,
    int depth)
{
    /*
     * Compare scale_without_resampling() against the straightforward
     * reference implementation on a deterministic byte-pattern image.
     * Returns EXIT_SUCCESS when both outputs match byte-for-byte,
     * EXIT_FAILURE on mismatch or allocation failure.
     */
    size_t const srcsize = (size_t)srcw * srch * depth;
    size_t const dstsize = (size_t)dstw * dsth * depth;
    int result = EXIT_FAILURE;
    unsigned char *src;
    unsigned char *ref;
    unsigned char *out;
    size_t i;

    src = (unsigned char *)malloc(srcsize);
    ref = (unsigned char *)malloc(dstsize);
    out = (unsigned char *)malloc(dstsize);
    if (src != NULL && ref != NULL && out != NULL) {
        /* fill the source with a repeatable 0x00..0xff ramp */
        for (i = 0; i < srcsize; ++i) {
            src[i] = (unsigned char)(i & 0xff);
        }

        reference_scale(ref, src, srcw, srch, dstw, dsth, depth);
        scale_without_resampling(out, src, srcw, srch, dstw, dsth, depth);

        if (memcmp(ref, out, dstsize) == 0) {
            result = EXIT_SUCCESS;
        }
    }

    /* free(NULL) is a no-op, so partial allocation failures are safe */
    free(src);
    free(ref);
    free(out);
    return result;
}
2473

2474
SIXELAPI int
sixel_scale_tests_main(void)
{
    /*
     * Entry point for the built-in scaler self tests: exercise the
     * nearest-neighbor scaler over a few geometry/depth combinations
     * and report EXIT_SUCCESS only if every case passes.
     */
    static struct {
        int srcw;
        int srch;
        int dstw;
        int dsth;
        int depth;
    } const cases[] = {
        {8, 4, 3, 7, 3},
        {13, 9, 17, 6, 4}
    };
    size_t idx;

    for (idx = 0; idx < sizeof(cases) / sizeof(cases[0]); ++idx) {
        /* stop at the first failing combination */
        if (test_without_resampling_case(cases[idx].srcw,
                                         cases[idx].srch,
                                         cases[idx].dstw,
                                         cases[idx].dsth,
                                         cases[idx].depth)
                != EXIT_SUCCESS) {
            return EXIT_FAILURE;
        }
    }

    return EXIT_SUCCESS;
}
2506

2507
#endif /* HAVE_TESTS */
2508

2509
#if defined(__GNUC__) && !defined(__clang__)
2510
# pragma GCC diagnostic pop
2511
#endif
2512

2513
/* emacs Local Variables:      */
2514
/* emacs mode: c               */
2515
/* emacs tab-width: 4          */
2516
/* emacs indent-tabs-mode: nil */
2517
/* emacs c-basic-offset: 4     */
2518
/* emacs End:                  */
2519
/* vim: set expandtab ts=4 sts=4 sw=4 : */
2520
/* EOF */
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc