• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IntelPython / dpnp / 24687578389

20 Apr 2026 08:03PM UTC coverage: 78.429% (-3.6%) from 82.034%
24687578389

push

github

web-flow
Migrate `dpctl.tensor` into `dpnp.tensor` (#2856)

This PR migrates the tensor implementation from `dpctl.tensor` into
`dpnp.tensor` making dpnp the primary owner of the Array API-compliant
tensor layer

Major changes:

- Move compiled C++/SYCL extensions (`_tensor_impl,
_tensor_elementwise_impl, _tensor_reductions_impl, _tensor_sorting_impl,
_tensor_accumulation_impl, tensor linalg`) into `dpnp.tensor`
- Move `usm_ndarray`, `compute-follows-data utilities` and tensor
`tests` from dpctl
- Replace all `dpctl.tensor` references with `dpnp.tensor` in
docstrings, error messages and comments
- Remove redundant dpctl.tensor C-API interface
- Add `tensor.rst` documentation page describing the module, its
relationship to `dpnp.ndarray` and `dpctl` and linking to the `dpctl
0.21.1 API` reference

This simplifies maintenance, reduces cross-project dependencies and
enables independent development and release cycles

1573 of 2908 branches covered (54.09%)

Branch coverage included in aggregate %.

6973 of 9803 new or added lines in 203 files covered. (71.13%)

1 existing line in 1 file now uncovered.

26259 of 32579 relevant lines covered (80.6%)

7622.15 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

25.51
/dpnp/tensor/libtensor/include/utils/offset_utils.hpp
1
//*****************************************************************************
2
// Copyright (c) 2026, Intel Corporation
3
// All rights reserved.
4
//
5
// Redistribution and use in source and binary forms, with or without
6
// modification, are permitted provided that the following conditions are met:
7
// - Redistributions of source code must retain the above copyright notice,
8
//   this list of conditions and the following disclaimer.
9
// - Redistributions in binary form must reproduce the above copyright notice,
10
//   this list of conditions and the following disclaimer in the documentation
11
//   and/or other materials provided with the distribution.
12
// - Neither the name of the copyright holder nor the names of its contributors
13
//   may be used to endorse or promote products derived from this software
14
//   without specific prior written permission.
15
//
16
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26
// THE POSSIBILITY OF SUCH DAMAGE.
27
//*****************************************************************************
28
///
29
/// \file
30
/// This file defines Indexer callable operator to compute element offset in
/// an array addressed by global_id.
32
//===----------------------------------------------------------------------===//
33

34
#pragma once
35

36
#include <algorithm>
37
#include <array>
38
#include <cstddef>
39
#include <memory>
40
#include <tuple>
41
#include <utility>
42
#include <vector>
43

44
#include <sycl/sycl.hpp>
45

46
#include "kernels/dpnp_tensor_types.hpp"
47
#include "utils/strided_iters.hpp"
48
#include "utils/sycl_alloc_utils.hpp"
49

50
namespace dpnp::tensor::offset_utils
51
{
52
namespace detail
53
{
54
struct sink_t
55
{
56
    sink_t() {};
10,034✔
57
    template <class T>
58
    sink_t(T &&) {};
14,640✔
59
};
60

61
template <class V>
62
std::size_t __accumulate_size(std::size_t &s, V &&v)
63
{
10,034✔
64
    return s += v.size();
10,034✔
65
}
10,034✔
66

67
template <class V, class U>
68
sink_t __appender(V &lhs, U &&rhs)
69
{
10,034✔
70
    lhs.insert(lhs.end(), rhs.begin(), rhs.end());
10,034✔
71
    return {};
10,034✔
72
}
10,034✔
73

74
template <typename T, typename A, typename... Vs>
75
std::vector<T, A> concat(std::vector<T, A> lhs, Vs &&...vs)
76
{
2,303✔
77
    std::size_t s = lhs.size();
2,303✔
78
    {
2,303✔
79
        // limited scope ensures array is freed
80
        [[maybe_unused]] sink_t tmp[] = {__accumulate_size(s, vs)..., 0};
2,303✔
81
    }
2,303✔
82
    lhs.reserve(s);
2,303✔
83
    {
2,303✔
84
        // array of no-data objects ensures ordering of calls to the appender
85
        [[maybe_unused]] sink_t tmp[] = {
2,303✔
86
            __appender(lhs, std::forward<Vs>(vs))..., 0};
2,303✔
87
    }
2,303✔
88

89
    return std::move(lhs); // prevent return-value optimization
2,303✔
90
}
2,303✔
91
} // namespace detail
92

93
template <typename indT, typename... Vs>
94
std::tuple<std::unique_ptr<indT, dpnp::tensor::alloc_utils::USMDeleter>,
95
           std::size_t,
96
           sycl::event>
97
    device_allocate_and_pack(sycl::queue &q,
98
                             std::vector<sycl::event> &host_task_events,
99
                             Vs &&...vs)
100
{
2,303✔
101

102
    using dpnp::tensor::alloc_utils::usm_host_allocator;
2,303✔
103

104
    // memory transfer optimization, use USM-host for temporary speeds up
105
    // transfer to device, especially on dGPUs
106
    using usm_host_allocatorT = usm_host_allocator<indT>;
2,303✔
107
    using shT = std::vector<indT, usm_host_allocatorT>;
2,303✔
108

109
    usm_host_allocatorT usm_host_alloc(q);
2,303✔
110
    shT empty{0, usm_host_alloc};
2,303✔
111
    shT packed_shape_strides = detail::concat(std::move(empty), vs...);
2,303✔
112

113
    auto packed_shape_strides_owner =
2,303✔
114
        std::make_shared<shT>(std::move(packed_shape_strides));
2,303✔
115

116
    auto sz = packed_shape_strides_owner->size();
2,303✔
117
    auto shape_strides_owner =
2,303✔
118
        dpnp::tensor::alloc_utils::smart_malloc_device<indT>(sz, q);
2,303✔
119
    indT *shape_strides = shape_strides_owner.get();
2,303✔
120

121
    sycl::event copy_ev =
2,303✔
122
        q.copy<indT>(packed_shape_strides_owner->data(), shape_strides, sz);
2,303✔
123

124
    sycl::event cleanup_host_task_ev = q.submit([&](sycl::handler &cgh) {
2,303✔
125
        cgh.depends_on(copy_ev);
2,303✔
126
        cgh.host_task([packed_shape_strides_owner =
2,303✔
127
                           std::move(packed_shape_strides_owner)] {
2,303✔
128
            // increment shared pointer ref-count to keep it alive
129
            // till copy operation completes;
130
        });
2,303✔
131
    });
2,303✔
132
    host_task_events.push_back(cleanup_host_task_ev);
2,303✔
133

134
    return std::make_tuple(std::move(shape_strides_owner), sz, copy_ev);
2,303✔
135
}
2,303✔
136

137
struct NoOpIndexer
138
{
NEW
139
    constexpr NoOpIndexer() {}
×
NEW
140
    constexpr std::size_t operator()(std::size_t gid) const { return gid; }
×
141
};
142

143
using dpnp::tensor::ssize_t;
144

145
/* @brief Indexer with shape and strides arrays of same size are packed */
146
struct StridedIndexer
147
{
148
    StridedIndexer(int _nd,
149
                   ssize_t _offset,
150
                   ssize_t const *_packed_shape_strides)
151
        : nd(_nd), starting_offset(_offset),
152
          shape_strides(_packed_shape_strides)
NEW
153
    {
×
NEW
154
    }
×
155

NEW
156
    ssize_t operator()(ssize_t gid) const { return compute_offset(gid); }
×
157

158
    ssize_t operator()(std::size_t gid) const
NEW
159
    {
×
NEW
160
        return compute_offset(static_cast<ssize_t>(gid));
×
NEW
161
    }
×
162

163
private:
164
    int nd;
165
    ssize_t starting_offset;
166
    ssize_t const *shape_strides;
167

168
    ssize_t compute_offset(ssize_t gid) const
NEW
169
    {
×
NEW
170
        using dpnp::tensor::strides::CIndexer_vector;
×
NEW
171

×
NEW
172
        CIndexer_vector _ind(nd);
×
NEW
173
        ssize_t relative_offset(0);
×
NEW
174
        _ind.get_displacement<const ssize_t *, const ssize_t *>(
×
NEW
175
            gid,
×
NEW
176
            shape_strides,      // shape ptr
×
NEW
177
            shape_strides + nd, // strides ptr
×
NEW
178
            relative_offset);
×
NEW
179
        return starting_offset + relative_offset;
×
NEW
180
    }
×
181
};
182

183
// ensure that indexer is device copyable
184
static_assert(sycl::is_device_copyable_v<StridedIndexer>);
185

186
/* @brief Indexer with shape, strides provided separately */
187
struct UnpackedStridedIndexer
188
{
189
    UnpackedStridedIndexer(int _nd,
190
                           ssize_t _offset,
191
                           ssize_t const *_shape,
192
                           ssize_t const *_strides)
193
        : nd(_nd), starting_offset(_offset), shape(_shape), strides(_strides)
NEW
194
    {
×
NEW
195
    }
×
196

NEW
197
    ssize_t operator()(ssize_t gid) const { return compute_offset(gid); }
×
198

199
    ssize_t operator()(std::size_t gid) const
NEW
200
    {
×
NEW
201
        return compute_offset(static_cast<ssize_t>(gid));
×
NEW
202
    }
×
203

204
private:
205
    int nd;
206
    ssize_t starting_offset;
207
    ssize_t const *shape;
208
    ssize_t const *strides;
209

210
    ssize_t compute_offset(ssize_t gid) const
NEW
211
    {
×
NEW
212
        using dpnp::tensor::strides::CIndexer_vector;
×
NEW
213

×
NEW
214
        CIndexer_vector _ind(nd);
×
NEW
215
        ssize_t relative_offset(0);
×
NEW
216
        _ind.get_displacement<const ssize_t *, const ssize_t *>(
×
NEW
217
            gid,
×
NEW
218
            shape,   // shape ptr
×
NEW
219
            strides, // strides ptr
×
NEW
220
            relative_offset);
×
NEW
221
        return starting_offset + relative_offset;
×
NEW
222
    }
×
223
};
224

225
// ensure that indexer is device copyable
226
static_assert(sycl::is_device_copyable_v<UnpackedStridedIndexer>);
227

228
struct Strided1DIndexer
229
{
NEW
230
    Strided1DIndexer(std::size_t _size) : offset{}, size(_size), step(1) {}
×
231
    Strided1DIndexer(ssize_t _size)
232
        : offset{}, size(static_cast<std::size_t>(_size)), step(1)
NEW
233
    {
×
NEW
234
    }
×
235
    Strided1DIndexer(std::size_t _size, ssize_t _step)
236
        : offset{}, size(_size), step(_step)
NEW
237
    {
×
NEW
238
    }
×
239
    Strided1DIndexer(std::size_t _size, std::size_t _step)
240
        : offset{}, size(_size), step(static_cast<ssize_t>(_step))
NEW
241
    {
×
NEW
242
    }
×
243
    Strided1DIndexer(ssize_t _size, ssize_t _step)
244
        : offset{}, size(static_cast<std::size_t>(_size)), step(_step)
NEW
245
    {
×
NEW
246
    }
×
247
    Strided1DIndexer(ssize_t _offset, std::size_t _size, ssize_t _step)
248
        : offset(_offset), size(_size), step(_step)
NEW
249
    {
×
NEW
250
    }
×
251
    Strided1DIndexer(ssize_t _offset, std::size_t _size, std::size_t _step)
252
        : offset(_offset), size(_size), step(static_cast<ssize_t>(_step))
NEW
253
    {
×
NEW
254
    }
×
255
    Strided1DIndexer(ssize_t _offset, ssize_t _size, ssize_t _step)
256
        : offset(_offset), size(static_cast<std::size_t>(_size)), step(_step)
NEW
257
    {
×
NEW
258
    }
×
259

260
    ssize_t operator()(std::size_t gid) const
NEW
261
    {
×
NEW
262
        // ensure 0 <= gid < size
×
NEW
263
        return offset + std::min<std::size_t>(gid, size - 1) * step;
×
NEW
264
    }
×
265

266
private:
267
    ssize_t offset = 0;
268
    std::size_t size = 1;
269
    ssize_t step = 1;
270
};
271

272
static_assert(sycl::is_device_copyable_v<Strided1DIndexer>);
273

274
struct Strided1DCyclicIndexer
275
{
276
    Strided1DCyclicIndexer(ssize_t _offset, ssize_t _size, ssize_t _step)
277
        : offset(_offset), size(static_cast<std::size_t>(_size)), step(_step)
NEW
278
    {
×
NEW
279
    }
×
280

281
    ssize_t operator()(std::size_t gid) const
NEW
282
    {
×
NEW
283
        return offset + (gid % size) * step;
×
NEW
284
    }
×
285

286
private:
287
    ssize_t offset = 0;
288
    std::size_t size = 1;
289
    ssize_t step = 1;
290
};
291

292
static_assert(sycl::is_device_copyable_v<Strided1DCyclicIndexer>);
293

294
template <typename displacementT>
295
struct TwoOffsets
296
{
297
    constexpr TwoOffsets() : first_offset(0), second_offset(0) {}
298
    constexpr TwoOffsets(const displacementT &first_offset_,
299
                         const displacementT &second_offset_)
NEW
300
        : first_offset(first_offset_), second_offset(second_offset_)
×
NEW
301
    {
×
NEW
302
    }
×
303

NEW
304
    constexpr displacementT get_first_offset() const { return first_offset; }
×
NEW
305
    constexpr displacementT get_second_offset() const { return second_offset; }
×
306

307
private:
308
    displacementT first_offset = 0;
309
    displacementT second_offset = 0;
310
};
311

312
struct TwoOffsets_StridedIndexer
313
{
314
    TwoOffsets_StridedIndexer(int common_nd,
315
                              ssize_t first_offset_,
316
                              ssize_t second_offset_,
317
                              ssize_t const *_packed_shape_strides)
318
        : nd(common_nd), starting_first_offset(first_offset_),
127✔
319
          starting_second_offset(second_offset_),
127✔
320
          shape_strides(_packed_shape_strides)
127✔
321
    {
127✔
322
    }
127✔
323

324
    TwoOffsets<ssize_t> operator()(ssize_t gid) const
NEW
325
    {
×
NEW
326
        return compute_offsets(gid);
×
NEW
327
    }
×
328

329
    TwoOffsets<ssize_t> operator()(std::size_t gid) const
NEW
330
    {
×
NEW
331
        return compute_offsets(static_cast<ssize_t>(gid));
×
NEW
332
    }
×
333

334
private:
335
    int nd;
336
    ssize_t starting_first_offset;
337
    ssize_t starting_second_offset;
338
    ssize_t const *shape_strides;
339

340
    TwoOffsets<ssize_t> compute_offsets(ssize_t gid) const
NEW
341
    {
×
NEW
342
        using dpnp::tensor::strides::CIndexer_vector;
×
NEW
343

×
NEW
344
        CIndexer_vector _ind(nd);
×
NEW
345
        ssize_t relative_first_offset(0);
×
NEW
346
        ssize_t relative_second_offset(0);
×
NEW
347
        _ind.get_displacement<const ssize_t *, const ssize_t *>(
×
NEW
348
            gid,
×
NEW
349
            shape_strides,          // shape ptr
×
NEW
350
            shape_strides + nd,     // strides ptr
×
NEW
351
            shape_strides + 2 * nd, // strides ptr
×
NEW
352
            relative_first_offset, relative_second_offset);
×
NEW
353
        return TwoOffsets<ssize_t>(
×
NEW
354
            starting_first_offset + relative_first_offset,
×
NEW
355
            starting_second_offset + relative_second_offset);
×
NEW
356
    }
×
357
};
358

359
struct TwoZeroOffsets_Indexer
360
{
NEW
361
    constexpr TwoZeroOffsets_Indexer() {}
×
362

363
    constexpr TwoOffsets<ssize_t> operator()(ssize_t) const
NEW
364
    {
×
NEW
365
        return TwoOffsets<ssize_t>();
×
NEW
366
    }
×
367
};
368

369
static_assert(sycl::is_device_copyable_v<TwoZeroOffsets_Indexer>);
370

371
template <typename FirstIndexerT, typename SecondIndexerT>
372
struct TwoOffsets_CombinedIndexer
373
{
374
private:
375
    FirstIndexerT first_indexer_;
376
    SecondIndexerT second_indexer_;
377

378
public:
379
    constexpr TwoOffsets_CombinedIndexer(const FirstIndexerT &first_indexer,
380
                                         const SecondIndexerT &second_indexer)
381
        : first_indexer_(first_indexer), second_indexer_(second_indexer)
382
    {
383
    }
384

385
    constexpr TwoOffsets<ssize_t> operator()(ssize_t gid) const
386
    {
387
        return TwoOffsets<ssize_t>(first_indexer_(gid), second_indexer_(gid));
388
    }
389
};
390

391
template <typename displacementT>
392
struct ThreeOffsets
393
{
394
    constexpr ThreeOffsets()
395
        : first_offset(0), second_offset(0), third_offset(0)
396
    {
397
    }
398
    constexpr ThreeOffsets(const displacementT &first_offset_,
399
                           const displacementT &second_offset_,
400
                           const displacementT &third_offset_)
NEW
401
        : first_offset(first_offset_), second_offset(second_offset_),
×
NEW
402
          third_offset(third_offset_)
×
NEW
403
    {
×
NEW
404
    }
×
405

NEW
406
    constexpr displacementT get_first_offset() const { return first_offset; }
×
NEW
407
    constexpr displacementT get_second_offset() const { return second_offset; }
×
NEW
408
    constexpr displacementT get_third_offset() const { return third_offset; }
×
409

410
private:
411
    displacementT first_offset = 0;
412
    displacementT second_offset = 0;
413
    displacementT third_offset = 0;
414
};
415

416
struct ThreeOffsets_StridedIndexer
417
{
418
    ThreeOffsets_StridedIndexer(int common_nd,
419
                                ssize_t first_offset_,
420
                                ssize_t second_offset_,
421
                                ssize_t third_offset_,
422
                                ssize_t const *_packed_shape_strides)
423
        : nd(common_nd), starting_first_offset(first_offset_),
1,413✔
424
          starting_second_offset(second_offset_),
1,413✔
425
          starting_third_offset(third_offset_),
1,413✔
426
          shape_strides(_packed_shape_strides)
1,413✔
427
    {
1,413✔
428
    }
1,413✔
429

430
    ThreeOffsets<ssize_t> operator()(ssize_t gid) const
NEW
431
    {
×
NEW
432
        return compute_offsets(gid);
×
NEW
433
    }
×
434

435
    ThreeOffsets<ssize_t> operator()(std::size_t gid) const
NEW
436
    {
×
NEW
437
        return compute_offsets(static_cast<ssize_t>(gid));
×
NEW
438
    }
×
439

440
private:
441
    int nd;
442
    ssize_t starting_first_offset;
443
    ssize_t starting_second_offset;
444
    ssize_t starting_third_offset;
445
    ssize_t const *shape_strides;
446

447
    ThreeOffsets<ssize_t> compute_offsets(ssize_t gid) const
NEW
448
    {
×
NEW
449
        using dpnp::tensor::strides::CIndexer_vector;
×
NEW
450

×
NEW
451
        CIndexer_vector _ind(nd);
×
NEW
452
        ssize_t relative_first_offset(0);
×
NEW
453
        ssize_t relative_second_offset(0);
×
NEW
454
        ssize_t relative_third_offset(0);
×
NEW
455
        _ind.get_displacement<const ssize_t *, const ssize_t *>(
×
NEW
456
            gid,
×
NEW
457
            shape_strides,          // shape ptr
×
NEW
458
            shape_strides + nd,     // strides ptr
×
NEW
459
            shape_strides + 2 * nd, // strides ptr
×
NEW
460
            shape_strides + 3 * nd, // strides ptr
×
NEW
461
            relative_first_offset, relative_second_offset,
×
NEW
462
            relative_third_offset);
×
NEW
463
        return ThreeOffsets<ssize_t>(
×
NEW
464
            starting_first_offset + relative_first_offset,
×
NEW
465
            starting_second_offset + relative_second_offset,
×
NEW
466
            starting_third_offset + relative_third_offset);
×
NEW
467
    }
×
468
};
469

470
static_assert(sycl::is_device_copyable_v<ThreeOffsets_StridedIndexer>);
471

472
struct ThreeZeroOffsets_Indexer
473
{
NEW
474
    constexpr ThreeZeroOffsets_Indexer() {}
×
475

476
    constexpr ThreeOffsets<ssize_t> operator()(ssize_t) const
NEW
477
    {
×
NEW
478
        return ThreeOffsets<ssize_t>();
×
NEW
479
    }
×
480

481
    constexpr ThreeOffsets<ssize_t> operator()(std::size_t) const
NEW
482
    {
×
NEW
483
        return ThreeOffsets<ssize_t>();
×
NEW
484
    }
×
485
};
486

487
static_assert(sycl::is_device_copyable_v<ThreeZeroOffsets_Indexer>);
488

489
template <typename FirstIndexerT,
490
          typename SecondIndexerT,
491
          typename ThirdIndexerT>
492
struct ThreeOffsets_CombinedIndexer
493
{
494
private:
495
    FirstIndexerT first_indexer_;
496
    SecondIndexerT second_indexer_;
497
    ThirdIndexerT third_indexer_;
498

499
public:
500
    constexpr ThreeOffsets_CombinedIndexer(const FirstIndexerT &first_indexer,
501
                                           const SecondIndexerT &second_indexer,
502
                                           const ThirdIndexerT &third_indexer)
503
        : first_indexer_(first_indexer), second_indexer_(second_indexer),
504
          third_indexer_(third_indexer)
505
    {
506
    }
507

508
    constexpr ThreeOffsets<ssize_t> operator()(ssize_t gid) const
509
    {
510
        return ThreeOffsets<ssize_t>(first_indexer_(gid), second_indexer_(gid),
511
                                     third_indexer_(gid));
512
    }
513
};
514

515
template <typename displacementT>
516
struct FourOffsets
517
{
518
    constexpr FourOffsets()
519
        : first_offset(0), second_offset(0), third_offset(0), fourth_offset(0)
520
    {
521
    }
522
    constexpr FourOffsets(const displacementT &first_offset_,
523
                          const displacementT &second_offset_,
524
                          const displacementT &third_offset_,
525
                          const displacementT &fourth_offset_)
NEW
526
        : first_offset(first_offset_), second_offset(second_offset_),
×
NEW
527
          third_offset(third_offset_), fourth_offset(fourth_offset_)
×
NEW
528
    {
×
NEW
529
    }
×
530

NEW
531
    constexpr displacementT get_first_offset() const { return first_offset; }
×
NEW
532
    constexpr displacementT get_second_offset() const { return second_offset; }
×
NEW
533
    constexpr displacementT get_third_offset() const { return third_offset; }
×
NEW
534
    constexpr displacementT get_fourth_offset() const { return fourth_offset; }
×
535

536
private:
537
    displacementT first_offset = 0;
538
    displacementT second_offset = 0;
539
    displacementT third_offset = 0;
540
    displacementT fourth_offset = 0;
541
};
542

543
struct FourOffsets_StridedIndexer
544
{
545
    constexpr FourOffsets_StridedIndexer(int common_nd,
546
                                         ssize_t first_offset_,
547
                                         ssize_t second_offset_,
548
                                         ssize_t third_offset_,
549
                                         ssize_t fourth_offset_,
550
                                         ssize_t const *_packed_shape_strides)
551
        : nd(common_nd), starting_first_offset(first_offset_),
856✔
552
          starting_second_offset(second_offset_),
856✔
553
          starting_third_offset(third_offset_),
856✔
554
          starting_fourth_offset(fourth_offset_),
856✔
555
          shape_strides(_packed_shape_strides)
856✔
556
    {
856✔
557
    }
856✔
558

559
    constexpr FourOffsets<ssize_t> operator()(ssize_t gid) const
NEW
560
    {
×
NEW
561
        return compute_offsets(gid);
×
NEW
562
    }
×
563

564
    constexpr FourOffsets<ssize_t> operator()(std::size_t gid) const
NEW
565
    {
×
NEW
566
        return compute_offsets(static_cast<ssize_t>(gid));
×
NEW
567
    }
×
568

569
private:
570
    int nd;
571
    ssize_t starting_first_offset;
572
    ssize_t starting_second_offset;
573
    ssize_t starting_third_offset;
574
    ssize_t starting_fourth_offset;
575
    ssize_t const *shape_strides;
576

577
    FourOffsets<ssize_t> compute_offsets(ssize_t gid) const
NEW
578
    {
×
NEW
579
        using dpnp::tensor::strides::CIndexer_vector;
×
NEW
580

×
NEW
581
        CIndexer_vector _ind(nd);
×
NEW
582
        ssize_t relative_first_offset(0);
×
NEW
583
        ssize_t relative_second_offset(0);
×
NEW
584
        ssize_t relative_third_offset(0);
×
NEW
585
        ssize_t relative_fourth_offset(0);
×
NEW
586
        _ind.get_displacement<const ssize_t *, const ssize_t *>(
×
NEW
587
            gid,
×
NEW
588
            shape_strides,          // shape ptr
×
NEW
589
            shape_strides + nd,     // strides ptr
×
NEW
590
            shape_strides + 2 * nd, // strides ptr
×
NEW
591
            shape_strides + 3 * nd, // strides ptr
×
NEW
592
            shape_strides + 4 * nd, // strides ptr
×
NEW
593
            relative_first_offset, relative_second_offset,
×
NEW
594
            relative_third_offset, relative_fourth_offset);
×
NEW
595
        return FourOffsets<ssize_t>(
×
NEW
596
            starting_first_offset + relative_first_offset,
×
NEW
597
            starting_second_offset + relative_second_offset,
×
NEW
598
            starting_third_offset + relative_third_offset,
×
NEW
599
            starting_fourth_offset + relative_fourth_offset);
×
NEW
600
    }
×
601
};
602

603
static_assert(sycl::is_device_copyable_v<FourOffsets_StridedIndexer>);
604

605
struct FourZeroOffsets_Indexer
606
{
NEW
607
    constexpr FourZeroOffsets_Indexer() {}
×
608

609
    constexpr FourOffsets<ssize_t> operator()(ssize_t) const
NEW
610
    {
×
NEW
611
        return FourOffsets<ssize_t>();
×
NEW
612
    }
×
613
};
614

615
static_assert(sycl::is_device_copyable_v<FourZeroOffsets_Indexer>);
616

617
struct NthStrideOffset
618
{
619
    NthStrideOffset(int common_nd,
620
                    ssize_t const *_offsets,
621
                    ssize_t const *_packed_shape_strides)
622
        : _ind(common_nd), nd(common_nd), offsets(_offsets),
623
          shape_strides(_packed_shape_strides)
NEW
624
    {
×
NEW
625
    }
×
626

627
    std::size_t operator()(ssize_t gid, int n) const
NEW
628
    {
×
NEW
629
        ssize_t relative_offset(0);
×
NEW
630
        _ind.get_displacement<const ssize_t *, const ssize_t *>(
×
NEW
631
            gid, shape_strides, shape_strides + ((n + 1) * nd),
×
NEW
632
            relative_offset);
×
NEW
633

×
NEW
634
        return relative_offset + offsets[n];
×
NEW
635
    }
×
636

637
private:
638
    dpnp::tensor::strides::CIndexer_vector<ssize_t> _ind;
639

640
    int nd;
641
    ssize_t const *offsets;
642
    ssize_t const *shape_strides;
643
};
644

645
static_assert(sycl::is_device_copyable_v<NthStrideOffset>);
646

647
template <int nd>
648
struct FixedDimStridedIndexer
649
{
650
    FixedDimStridedIndexer(const std::array<ssize_t, nd> &_shape,
651
                           const std::array<ssize_t, nd> &_strides,
652
                           ssize_t _offset)
653
        : _ind(_shape), strides(_strides), starting_offset(_offset)
654
    {
655
    }
656
    std::size_t operator()(std::size_t gid) const
657
    {
658
        dpnp::tensor::strides::CIndexer_array<nd, ssize_t> local_indexer(
659
            std::move(_ind));
660
        local_indexer.set(gid);
661
        auto mi = local_indexer.get();
662

663
        ssize_t relative_offset = 0;
664

665
#pragma unroll
666
        for (int i = 0; i < nd; ++i) {
667
            relative_offset += mi[i] * strides[i];
668
        }
669
        return starting_offset + relative_offset;
670
    }
671

672
private:
673
    dpnp::tensor::strides::CIndexer_array<nd, ssize_t> _ind;
674

675
    std::array<ssize_t, nd> strides;
676
    ssize_t starting_offset;
677
};
678

679
static_assert(sycl::is_device_copyable_v<FixedDimStridedIndexer<1>>);
680

681
template <int nd>
682
struct TwoOffsets_FixedDimStridedIndexer
683
{
684
    TwoOffsets_FixedDimStridedIndexer(const std::array<ssize_t, nd> &_shape,
685
                                      const std::array<ssize_t, nd> &_strides1,
686
                                      const std::array<ssize_t, nd> &_strides2,
687
                                      ssize_t _offset1,
688
                                      ssize_t _offset2)
689
        : _ind(_shape), strides1(_strides1), strides2(_strides2),
690
          starting_offset1(_offset1), starting_offset2(_offset2)
691
    {
692
    }
693

694
    TwoOffsets<ssize_t> operator()(std::size_t gid) const
695
    {
696
        dpnp::tensor::strides::CIndexer_array<nd, ssize_t> local_indexer(
697
            std::move(_ind));
698
        local_indexer.set(gid);
699
        auto mi = local_indexer.get();
700

701
        ssize_t relative_offset1 = 0;
702
#pragma unroll
703
        for (int i = 0; i < nd; ++i) {
704
            relative_offset1 += mi[i] * strides1[i];
705
        }
706

707
        ssize_t relative_offset2 = 0;
708
#pragma unroll
709
        for (int i = 0; i < nd; ++i) {
710
            relative_offset2 += mi[i] * strides2[i];
711
        }
712

713
        return TwoOffsets<ssize_t>(starting_offset1 + relative_offset1,
714
                                   starting_offset2 + relative_offset2);
715
    }
716

717
private:
718
    dpnp::tensor::strides::CIndexer_array<nd, ssize_t> _ind;
719

720
    std::array<ssize_t, nd> strides1;
721
    std::array<ssize_t, nd> strides2;
722
    ssize_t starting_offset1;
723
    ssize_t starting_offset2;
724
};
725

726
static_assert(sycl::is_device_copyable_v<TwoOffsets_FixedDimStridedIndexer<1>>);
727

728
template <int nd>
729
struct ThreeOffsets_FixedDimStridedIndexer
730
{
731
    ThreeOffsets_FixedDimStridedIndexer(
732
        const std::array<ssize_t, nd> &_shape,
733
        const std::array<ssize_t, nd> &_strides1,
734
        const std::array<ssize_t, nd> &_strides2,
735
        const std::array<ssize_t, nd> &_strides3,
736
        ssize_t _offset1,
737
        ssize_t _offset2,
738
        ssize_t _offset3)
739
        : _ind(_shape), strides1(_strides1), strides2(_strides2),
740
          strides3(_strides3), starting_offset1(_offset1),
741
          starting_offset2(_offset2), starting_offset3(_offset3)
742
    {
743
    }
744

745
    ThreeOffsets<ssize_t> operator()(std::size_t gid) const
746
    {
747
        dpnp::tensor::strides::CIndexer_array<nd, ssize_t> local_indexer(
748
            std::move(_ind));
749
        local_indexer.set(gid);
750
        auto mi = local_indexer.get();
751

752
        ssize_t relative_offset1 = 0;
753
#pragma unroll
754
        for (int i = 0; i < nd; ++i) {
755
            relative_offset1 += mi[i] * strides1[i];
756
        }
757

758
        ssize_t relative_offset2 = 0;
759
#pragma unroll
760
        for (int i = 0; i < nd; ++i) {
761
            relative_offset2 += mi[i] * strides2[i];
762
        }
763

764
        ssize_t relative_offset3 = 0;
765
#pragma unroll
766
        for (int i = 0; i < nd; ++i) {
767
            relative_offset3 += mi[i] * strides3[i];
768
        }
769

770
        return ThreeOffsets<ssize_t>(starting_offset1 + relative_offset1,
771
                                     starting_offset2 + relative_offset2,
772
                                     starting_offset3 + relative_offset3);
773
    }
774

775
private:
776
    dpnp::tensor::strides::CIndexer_array<nd, ssize_t> _ind;
777

778
    std::array<ssize_t, nd> strides1;
779
    std::array<ssize_t, nd> strides2;
780
    std::array<ssize_t, nd> strides3;
781
    ssize_t starting_offset1;
782
    ssize_t starting_offset2;
783
    ssize_t starting_offset3;
784
};
785

786
static_assert(
787
    sycl::is_device_copyable_v<ThreeOffsets_FixedDimStridedIndexer<1>>);
788
} // namespace dpnp::tensor::offset_utils
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc