• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IntelPython / dpctl / 5579681118

pending completion
5579681118

push

github

web-flow
Merge pull request #1281 from IntelPython/unary_out_overlap

Created a temporary copy in case of overlap for unary function

2269 of 2783 branches covered (81.53%)

Branch coverage included in aggregate %.

15 of 16 new or added lines in 1 file covered. (93.75%)

34 existing lines in 1 file now uncovered.

8281 of 9898 relevant lines covered (83.66%)

5828.35 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.41
/dpctl/tensor/_copy_utils.py
1
#                       Data Parallel Control (dpctl)
2
#
3
#  Copyright 2020-2022 Intel Corporation
4
#
5
#  Licensed under the Apache License, Version 2.0 (the "License");
6
#  you may not use this file except in compliance with the License.
7
#  You may obtain a copy of the License at
8
#
9
#     http://www.apache.org/licenses/LICENSE-2.0
10
#
11
#  Unless required by applicable law or agreed to in writing, software
12
#  distributed under the License is distributed on an "AS IS" BASIS,
13
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
#  See the License for the specific language governing permissions and
15
#  limitations under the License.
16
import operator
1✔
17

18
import numpy as np
1✔
19
from numpy.core.numeric import normalize_axis_index
1✔
20

21
import dpctl
1✔
22
import dpctl.memory as dpm
1✔
23
import dpctl.tensor as dpt
1✔
24
import dpctl.tensor._tensor_impl as ti
1✔
25
import dpctl.utils
1✔
26
from dpctl.tensor._ctors import _get_dtype
1✔
27
from dpctl.tensor._device import normalize_queue_device
1✔
28

29
# Module docstring, assembled from two fragments joined by a single space.
__doc__ = " ".join(
    (
        "Implementation module for copy- and cast- operations on",
        ":class:`dpctl.tensor.usm_ndarray`.",
    )
)
33

34

35
def _copy_to_numpy(ary):
    """Copies content of `ary` into a new :class:`numpy.ndarray`.

    The whole USM allocation underlying `ary` is copied into a USM host
    allocation, and a NumPy array with the shape, data type, strides and
    offset of `ary` is constructed over that host copy.

    Args:
        ary (usm_ndarray):
            Input array.
    Returns:
        :class:`numpy.ndarray`:
            Array of the same shape and data type as `ary`.
    Raises:
        TypeError:
            If `ary` is not an instance of
            :class:`dpctl.tensor.usm_ndarray`.
    """
    if not isinstance(ary, dpt.usm_ndarray):
        raise TypeError(
            f"Expected dpctl.tensor.usm_ndarray, got {type(ary)}"
        )
    nb = ary.usm_data.nbytes
    hh = dpm.MemoryUSMHost(nb, queue=ary.sycl_queue)
    hh.copy_from_device(ary.usm_data)
    # View the raw bytes of the host copy as elements of the array's dtype
    h = np.ndarray(nb, dtype="u1", buffer=hh).view(ary.dtype)
    itsz = ary.itemsize
    # usm_ndarray strides and offset are counted in elements,
    # NumPy expects them in bytes
    strides_bytes = tuple(si * itsz for si in ary.strides)
    offset = ary.__sycl_usm_array_interface__.get("offset", 0) * itsz
    return np.ndarray(
        ary.shape,
        dtype=ary.dtype,
        buffer=h,
        strides=strides_bytes,
        offset=offset,
    )
52

53

54
def _copy_from_numpy(np_ary, usm_type="device", sycl_queue=None):
    """Copies NumPy array `np_ary` into a newly allocated usm_ndarray.

    Args:
        np_ary (array-like):
            Input convertible to :class:`numpy.ndarray`.
        usm_type (str):
            USM allocation type requested for the output array.
            Default: `"device"`.
        sycl_queue (:class:`dpctl.SyclQueue`, optional):
            Queue used to allocate the output array. Default: `None`.
    Returns:
        usm_ndarray:
            Array of the same shape as the input. Double precision real
            and complex inputs (type characters "d" and "D") are stored
            as `float32` and `complex64` respectively when the device
            of the allocation queue reports `has_aspect_fp64` as
            `False`.
    """
    # np.require may perform a copy to meet the stated requirements
    host_ary = np.require(np_ary, requirements=["A", "E"])
    alloc_q = normalize_queue_device(sycl_queue=sycl_queue, device=None)
    host_dt = host_ary.dtype
    usm_dt = host_dt
    if host_dt.char in "dD":
        if alloc_q.sycl_device.has_aspect_fp64 is False:
            # fall back to the single precision type of the same kind
            if host_dt.char == "d":
                usm_dt = dpt.dtype("float32")
            else:
                usm_dt = dpt.dtype("complex64")
    res = dpt.empty(
        host_ary.shape, dtype=usm_dt, usm_type=usm_type, sycl_queue=sycl_queue
    )
    _copy_from_numpy_into(res, host_ary)
    return res
1✔
71

72

73
def _copy_from_numpy_into(dst, np_ary):
    """Copies `np_ary` into `dst` of type :class:`dpctl.tensor.usm_ndarray`.

    The NumPy array is broadcast to the shape of `dst` before copying.

    Args:
        dst (usm_ndarray):
            Destination array.
        np_ary (:class:`numpy.ndarray`):
            Source array.
    Raises:
        TypeError:
            If `np_ary` is not a :class:`numpy.ndarray`, or `dst` is not
            a :class:`dpctl.tensor.usm_ndarray`.
    """
    if not isinstance(np_ary, np.ndarray):
        raise TypeError(f"Expected numpy.ndarray, got {type(np_ary)}")
    if not isinstance(dst, dpt.usm_ndarray):
        raise TypeError(f"Expected usm_ndarray, got {type(dst)}")
    host_src = np_ary
    if not np_ary.flags["OWNDATA"]:
        # Walk the chain of NumPy views to find the object owning the memory
        root = np_ary.base
        while isinstance(root, np.ndarray):
            root = root.base
        if isinstance(root, dpm._memory._Memory):
            # A copy is required here: the subsequent
            # _copy_numpy_ndarray_into_usm_ndarray is implemented using
            # sycl::buffer, and using USM-pointers with sycl::buffer
            # results in undefined behavior
            host_src = np_ary.copy()
    src_ary = np.broadcast_to(host_src, dst.shape)
    copy_q = dst.sycl_queue
    if copy_q.sycl_device.has_aspect_fp64 is False:
        # Device lacks double precision support: down-cast on the host
        single_precision = {"d": np.float32, "D": np.complex64}
        narrow_dt = single_precision.get(src_ary.dtype.char)
        if narrow_dt is not None:
            src_ary = src_ary.astype(narrow_dt)
    ti._copy_numpy_ndarray_into_usm_ndarray(
        src=src_ary, dst=dst, sycl_queue=copy_q
    )
105

106

107
def from_numpy(np_ary, device=None, usm_type="device", sycl_queue=None):
    """
    from_numpy(np_ary, device=None, usm_type="device", sycl_queue=None)

    Creates :class:`dpctl.tensor.usm_ndarray` from instance of
    :class:`numpy.ndarray`.

    Args:
        np_ary (array-like): An instance of input convertible to
            :class:`numpy.ndarray`
        device (object): array API specification of device where the
            output array is created. Device can be specified by a
            filter selector string, an instance of
            :class:`dpctl.SyclDevice`, an instance of
            :class:`dpctl.SyclQueue`, or an instance of
            :class:`dpctl.tensor.Device`. If the value is `None`,
            returned array is created on the default-selected device.
            Default: `None`.
        usm_type (str): The requested USM allocation type for the
            output array. Recognized values are `"device"`, `"shared"`,
            or `"host"`.
        sycl_queue (:class:`dpctl.SyclQueue`, optional):
            A SYCL queue that determines output array allocation device
            as well as execution placement of data movement operations.
            The `device` and `sycl_queue` arguments
            are equivalent. Only one of them should be specified. If both
            are provided, they must be consistent and result in using the
            same execution queue. Default: `None`.

    The returned array has the same shape as the input. If the device
    reports that it lacks the double precision aspect, input of type
    `float64` or `complex128` is stored as `float32` or `complex64`
    respectively.
    """
    return _copy_from_numpy(
        np_ary,
        usm_type=usm_type,
        sycl_queue=normalize_queue_device(
            sycl_queue=sycl_queue, device=device
        ),
    )
1✔
144

145

146
def to_numpy(usm_ary):
    """
    to_numpy(usm_ary)

    Creates a :class:`numpy.ndarray` holding a copy of the content of
    :class:`dpctl.tensor.usm_ndarray` instance `usm_ary`.

    Args:
        usm_ary (usm_ndarray):
            Input array
    Returns:
        :class:`numpy.ndarray`:
            An array of the same shape and the same data type as
            `usm_ary`, populated with its content
    """
    host_copy = _copy_to_numpy(usm_ary)
    return host_copy
1✔
162

163

164
def asnumpy(usm_ary):
    """
    asnumpy(usm_ary)

    Creates a :class:`numpy.ndarray` holding a copy of the content of
    :class:`dpctl.tensor.usm_ndarray` instance `usm_ary`.

    Args:
        usm_ary (usm_ndarray):
            Input array
    Returns:
        :class:`numpy.ndarray`:
            An array of the same shape and the same data type as
            `usm_ary`, populated with its content
    """
    host_copy = _copy_to_numpy(usm_ary)
    return host_copy
1✔
181

182

183
class Dummy:
    """
    Minimal carrier object exposing a user-supplied
    ``__sycl_usm_array_interface__`` attribute.
    """

    def __init__(self, iface):
        # Store the given object under the protocol attribute name as-is
        self.__sycl_usm_array_interface__ = iface
×
190

191

192
def _copy_overlapping(dst, src):
    """Copies `src` into `dst` by way of an intermediate temporary array.

    Assumes `src` and `dst` have the same shape. The content of `src`
    is first copied into a freshly allocated C-ordered array, which is
    then copied into `dst`.
    """
    exec_q = normalize_queue_device(sycl_queue=dst.sycl_queue)
    staging = dpt.usm_ndarray(
        src.shape,
        dtype=src.dtype,
        buffer="device",
        order="C",
        buffer_ctor_kwargs={"queue": exec_q},
    )
    # Stage 1: src -> staging
    ht_ev_in, ev_in = ti._copy_usm_ndarray_into_usm_ndarray(
        src=src, dst=staging, sycl_queue=exec_q
    )
    # Stage 2: staging -> dst, submitted with a dependency on stage 1
    ht_ev_out, _ = ti._copy_usm_ndarray_into_usm_ndarray(
        src=staging, dst=dst, sycl_queue=exec_q, depends=[ev_in]
    )
    ht_ev_out.wait()
    ht_ev_in.wait()
×
210

211

212
def _copy_same_shape(dst, src):
    """Copies `src` into `dst`, assuming both have the same shape.

    Arrays whose memory does not overlap are copied directly. For
    overlapping arrays the copy is skipped when both start at the same
    pointer and are either the same object or have equal strides and
    data types; otherwise the copy goes through a temporary.
    """
    if not ti._array_overlap(dst, src):
        # memory regions are disjoint, a direct copy is safe
        host_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray(
            src=src, dst=dst, sycl_queue=dst.sycl_queue
        )
        host_ev.wait()
        return

    if src._pointer == dst._pointer:
        if src is dst:
            return
        if src.strides == dst.strides and src.dtype == dst.dtype:
            return

    _copy_overlapping(src=src, dst=dst)
1✔
228

229

230
def _broadcast_shapes(sh1, sh2):
    """Returns the shape obtained by broadcasting shapes `sh1` and `sh2`.

    Uses :func:`numpy.broadcast_shapes` when NumPy provides it, and
    falls back to broadcasting two placeholder arrays otherwise.

    Raises:
        ValueError:
            If the shapes can not be broadcast together.
    """
    native = getattr(np, "broadcast_shapes", None)
    if native is not None:
        return native(sh1, sh2)
    # Fallback: use placeholder arrays whose memory footprint
    # is independent of the number of array elements
    lhs = np.empty(sh1, dtype=[])
    rhs = np.empty(sh2, dtype=[])
    return np.broadcast(lhs, rhs).shape
244

245

246
def _copy_from_usm_ndarray_to_usm_ndarray(dst, src):
    """Copies content of `src` into `dst`.

    Arrays of equal shape are copied directly. Otherwise the shapes
    must be broadcast-compatible, and any dimensions of the broadcast
    shape in excess of `dst.ndim` must have extent one.

    Args:
        dst (usm_ndarray):
            Destination array.
        src (usm_ndarray):
            Source array. It is not modified by this function.
    Raises:
        TypeError:
            If either argument is not a
            :class:`dpctl.tensor.usm_ndarray`.
        ValueError:
            If the shapes are not compatible, or `dst` has fewer
            elements than `src`.
    """
    if not (
        isinstance(dst, dpt.usm_ndarray) and isinstance(src, dpt.usm_ndarray)
    ):
        raise TypeError(
            "Both types are expected to be dpctl.tensor.usm_ndarray, "
            f"got {type(dst)} and {type(src)}."
        )

    if dst.ndim == src.ndim and dst.shape == src.shape:
        _copy_same_shape(dst, src)
        return

    try:
        common_shape = _broadcast_shapes(dst.shape, src.shape)
    except ValueError as exc:
        raise ValueError("Shapes of two arrays are not compatible") from exc

    if dst.size < src.size:
        raise ValueError("Destination is smaller than source")

    if len(common_shape) > dst.ndim:
        # Leading dimensions not present in `dst` must all be of extent one
        ones_count = len(common_shape) - dst.ndim
        if any(extent != 1 for extent in common_shape[:ones_count]):
            raise ValueError(
                f"Array of shape {src.shape} can not be copied into "
                f"array of shape {dst.shape}"
            )
        common_shape = common_shape[ones_count:]

    if src.ndim < len(common_shape):
        # Prepend zero strides for the dimensions missing in `src`
        new_src_strides = (0,) * (len(common_shape) - src.ndim) + src.strides
        src_same_shape = dpt.usm_ndarray(
            common_shape, dtype=src.dtype, buffer=src, strides=new_src_strides
        )
    else:
        # Reshape a fresh view instead of `src` itself, so that the array
        # passed in by the caller keeps its original shape
        src_same_shape = src[...]
        src_same_shape.shape = common_shape

    _copy_same_shape(dst, src_same_shape)
1✔
288

289

290
def copy(usm_ary, order="K"):
    """copy(usm_ary, order="K")

    Creates a copy of given instance of :class:`dpctl.tensor.usm_ndarray`.

    Args:
        usm_ary (usm_ndarray):
            Input array.
        order ({"C", "F", "A", "K"}, optional):
            Controls the memory layout of the output array.
    Returns:
        usm_ndarray:
            A copy of the input array.
    Raises:
        TypeError:
            If `usm_ary` is not a :class:`dpctl.tensor.usm_ndarray`.
        ValueError:
            If `order` is not one of the recognized values.

    Memory layout of the copy is controlled by `order` keyword,
    following NumPy's conventions. The `order` keywords can be
    one of the following:

       - "C": C-contiguous memory layout
       - "F": Fortran-contiguous memory layout
       - "A": Fortran-contiguous if the input array is also Fortran-contiguous,
         otherwise C-contiguous
       - "K": match the layout of `usm_ary` as closely as possible.

    """
    if not isinstance(usm_ary, dpt.usm_ndarray):
        # raise (rather than return) the exception for invalid input
        raise TypeError(
            f"Expected object of type dpt.usm_ndarray, got {type(usm_ary)}"
        )
    if order == "C":
        copy_order = "C"
    elif order == "F":
        copy_order = "F"
    elif order == "A" or order == "K":
        copy_order = "F" if usm_ary.flags.f_contiguous else "C"
    else:
        raise ValueError(
            "Unrecognized value of the order keyword. "
            "Recognized values are 'A', 'C', 'F', or 'K'"
        )
    c_contig = usm_ary.flags.c_contiguous
    f_contig = usm_ary.flags.f_contiguous
    R = dpt.usm_ndarray(
        usm_ary.shape,
        dtype=usm_ary.dtype,
        buffer=usm_ary.usm_type,
        order=copy_order,
        buffer_ctor_kwargs={"queue": usm_ary.sycl_queue},
    )
    if order == "K" and not (c_contig or f_contig):
        # Input is neither C- nor F-contiguous: re-stride the freshly
        # allocated memory based on the ordering of the input's strides
        original_strides = usm_ary.strides
        ind = sorted(
            range(usm_ary.ndim),
            key=lambda i: abs(original_strides[i]),
            reverse=True,
        )
        # NOTE(review): element k below is R.strides[ind[ind[k]]], i.e.
        # `ind` is applied twice - confirm this is the intended permutation
        new_strides = tuple(R.strides[ind[i]] for i in ind)
        R = dpt.usm_ndarray(
            usm_ary.shape,
            dtype=usm_ary.dtype,
            buffer=R.usm_data,
            strides=new_strides,
        )
    _copy_same_shape(R, usm_ary)
    return R
1✔
360

361

362
def astype(usm_ary, newdtype, order="K", casting="unsafe", copy=True):
    """astype(usm_ary, newdtype, order="K", casting="unsafe", copy=True)

    Returns a copy of the :class:`dpctl.tensor.usm_ndarray`, cast to a
    specified type.

    Args:
        usm_ary (usm_ndarray):
            An input array.
        newdtype (dtype):
            The data type of the resulting array. It is resolved
            together with the queue of `usm_ary`; per the original
            documentation of this function, `None` gives the default
            floating point type supported by the device where the
            array is allocated.
        order ({"C", "F", "A", "K"}, optional):
            Controls memory layout of the resulting array if a copy
            is returned.
        casting ({'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional):
            Controls what kind of data casting may occur. Please see
            :meth:`numpy.ndarray.astype` for description of casting modes.
        copy (bool, optional):
            By default, `astype` always returns a newly allocated array.
            If this keyword is set to `False`, the input array itself
            may be returned when possible.

    Returns:
        usm_ndarray:
            An array with requested data type.

    Raises:
        TypeError:
            If `usm_ary` is not a :class:`dpctl.tensor.usm_ndarray`, or
            the cast is not permitted by the `casting` rule.
        ValueError:
            If `order` is not one of the recognized values.
    """
    if not isinstance(usm_ary, dpt.usm_ndarray):
        # raise (rather than return) the exception for invalid input
        raise TypeError(
            f"Expected object of type dpt.usm_ndarray, got {type(usm_ary)}"
        )
    if not isinstance(order, str) or order not in ["A", "C", "F", "K"]:
        raise ValueError(
            "Unrecognized value of the order keyword. "
            "Recognized values are 'A', 'C', 'F', or 'K'"
        )
    ary_dtype = usm_ary.dtype
    target_dtype = _get_dtype(newdtype, usm_ary.sycl_queue)
    if not dpt.can_cast(ary_dtype, target_dtype, casting=casting):
        raise TypeError(
            f"Can not cast from {ary_dtype} to {newdtype} "
            f"according to rule {casting}."
        )
    c_contig = usm_ary.flags.c_contiguous
    f_contig = usm_ary.flags.f_contiguous
    needs_copy = copy or not ary_dtype == target_dtype
    if not needs_copy and (order != "K"):
        # Same data type and no copy requested: a copy is still needed
        # when the contiguity of the input conflicts with `order`
        needs_copy = (c_contig and order not in ["A", "C"]) or (
            f_contig and order not in ["A", "F"]
        )
    if not needs_copy:
        return usm_ary
    # `order` was validated above, so only the four known values occur:
    # "F" is used as given, "A" and "K" follow F-contiguity of the input
    if order == "F" or (order in ("A", "K") and f_contig):
        copy_order = "F"
    else:
        copy_order = "C"
    R = dpt.usm_ndarray(
        usm_ary.shape,
        dtype=target_dtype,
        buffer=usm_ary.usm_type,
        order=copy_order,
        buffer_ctor_kwargs={"queue": usm_ary.sycl_queue},
    )
    if order == "K" and not (c_contig or f_contig):
        # Input is neither C- nor F-contiguous: re-stride the freshly
        # allocated memory based on the ordering of the input's strides
        original_strides = usm_ary.strides
        ind = sorted(
            range(usm_ary.ndim),
            key=lambda i: abs(original_strides[i]),
            reverse=True,
        )
        # NOTE(review): element k below is R.strides[ind[ind[k]]], i.e.
        # `ind` is applied twice - confirm this is the intended permutation
        new_strides = tuple(R.strides[ind[i]] for i in ind)
        R = dpt.usm_ndarray(
            usm_ary.shape,
            dtype=target_dtype,
            buffer=R.usm_data,
            strides=new_strides,
        )
    _copy_from_usm_ndarray_to_usm_ndarray(R, usm_ary)
    return R
1✔
456

457

458
def _extract_impl(ary, ary_mask, axis=0):
    """Extracts elements of `ary` selected by `ary_mask`, where the mask
    is applied to the dimensions of `ary` starting at position `axis`.

    Args:
        ary (usm_ndarray):
            Array to extract elements from.
        ary_mask (usm_ndarray):
            Mask array.
        axis (int):
            First dimension of `ary` the mask applies to. Default: `0`.
    Returns:
        usm_ndarray:
            Newly allocated array in which the masked dimensions of
            `ary` are replaced by a single dimension whose extent is
            the count returned by `ti.mask_positions`.
    Raises:
        TypeError:
            If `ary` or `ary_mask` is not a usm_ndarray.
        dpctl.utils.ExecutionPlacementError:
            If no common execution queue can be determined.
        ValueError:
            If the mask does not fit into dimensions of `ary`
            starting from `axis`.
    """
    for arg in (ary, ary_mask):
        if not isinstance(arg, dpt.usm_ndarray):
            raise TypeError(
                f"Expecting type dpctl.tensor.usm_ndarray, got {type(arg)}"
            )
    common_q = dpctl.utils.get_execution_queue(
        (ary.sycl_queue, ary_mask.sycl_queue)
    )
    if common_q is None:
        raise dpctl.utils.ExecutionPlacementError(
            "arrays have different associated queues. "
            "Use `Y.to_device(X.device)` to migrate."
        )
    nd = ary.ndim
    start = normalize_axis_index(operator.index(axis), nd)
    stop = start + ary_mask.ndim
    if start < 0 or stop > nd:
        raise ValueError(
            "Parameter p is inconsistent with input array dimensions"
        )
    cumsum = dpt.empty(
        ary_mask.size, dtype=dpt.int64, device=ary_mask.device
    )
    # kernels are submitted to the queue of the scratch array
    exec_q = cumsum.sycl_queue
    mask_count = ti.mask_positions(ary_mask, cumsum, sycl_queue=exec_q)
    dst_shape = ary.shape[:start] + (mask_count,) + ary.shape[stop:]
    dst = dpt.empty(
        dst_shape, dtype=ary.dtype, usm_type=ary.usm_type, device=ary.device
    )
    host_ev, _ = ti._extract(
        src=ary,
        cumsum=cumsum,
        axis_start=start,
        axis_end=stop,
        dst=dst,
        sycl_queue=exec_q,
    )
    host_ev.wait()
    return dst
1✔
502

503

504
def _nonzero_impl(ary):
    """Computes per-dimension index arrays for `ary` using
    `ti.mask_positions` and `ti._nonzero`.

    Args:
        ary (usm_ndarray):
            Input array.
    Returns:
        tuple of usm_ndarray:
            One array of type `int64` per dimension of `ary`; each is a
            row of a shared two-dimensional index array.
    Raises:
        TypeError:
            If `ary` is not a usm_ndarray.
    """
    if not isinstance(ary, dpt.usm_ndarray):
        raise TypeError(
            f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary)}"
        )
    q = ary.sycl_queue
    nd = ary.ndim
    positions = dpt.empty(ary.size, dtype=dpt.int64, sycl_queue=q, order="C")
    nz_count = ti.mask_positions(ary, positions, sycl_queue=q)
    indexes = dpt.empty(
        (nd, nz_count),
        dtype=positions.dtype,
        usm_type=ary.usm_type,
        sycl_queue=q,
        order="C",
    )
    host_ev, _ = ti._nonzero(positions, indexes, ary.shape, q)
    # row k of `indexes` holds the indices along dimension k
    res = tuple(indexes[axis, :] for axis in range(nd))
    host_ev.wait()
    return res
1✔
527

528

529
def _take_multi_index(ary, inds, p):
    """Takes elements of `ary` addressed by the integer index arrays
    `inds`, which index consecutive dimensions of `ary` starting at `p`.

    Args:
        ary (usm_ndarray):
            Array to take elements from.
        inds (usm_ndarray, or list/tuple of usm_ndarray):
            Index arrays of integral data type. Multiple index arrays
            are broadcast against each other.
        p (int):
            First dimension of `ary` being indexed.
    Returns:
        usm_ndarray:
            Newly allocated array, in which the indexed dimensions of
            `ary` are replaced by the shape of the index arrays.
    Raises:
        TypeError:
            If `ary` or any of the index arrays is not a usm_ndarray.
        dpctl.utils.ExecutionPlacementError:
            If no common execution queue can be determined.
        IndexError:
            If any index array is not of an integral data type.
    """
    if not isinstance(ary, dpt.usm_ndarray):
        raise TypeError(
            f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary)}"
        )
    queues_ = [
        ary.sycl_queue,
    ]
    usm_types_ = [
        ary.usm_type,
    ]
    if not isinstance(inds, (list, tuple)):
        inds = (inds,)
    all_integers = True
    for ind in inds:
        # validate index arrays the same way `_put_multi_index` does
        if not isinstance(ind, dpt.usm_ndarray):
            raise TypeError(
                "Index arrays are expected to be of type "
                f"dpctl.tensor.usm_ndarray, got {type(ind)}"
            )
        queues_.append(ind.sycl_queue)
        usm_types_.append(ind.usm_type)
        if all_integers:
            all_integers = ind.dtype.kind in "ui"
    exec_q = dpctl.utils.get_execution_queue(queues_)
    if exec_q is None:
        raise dpctl.utils.ExecutionPlacementError(
            "Can not automatically determine where to allocate the "
            "result or perform execution. "
            "Use `usm_ndarray.to_device` method to migrate data to "
            "be associated with the same queue."
        )
    if not all_integers:
        raise IndexError(
            "arrays used as indices must be of integer (or boolean) type"
        )
    if len(inds) > 1:
        inds = dpt.broadcast_arrays(*inds)
    ary_ndim = ary.ndim
    p = normalize_axis_index(operator.index(p), ary_ndim)

    # indexed dimensions are replaced by the (common) shape of the indices
    res_shape = ary.shape[:p] + inds[0].shape + ary.shape[p + len(inds) :]
    res_usm_type = dpctl.utils.get_coerced_usm_type(usm_types_)
    res = dpt.empty(
        res_shape, dtype=ary.dtype, usm_type=res_usm_type, sycl_queue=exec_q
    )

    hev, _ = ti._take(
        src=ary, ind=inds, dst=res, axis_start=p, mode=0, sycl_queue=exec_q
    )
    hev.wait()

    return res
1✔
570

571

572
def _place_impl(ary, ary_mask, vals, axis=0):
    """Places values `vals` into `ary` according to `ary_mask`, where
    the mask is applied to the dimensions of `ary` starting at position
    `axis`. `ary` is passed as the destination to `ti._place`.

    Args:
        ary (usm_ndarray):
            Destination array.
        ary_mask (usm_ndarray):
            Mask array.
        vals (usm_ndarray or array-like):
            Values to place. Input that is not a usm_ndarray is
            converted with `dpt.asarray` using the data type of `ary`.
            Values are cast to the data type of `ary` if needed and
            broadcast to the expected shape.
        axis (int):
            First dimension of `ary` the mask applies to. Default: `0`.
    Raises:
        TypeError:
            If `ary` or `ary_mask` is not a usm_ndarray.
        dpctl.utils.ExecutionPlacementError:
            If no common execution queue can be determined.
        ValueError:
            If the mask does not fit into dimensions of `ary`
            starting from `axis`.
    """
    for arg in (ary, ary_mask):
        if not isinstance(arg, dpt.usm_ndarray):
            raise TypeError(
                f"Expecting type dpctl.tensor.usm_ndarray, got {type(arg)}"
            )
    exec_q = dpctl.utils.get_execution_queue(
        (
            ary.sycl_queue,
            ary_mask.sycl_queue,
        )
    )
    if exec_q is not None:
        if isinstance(vals, dpt.usm_ndarray):
            exec_q = dpctl.utils.get_execution_queue((exec_q, vals.sycl_queue))
        else:
            vals = dpt.asarray(vals, dtype=ary.dtype, sycl_queue=exec_q)
    if exec_q is None:
        raise dpctl.utils.ExecutionPlacementError(
            "arrays have different associated queues. "
            "Use `Y.to_device(X.device)` to migrate."
        )
    nd = ary.ndim
    start = normalize_axis_index(operator.index(axis), nd)
    stop = start + ary_mask.ndim
    if start < 0 or stop > nd:
        raise ValueError(
            "Parameter p is inconsistent with input array dimensions"
        )
    cumsum = dpt.empty(
        ary_mask.size, dtype=dpt.int64, device=ary_mask.device
    )
    # kernels are submitted to the queue of the scratch array
    exec_q = cumsum.sycl_queue
    mask_count = ti.mask_positions(ary_mask, cumsum, sycl_queue=exec_q)
    expected_vals_shape = ary.shape[:start] + (mask_count,) + ary.shape[stop:]
    rhs = vals if vals.dtype == ary.dtype else dpt.astype(vals, ary.dtype)
    rhs = dpt.broadcast_to(rhs, expected_vals_shape)
    host_ev, _ = ti._place(
        dst=ary,
        cumsum=cumsum,
        axis_start=start,
        axis_end=stop,
        rhs=rhs,
        sycl_queue=exec_q,
    )
    host_ev.wait()
1✔
628

629

630
def _put_multi_index(ary, inds, p, vals):
    """Puts values `vals` into `ary` at positions addressed by the
    integer index arrays `inds`, which index consecutive dimensions of
    `ary` starting at `p`. `ary` is passed as the destination to
    `ti._put`.

    Args:
        ary (usm_ndarray):
            Destination array.
        inds (usm_ndarray, or list/tuple of usm_ndarray):
            Index arrays of integral data type. Multiple index arrays
            are broadcast against each other.
        p (int):
            First dimension of `ary` being indexed.
        vals (usm_ndarray or array-like):
            Values to put. Input that is not a usm_ndarray is converted
            with `dpt.asarray` using the data type of `ary`. Values are
            broadcast to the expected shape.
    Raises:
        TypeError:
            If any of the index arrays is not a usm_ndarray.
        dpctl.utils.ExecutionPlacementError:
            If no common execution queue can be determined.
        IndexError:
            If any index array is not of an integral data type.
    """
    queues_ = [ary.sycl_queue]
    usm_types_ = [ary.usm_type]
    vals_is_usm = isinstance(vals, dpt.usm_ndarray)
    if vals_is_usm:
        queues_.append(vals.sycl_queue)
        usm_types_.append(vals.usm_type)
    if not isinstance(inds, (list, tuple)):
        inds = (inds,)
    all_integers = True
    for ind in inds:
        if not isinstance(ind, dpt.usm_ndarray):
            raise TypeError
        queues_.append(ind.sycl_queue)
        usm_types_.append(ind.usm_type)
        if all_integers:
            all_integers = ind.dtype.kind in "ui"
    exec_q = dpctl.utils.get_execution_queue(queues_)
    if exec_q is None:
        raise dpctl.utils.ExecutionPlacementError(
            "Can not automatically determine where to allocate the "
            "result or performance execution. "
            "Use `usm_ndarray.to_device` method to migrate data to "
            "be associated with the same queue."
        )
    if not all_integers:
        raise IndexError(
            "arrays used as indices must be of integer (or boolean) type"
        )
    if len(inds) > 1:
        inds = dpt.broadcast_arrays(*inds)

    p = normalize_axis_index(operator.index(p), ary.ndim)
    # indexed dimensions are replaced by the (common) shape of the indices
    vals_shape = ary.shape[:p] + inds[0].shape + ary.shape[p + len(inds) :]

    vals_usm_type = dpctl.utils.get_coerced_usm_type(usm_types_)
    if not vals_is_usm:
        vals = dpt.asarray(
            vals, ary.dtype, usm_type=vals_usm_type, sycl_queue=exec_q
        )

    vals = dpt.broadcast_to(vals, vals_shape)

    host_ev, _ = ti._put(
        dst=ary, ind=inds, val=vals, axis_start=p, mode=0, sycl_queue=exec_q
    )
    host_ev.wait()
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc