• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IntelPython / dpnp / 24727208533

21 Apr 2026 02:11PM UTC coverage: 78.427% (-0.002%) from 78.429%
24727208533

push

github

web-flow
Use Pybind11 `3.0.4` to build dpnp (#2865)

The PR updates CMakeLists.txt to bump pybind11 from version `3.0.3` to
`3.0.4`.

1573 of 2908 branches covered (54.09%)

Branch coverage included in aggregate %.

26259 of 32580 relevant lines covered (80.6%)

7636.31 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.29
/dpnp/tensor/_elementwise_common.py
1
# *****************************************************************************
2
# Copyright (c) 2026, Intel Corporation
3
# All rights reserved.
4
#
5
# Redistribution and use in source and binary forms, with or without
6
# modification, are permitted provided that the following conditions are met:
7
# - Redistributions of source code must retain the above copyright notice,
8
#   this list of conditions and the following disclaimer.
9
# - Redistributions in binary form must reproduce the above copyright notice,
10
#   this list of conditions and the following disclaimer in the documentation
11
#   and/or other materials provided with the distribution.
12
# - Neither the name of the copyright holder nor the names of its contributors
13
#   may be used to endorse or promote products derived from this software
14
#   without specific prior written permission.
15
#
16
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26
# THE POSSIBILITY OF SUCH DAMAGE.
27
# *****************************************************************************
28

29
from dpctl.utils import SequentialOrderManager
1✔
30

31
import dpnp.tensor as dpt
1✔
32
import dpnp.tensor._tensor_impl as ti
1✔
33

34
from ._copy_utils import _empty_like_orderK, _empty_like_pair_orderK
1✔
35
from ._manipulation_functions import _broadcast_shape_impl
1✔
36
from ._scalar_utils import (
1✔
37
    _get_dtype,
38
    _get_queue_usm_type,
39
    _get_shape,
40
    _validate_dtype,
41
)
42
from ._type_utils import (
1✔
43
    _acceptance_fn_default_binary,
44
    _acceptance_fn_default_unary,
45
    _all_data_types,
46
    _find_buf_dtype,
47
    _find_buf_dtype2,
48
    _find_buf_dtype_in_place_op,
49
    _resolve_weak_types,
50
)
51

52

53
class UnaryElementwiseFunc:
    """
    Class that implements unary element-wise functions.

    Args:
        name (str):
            Name of the unary function
        result_type_resolver_fn (callable):
            Function that takes dtype of the input and
            returns the dtype of the result if the
            implementation functions supports it, or
            returns `None` otherwise.
        unary_dp_impl_fn (callable):
            Data-parallel implementation function with signature
            `impl_fn(src: usm_ndarray, dst: usm_ndarray,
             sycl_queue: SyclQueue, depends: Optional[List[SyclEvent]])`
            where the `src` is the argument array, `dst` is the
            array to be populated with function values, effectively
            evaluating `dst = func(src)`.
            The `impl_fn` is expected to return a 2-tuple of `SyclEvent`s.
            The first event corresponds to data-management host tasks,
            including lifetime management of argument Python objects to ensure
            that their associated USM allocation is not freed before offloaded
            computational tasks complete execution, while the second event
            corresponds to computational tasks associated with function
            evaluation.
        docs (str):
            Documentation string for the unary function.
        acceptance_fn (callable, optional):
            Function to influence type promotion behavior of this unary
            function. The function takes 4 arguments:
                arg_dtype - Data type of the first argument
                buf_dtype - Data type the argument would be cast to
                res_dtype - Data type of the output array with function values
                sycl_dev - The :class:`dpctl.SyclDevice` where the function
                    evaluation is carried out.
            The function is invoked when the argument of the unary function
            requires casting, e.g. the argument of `dpctl.tensor.log` is an
            array with integral data type.
            If the value is not callable, the module default acceptance
            function for unary functions is used.
    """

    def __init__(
        self,
        name,
        result_type_resolver_fn,
        unary_dp_impl_fn,
        docs,
        acceptance_fn=None,
    ):
        self.__name__ = "UnaryElementwiseFunc"
        self.name_ = name
        self.result_type_resolver_fn_ = result_type_resolver_fn
        # Cache for the `types` property; `None` means "not computed yet".
        self.types_ = None
        self.unary_fn_ = unary_dp_impl_fn
        # Each instance carries the docstring of the function it implements.
        self.__doc__ = docs
        if callable(acceptance_fn):
            self.acceptance_fn_ = acceptance_fn
        else:
            self.acceptance_fn_ = _acceptance_fn_default_unary

    def __str__(self):
        return f"<{self.__name__} '{self.name_}'>"

    def __repr__(self):
        return f"<{self.__name__} '{self.name_}'>"

    def get_implementation_function(self):
        """Returns the implementation function for
        this elementwise unary function.

        """
        return self.unary_fn_

    def get_type_result_resolver_function(self):
        """Returns the type resolver function for this
        elementwise unary function.
        """
        return self.result_type_resolver_fn_

    def get_type_promotion_path_acceptance_function(self):
        """Returns the acceptance function for this
        elementwise unary function.

        Acceptance function influences the type promotion
        behavior of this unary function.
        The function takes 4 arguments:
            arg_dtype - Data type of the first argument
            buf_dtype - Data type the argument would be cast to
            res_dtype - Data type of the output array with function values
            sycl_dev - The :class:`dpctl.SyclDevice` where the function
                evaluation is carried out.
        The function is invoked when the argument of the unary function
        requires casting, e.g. the argument of `dpctl.tensor.log` is an
        array with integral data type.
        """
        return self.acceptance_fn_

    @property
    def nin(self):
        """Returns the number of arguments treated as inputs."""
        return 1

    @property
    def nout(self):
        """Returns the number of arguments treated as outputs."""
        return 1

    @property
    def types(self):
        """Returns information about types supported by
        implementation function, using NumPy's character
        encoding for data types, e.g.

        :Example:
            .. code-block:: python

                dpctl.tensor.sin.types
                # Outputs: ['e->e', 'f->f', 'd->d', 'F->F', 'D->D']

        The list is computed on first access and cached on the instance.
        """
        types = self.types_
        # Compare against `None` rather than truthiness so that an empty
        # (but already computed) list is reused instead of being rebuilt
        # on every access.
        if types is None:
            types = []
            for dt1 in _all_data_types(True, True):
                dt2 = self.result_type_resolver_fn_(dt1)
                if dt2:
                    types.append(f"{dt1.char}->{dt2.char}")
            self.types_ = types
        return types

    def __call__(self, x, /, *, out=None, order="K"):
        """Evaluates the unary function on array `x`.

        Args:
            x (usm_ndarray):
                Input array.
            out (usm_ndarray, optional):
                Array to write the result into. It must be writable, have
                the same shape as `x`, have the resolved result data type,
                and be allocated on a queue compatible with that of `x`.
                A new array is allocated when `out` is `None`.
            order (str, optional):
                Memory layout used for arrays allocated by this call, one of
                "C", "F", "K", "A". Any other value is treated as "K".

        Returns:
            usm_ndarray:
                `out` if it was provided, otherwise a newly allocated array.

        Raises:
            TypeError:
                If `x` or `out` is not a `usm_ndarray`.
            ValueError:
                If the input data type is not supported, or if `out` is
                read-only, or has unexpected shape or data type.
            ExecutionPlacementError:
                If queues of `x` and `out` are not compatible.
        """
        if not isinstance(x, dpt.usm_ndarray):
            raise TypeError(f"Expected dpnp.tensor.usm_ndarray, got {type(x)}")

        # Unrecognized `order` values silently fall back to "K".
        if order not in ["C", "F", "K", "A"]:
            order = "K"
        # `buf_dt` is the data type `x` has to be cast to before the call
        # (`None` if no cast is needed), `res_dt` is the result data type.
        buf_dt, res_dt = _find_buf_dtype(
            x.dtype,
            self.result_type_resolver_fn_,
            x.sycl_device,
            acceptance_fn=self.acceptance_fn_,
        )
        if res_dt is None:
            raise ValueError(
                f"function '{self.name_}' does not support input type "
                f"({x.dtype}), "
                "and the input could not be safely coerced to any "
                "supported types according to the casting rule ''safe''."
            )

        # Remember the user-provided array: `out` may be rebound to a
        # temporary below, in which case the result is copied back.
        orig_out = out
        if out is not None:
            if not isinstance(out, dpt.usm_ndarray):
                raise TypeError(
                    f"output array must be of usm_ndarray type, got {type(out)}"
                )

            if not out.flags.writable:
                raise ValueError("provided `out` array is read-only")

            if out.shape != x.shape:
                raise ValueError(
                    "The shape of input and output arrays are inconsistent. "
                    f"Expected output shape is {x.shape}, got {out.shape}"
                )

            if res_dt != out.dtype:
                raise ValueError(
                    f"Output array of type {res_dt} is needed, "
                    f"got {out.dtype}"
                )

            if (
                buf_dt is None
                and ti._array_overlap(x, out)
                and not ti._same_logical_tensors(x, out)
            ):
                # Allocate a temporary buffer to avoid memory overlapping.
                # Note if `buf_dt` is not None, a temporary copy of `x` will be
                # created, so the array overlap check isn't needed.
                out = dpt.empty_like(out)

            if dpt.get_execution_queue((x.sycl_queue, out.sycl_queue)) is None:
                raise dpt.ExecutionPlacementError(
                    "Input and output allocation queues are not compatible"
                )

        exec_q = x.sycl_queue
        _manager = SequentialOrderManager[exec_q]
        if buf_dt is None:
            # No cast of the input is needed: evaluate directly on `x`.
            if out is None:
                if order == "K":
                    out = _empty_like_orderK(x, res_dt)
                else:
                    if order == "A":
                        order = "F" if x.flags.f_contiguous else "C"
                    out = dpt.empty_like(x, dtype=res_dt, order=order)

            dep_evs = _manager.submitted_events
            ht_unary_ev, unary_ev = self.unary_fn_(
                x, out, sycl_queue=exec_q, depends=dep_evs
            )
            _manager.add_event_pair(ht_unary_ev, unary_ev)

            if not (orig_out is None or orig_out is out):
                # Copy the out data from temporary buffer to original memory
                ht_copy_ev, cpy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
                    src=out, dst=orig_out, sycl_queue=exec_q, depends=[unary_ev]
                )
                _manager.add_event_pair(ht_copy_ev, cpy_ev)
                out = orig_out

            return out

        # The input has to be cast: copy `x` into a buffer of type `buf_dt`
        # and evaluate the function on that buffer.
        if order == "K":
            buf = _empty_like_orderK(x, buf_dt)
        else:
            if order == "A":
                order = "F" if x.flags.f_contiguous else "C"
            buf = dpt.empty_like(x, dtype=buf_dt, order=order)

        dep_evs = _manager.submitted_events
        ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
            src=x, dst=buf, sycl_queue=exec_q, depends=dep_evs
        )
        _manager.add_event_pair(ht_copy_ev, copy_ev)
        if out is None:
            if order == "K":
                out = _empty_like_orderK(buf, res_dt)
            else:
                out = dpt.empty_like(buf, dtype=res_dt, order=order)

        # The computation must wait for the cast copy to finish.
        ht, uf_ev = self.unary_fn_(
            buf, out, sycl_queue=exec_q, depends=[copy_ev]
        )
        _manager.add_event_pair(ht, uf_ev)

        return out
290

291

292
class BinaryElementwiseFunc:
1✔
293
    """
294
    Class that implements binary element-wise functions.
295

296
    Args:
297
        name (str):
298
            Name of the binary function
299
        result_type_resolver_fn (callable):
300
            Function that takes dtypes of the input and
301
            returns the dtype of the result if the
302
            implementation functions supports it, or
303
            returns `None` otherwise.
304
        binary_dp_impl_fn (callable):
305
            Data-parallel implementation function with signature
306
            `impl_fn(src1: usm_ndarray, src2: usm_ndarray, dst: usm_ndarray,
307
             sycl_queue: SyclQueue, depends: Optional[List[SyclEvent]])`
308
            where the `src1` and `src2` are the argument arrays, `dst` is the
309
            array to be populated with function values,
310
            i.e. `dst=func(src1, src2)`.
311
            The `impl_fn` is expected to return a 2-tuple of `SyclEvent`s.
312
            The first event corresponds to data-management host tasks,
313
            including lifetime management of argument Python objects to ensure
314
            that their associated USM allocation is not freed before offloaded
315
            computational tasks complete execution, while the second event
316
            corresponds to computational tasks associated with function
317
            evaluation.
318
        docs (str):
319
            Documentation string for the binary function.
320
        binary_inplace_fn (callable, optional):
321
            Data-parallel implementation function with signature
322
            `impl_fn(src: usm_ndarray, dst: usm_ndarray,
323
             sycl_queue: SyclQueue, depends: Optional[List[SyclEvent]])`
324
            where the `src` is the argument array, `dst` is the
325
            array to be populated with function values,
326
            i.e. `dst=func(dst, src)`.
327
            The `impl_fn` is expected to return a 2-tuple of `SyclEvent`s.
328
            The first event corresponds to data-management host tasks,
329
            including async lifetime management of Python arguments,
330
            while the second event corresponds to computational tasks
331
            associated with function evaluation.
332
        acceptance_fn (callable, optional):
333
            Function to influence type promotion behavior of this binary
334
            function. The function takes 6 arguments:
335
                arg1_dtype - Data type of the first argument
336
                arg2_dtype - Data type of the second argument
337
                ret_buf1_dtype - Data type the first argument would be cast to
338
                ret_buf2_dtype - Data type the second argument would be cast to
339
                res_dtype - Data type of the output array with function values
340
                sycl_dev - The :class:`dpctl.SyclDevice` where the function
341
                    evaluation is carried out.
342
            The function is only called when both arguments of the binary
343
            function require casting, e.g. both arguments of
344
            `dpctl.tensor.logaddexp` are arrays with integral data type.
345
    """
346

347
    def __init__(
1✔
348
        self,
349
        name,
350
        result_type_resolver_fn,
351
        binary_dp_impl_fn,
352
        docs,
353
        binary_inplace_fn=None,
354
        acceptance_fn=None,
355
        weak_type_resolver=None,
356
    ):
357
        self.__name__ = "BinaryElementwiseFunc"
1✔
358
        self.name_ = name
1✔
359
        self.result_type_resolver_fn_ = result_type_resolver_fn
1✔
360
        self.types_ = None
1✔
361
        self.binary_fn_ = binary_dp_impl_fn
1✔
362
        self.binary_inplace_fn_ = binary_inplace_fn
1✔
363
        self.__doc__ = docs
1✔
364
        if callable(acceptance_fn):
1✔
365
            self.acceptance_fn_ = acceptance_fn
1✔
366
        else:
367
            self.acceptance_fn_ = _acceptance_fn_default_binary
1✔
368
        if callable(weak_type_resolver):
1✔
369
            self.weak_type_resolver_ = weak_type_resolver
1✔
370
        else:
371
            self.weak_type_resolver_ = _resolve_weak_types
1✔
372

373
    def __str__(self):
1✔
374
        return f"<{self.__name__} '{self.name_}'>"
×
375

376
    def __repr__(self):
1✔
377
        return f"<{self.__name__} '{self.name_}'>"
×
378

379
    def get_implementation_function(self):
1✔
380
        """Returns the out-of-place implementation
381
        function for this elementwise binary function.
382

383
        """
384
        return self.binary_fn_
1✔
385

386
    def get_implementation_inplace_function(self):
1✔
387
        """Returns the in-place implementation
388
        function for this elementwise binary function.
389

390
        """
391
        return self.binary_inplace_fn_
×
392

393
    def get_type_result_resolver_function(self):
1✔
394
        """Returns the type resolver function for this
395
        elementwise binary function.
396
        """
397
        return self.result_type_resolver_fn_
1✔
398

399
    def get_type_promotion_path_acceptance_function(self):
1✔
400
        """Returns the acceptance function for this
401
        elementwise binary function.
402

403
        Acceptance function influences the type promotion
404
        behavior of this binary function.
405
        The function takes 6 arguments:
406
            arg1_dtype - Data type of the first argument
407
            arg2_dtype - Data type of the second argument
408
            ret_buf1_dtype - Data type the first argument would be cast to
409
            ret_buf2_dtype - Data type the second argument would be cast to
410
            res_dtype - Data type of the output array with function values
411
            sycl_dev - :class:`dpctl.SyclDevice` on which function evaluation
412
                is carried out.
413

414
        The acceptance function is only invoked if both input arrays must be
415
        cast to intermediary data types, as would happen during call of
416
        `dpctl.tensor.hypot` with both arrays being of integral data type.
417
        """
418
        return self.acceptance_fn_
×
419

420
    def get_array_dtype_scalar_type_resolver_function(self):
1✔
421
        """Returns the function which determines how to treat
422
        Python scalar types for this elementwise binary function.
423

424
        Resolver influences what type the scalar will be
425
        treated as prior to type promotion behavior.
426
        The function takes 3 arguments:
427

428
        Args:
429
            o1_dtype (object, dtype):
430
                A class representing a Python scalar type or a ``dtype``
431
            o2_dtype (object, dtype):
432
                A class representing a Python scalar type or a ``dtype``
433
            sycl_dev (:class:`dpctl.SyclDevice`):
434
                Device on which function evaluation is carried out.
435

436
        One of ``o1_dtype`` and ``o2_dtype`` must be a ``dtype`` instance.
437
        """
438
        return self.weak_type_resolver_
1✔
439

440
    @property
1✔
441
    def nin(self):
1✔
442
        """Returns the number of arguments treated as inputs."""
443
        return 2
1✔
444

445
    @property
1✔
446
    def nout(self):
1✔
447
        """Returns the number of arguments treated as outputs."""
448
        return 1
1✔
449

450
    @property
1✔
451
    def types(self):
1✔
452
        """Returns information about types supported by
453
        implementation function, using NumPy's character
454
        encoding for data types, e.g.
455

456
        :Example:
457
            .. code-block:: python
458

459
                dpctl.tensor.divide.types
460
                # Outputs: ['ee->e', 'ff->f', 'fF->F', 'dd->d', 'dD->D',
461
                #    'Ff->F', 'FF->F', 'Dd->D', 'DD->D']
462
        """
463
        types = self.types_
1✔
464
        if not types:
1✔
465
            types = []
1✔
466
            _all_dtypes = _all_data_types(True, True)
1✔
467
            for dt1 in _all_dtypes:
1✔
468
                for dt2 in _all_dtypes:
1✔
469
                    dt3 = self.result_type_resolver_fn_(dt1, dt2)
1✔
470
                    if dt3:
1✔
471
                        types.append(f"{dt1.char}{dt2.char}->{dt3.char}")
1✔
472
            self.types_ = types
1✔
473
        return types
1✔
474

475
    def __call__(self, o1, o2, /, *, out=None, order="K"):
1✔
476
        if order not in ["K", "C", "F", "A"]:
1✔
477
            order = "K"
×
478
        q1, o1_usm_type = _get_queue_usm_type(o1)
1✔
479
        q2, o2_usm_type = _get_queue_usm_type(o2)
1✔
480
        if q1 is None and q2 is None:
1✔
481
            raise dpt.ExecutionPlacementError(
×
482
                "Execution placement can not be unambiguously inferred "
483
                "from input arguments. "
484
                "One of the arguments must represent USM allocation and "
485
                "expose `__sycl_usm_array_interface__` property"
486
            )
487
        if q1 is None:
1✔
488
            exec_q = q2
1✔
489
            res_usm_type = o2_usm_type
1✔
490
        elif q2 is None:
1✔
491
            exec_q = q1
1✔
492
            res_usm_type = o1_usm_type
1✔
493
        else:
494
            exec_q = dpt.get_execution_queue((q1, q2))
1✔
495
            if exec_q is None:
1✔
496
                raise dpt.ExecutionPlacementError(
1✔
497
                    "Execution placement can not be unambiguously inferred "
498
                    "from input arguments."
499
                )
500
            res_usm_type = dpt.get_coerced_usm_type(
1✔
501
                (
502
                    o1_usm_type,
503
                    o2_usm_type,
504
                )
505
            )
506
        dpt.validate_usm_type(res_usm_type, allow_none=False)
1✔
507
        o1_shape = _get_shape(o1)
1✔
508
        o2_shape = _get_shape(o2)
1✔
509
        if not all(
1✔
510
            isinstance(s, (tuple, list))
511
            for s in (
512
                o1_shape,
513
                o2_shape,
514
            )
515
        ):
516
            raise TypeError(
×
517
                "Shape of arguments can not be inferred. "
518
                "Arguments are expected to be "
519
                "lists, tuples, or both"
520
            )
521
        try:
1✔
522
            res_shape = _broadcast_shape_impl(
1✔
523
                [
524
                    o1_shape,
525
                    o2_shape,
526
                ]
527
            )
528
        except ValueError:
1✔
529
            raise ValueError(
1✔
530
                "operands could not be broadcast together with shapes "
531
                f"{o1_shape} and {o2_shape}"
532
            )
533
        sycl_dev = exec_q.sycl_device
1✔
534
        o1_dtype = _get_dtype(o1, sycl_dev)
1✔
535
        o2_dtype = _get_dtype(o2, sycl_dev)
1✔
536
        if not all(_validate_dtype(o) for o in (o1_dtype, o2_dtype)):
1✔
537
            raise ValueError("Operands have unsupported data types")
×
538

539
        o1_dtype, o2_dtype = self.weak_type_resolver_(
1✔
540
            o1_dtype, o2_dtype, sycl_dev
541
        )
542

543
        buf1_dt, buf2_dt, res_dt = _find_buf_dtype2(
1✔
544
            o1_dtype,
545
            o2_dtype,
546
            self.result_type_resolver_fn_,
547
            sycl_dev,
548
            acceptance_fn=self.acceptance_fn_,
549
        )
550

551
        if res_dt is None:
1✔
552
            raise ValueError(
1✔
553
                f"function '{self.name_}' does not support input types "
554
                f"({o1_dtype}, {o2_dtype}), "
555
                "and the inputs could not be safely coerced to any "
556
                "supported types according to the casting rule ''safe''."
557
            )
558

559
        orig_out = out
1✔
560
        _manager = SequentialOrderManager[exec_q]
1✔
561
        if out is not None:
1✔
562
            if not isinstance(out, dpt.usm_ndarray):
1✔
563
                raise TypeError(
×
564
                    f"output array must be of usm_ndarray type, got {type(out)}"
565
                )
566

567
            if not out.flags.writable:
1✔
568
                raise ValueError("provided `out` array is read-only")
×
569

570
            if out.shape != res_shape:
1✔
571
                raise ValueError(
1✔
572
                    "The shape of input and output arrays are inconsistent. "
573
                    f"Expected output shape is {res_shape}, got {out.shape}"
574
                )
575

576
            if res_dt != out.dtype:
1✔
577
                raise ValueError(
1✔
578
                    f"Output array of type {res_dt} is needed, "
579
                    f"got {out.dtype}"
580
                )
581

582
            if dpt.get_execution_queue((exec_q, out.sycl_queue)) is None:
1✔
583
                raise dpt.ExecutionPlacementError(
×
584
                    "Input and output allocation queues are not compatible"
585
                )
586

587
            if isinstance(o1, dpt.usm_ndarray):
1✔
588
                if ti._array_overlap(o1, out) and buf1_dt is None:
1✔
589
                    if not ti._same_logical_tensors(o1, out):
1✔
590
                        out = dpt.empty_like(out)
1✔
591
                    elif self.binary_inplace_fn_ is not None:
1✔
592
                        # if there is a dedicated in-place kernel
593
                        # it can be called here, otherwise continues
594
                        if isinstance(o2, dpt.usm_ndarray):
1✔
595
                            src2 = o2
×
596
                            if (
×
597
                                ti._array_overlap(o2, out)
598
                                and not ti._same_logical_tensors(o2, out)
599
                                and buf2_dt is None
600
                            ):
601
                                buf2_dt = o2_dtype
×
602
                        else:
603
                            src2 = dpt.asarray(
1✔
604
                                o2, dtype=o2_dtype, sycl_queue=exec_q
605
                            )
606
                        if buf2_dt is None:
1✔
607
                            if src2.shape != res_shape:
1✔
608
                                src2 = dpt.broadcast_to(src2, res_shape)
1✔
609
                            dep_evs = _manager.submitted_events
1✔
610
                            ht_, comp_ev = self.binary_inplace_fn_(
1✔
611
                                lhs=o1,
612
                                rhs=src2,
613
                                sycl_queue=exec_q,
614
                                depends=dep_evs,
615
                            )
616
                            _manager.add_event_pair(ht_, comp_ev)
1✔
617
                        else:
618
                            buf2 = dpt.empty_like(src2, dtype=buf2_dt)
1✔
619
                            dep_evs = _manager.submitted_events
1✔
620
                            (
1✔
621
                                ht_copy_ev,
622
                                copy_ev,
623
                            ) = ti._copy_usm_ndarray_into_usm_ndarray(
624
                                src=src2,
625
                                dst=buf2,
626
                                sycl_queue=exec_q,
627
                                depends=dep_evs,
628
                            )
629
                            _manager.add_event_pair(ht_copy_ev, copy_ev)
1✔
630

631
                            buf2 = dpt.broadcast_to(buf2, res_shape)
1✔
632
                            ht_, bf_ev = self.binary_inplace_fn_(
1✔
633
                                lhs=o1,
634
                                rhs=buf2,
635
                                sycl_queue=exec_q,
636
                                depends=[copy_ev],
637
                            )
638
                            _manager.add_event_pair(ht_, bf_ev)
1✔
639

640
                        return out
1✔
641

642
            if isinstance(o2, dpt.usm_ndarray):
1✔
643
                if (
1✔
644
                    ti._array_overlap(o2, out)
645
                    and not ti._same_logical_tensors(o2, out)
646
                    and buf2_dt is None
647
                ):
648
                    # should not reach if out is reallocated
649
                    # after being checked against o1
650
                    out = dpt.empty_like(out)
1✔
651

652
        if isinstance(o1, dpt.usm_ndarray):
1✔
653
            src1 = o1
1✔
654
        else:
655
            src1 = dpt.asarray(o1, dtype=o1_dtype, sycl_queue=exec_q)
1✔
656
        if isinstance(o2, dpt.usm_ndarray):
1✔
657
            src2 = o2
1✔
658
        else:
659
            src2 = dpt.asarray(o2, dtype=o2_dtype, sycl_queue=exec_q)
1✔
660

661
        if order == "A":
1✔
662
            order = (
1✔
663
                "F"
664
                if all(
665
                    arr.flags.f_contiguous
666
                    for arr in (
667
                        src1,
668
                        src2,
669
                    )
670
                )
671
                else "C"
672
            )
673

674
        if buf1_dt is None and buf2_dt is None:
1✔
675
            if out is None:
1✔
676
                if order == "K":
1✔
677
                    out = _empty_like_pair_orderK(
1✔
678
                        src1, src2, res_dt, res_shape, res_usm_type, exec_q
679
                    )
680
                else:
681
                    out = dpt.empty(
1✔
682
                        res_shape,
683
                        dtype=res_dt,
684
                        usm_type=res_usm_type,
685
                        sycl_queue=exec_q,
686
                        order=order,
687
                    )
688
            if src1.shape != res_shape:
1✔
689
                src1 = dpt.broadcast_to(src1, res_shape)
1✔
690
            if src2.shape != res_shape:
1✔
691
                src2 = dpt.broadcast_to(src2, res_shape)
1✔
692
            deps_ev = _manager.submitted_events
1✔
693
            ht_binary_ev, binary_ev = self.binary_fn_(
1✔
694
                src1=src1,
695
                src2=src2,
696
                dst=out,
697
                sycl_queue=exec_q,
698
                depends=deps_ev,
699
            )
700
            _manager.add_event_pair(ht_binary_ev, binary_ev)
1✔
701
            if not (orig_out is None or orig_out is out):
1✔
702
                # Copy the out data from temporary buffer to original memory
703
                ht_copy_out_ev, cpy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
1✔
704
                    src=out,
705
                    dst=orig_out,
706
                    sycl_queue=exec_q,
707
                    depends=[binary_ev],
708
                )
709
                _manager.add_event_pair(ht_copy_out_ev, cpy_ev)
1✔
710
                out = orig_out
1✔
711
            return out
1✔
712
        elif buf1_dt is None:
1✔
713
            if order == "K":
1✔
714
                buf2 = _empty_like_orderK(src2, buf2_dt)
1✔
715
            else:
716
                buf2 = dpt.empty_like(src2, dtype=buf2_dt, order=order)
×
717
            dep_evs = _manager.submitted_events
1✔
718
            ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
1✔
719
                src=src2, dst=buf2, sycl_queue=exec_q, depends=dep_evs
720
            )
721
            _manager.add_event_pair(ht_copy_ev, copy_ev)
1✔
722
            if out is None:
1✔
723
                if order == "K":
1✔
724
                    out = _empty_like_pair_orderK(
1✔
725
                        src1, buf2, res_dt, res_shape, res_usm_type, exec_q
726
                    )
727
                else:
728
                    out = dpt.empty(
×
729
                        res_shape,
730
                        dtype=res_dt,
731
                        usm_type=res_usm_type,
732
                        sycl_queue=exec_q,
733
                        order=order,
734
                    )
735

736
            if src1.shape != res_shape:
1✔
737
                src1 = dpt.broadcast_to(src1, res_shape)
1✔
738
            buf2 = dpt.broadcast_to(buf2, res_shape)
1✔
739
            ht_binary_ev, binary_ev = self.binary_fn_(
1✔
740
                src1=src1,
741
                src2=buf2,
742
                dst=out,
743
                sycl_queue=exec_q,
744
                depends=[copy_ev],
745
            )
746
            _manager.add_event_pair(ht_binary_ev, binary_ev)
1✔
747
            if not (orig_out is None or orig_out is out):
1✔
748
                # Copy the out data from temporary buffer to original memory
749
                ht_copy_out_ev, cpy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
×
750
                    src=out,
751
                    dst=orig_out,
752
                    sycl_queue=exec_q,
753
                    depends=[binary_ev],
754
                )
755
                _manager.add_event_pair(ht_copy_out_ev, cpy_ev)
×
756
                out = orig_out
×
757
            return out
1✔
758
        elif buf2_dt is None:
1✔
759
            if order == "K":
1✔
760
                buf1 = _empty_like_orderK(src1, buf1_dt)
1✔
761
            else:
762
                buf1 = dpt.empty_like(src1, dtype=buf1_dt, order=order)
×
763
            dep_evs = _manager.submitted_events
1✔
764
            ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
1✔
765
                src=src1, dst=buf1, sycl_queue=exec_q, depends=dep_evs
766
            )
767
            _manager.add_event_pair(ht_copy_ev, copy_ev)
1✔
768
            if out is None:
1✔
769
                if order == "K":
1✔
770
                    out = _empty_like_pair_orderK(
1✔
771
                        buf1, src2, res_dt, res_shape, res_usm_type, exec_q
772
                    )
773
                else:
774
                    out = dpt.empty(
×
775
                        res_shape,
776
                        dtype=res_dt,
777
                        usm_type=res_usm_type,
778
                        sycl_queue=exec_q,
779
                        order=order,
780
                    )
781

782
            buf1 = dpt.broadcast_to(buf1, res_shape)
1✔
783
            if src2.shape != res_shape:
1✔
784
                src2 = dpt.broadcast_to(src2, res_shape)
1✔
785
            ht_binary_ev, binary_ev = self.binary_fn_(
1✔
786
                src1=buf1,
787
                src2=src2,
788
                dst=out,
789
                sycl_queue=exec_q,
790
                depends=[copy_ev],
791
            )
792
            _manager.add_event_pair(ht_binary_ev, binary_ev)
1✔
793
            if not (orig_out is None or orig_out is out):
1✔
794
                # Copy the out data from temporary buffer to original memory
795
                ht_copy_out_ev, cpy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
×
796
                    src=out,
797
                    dst=orig_out,
798
                    sycl_queue=exec_q,
799
                    depends=[binary_ev],
800
                )
801
                _manager.add_event_pair(ht_copy_out_ev, cpy_ev)
×
802
                out = orig_out
×
803
            return out
1✔
804

805
        if order == "K":
1✔
806
            if src1.flags.c_contiguous and src2.flags.c_contiguous:
1✔
807
                order = "C"
1✔
808
            elif src1.flags.f_contiguous and src2.flags.f_contiguous:
1✔
809
                order = "F"
×
810
        if order == "K":
1✔
811
            buf1 = _empty_like_orderK(src1, buf1_dt)
1✔
812
        else:
813
            buf1 = dpt.empty_like(src1, dtype=buf1_dt, order=order)
1✔
814
        dep_evs = _manager.submitted_events
1✔
815
        ht_copy1_ev, copy1_ev = ti._copy_usm_ndarray_into_usm_ndarray(
1✔
816
            src=src1, dst=buf1, sycl_queue=exec_q, depends=dep_evs
817
        )
818
        _manager.add_event_pair(ht_copy1_ev, copy1_ev)
1✔
819
        if order == "K":
1✔
820
            buf2 = _empty_like_orderK(src2, buf2_dt)
1✔
821
        else:
822
            buf2 = dpt.empty_like(src2, dtype=buf2_dt, order=order)
1✔
823
        ht_copy2_ev, copy2_ev = ti._copy_usm_ndarray_into_usm_ndarray(
1✔
824
            src=src2, dst=buf2, sycl_queue=exec_q, depends=dep_evs
825
        )
826
        _manager.add_event_pair(ht_copy2_ev, copy2_ev)
1✔
827
        if out is None:
1✔
828
            if order == "K":
1✔
829
                out = _empty_like_pair_orderK(
1✔
830
                    buf1, buf2, res_dt, res_shape, res_usm_type, exec_q
831
                )
832
            else:
833
                out = dpt.empty(
1✔
834
                    res_shape,
835
                    dtype=res_dt,
836
                    usm_type=res_usm_type,
837
                    sycl_queue=exec_q,
838
                    order=order,
839
                )
840

841
        buf1 = dpt.broadcast_to(buf1, res_shape)
1✔
842
        buf2 = dpt.broadcast_to(buf2, res_shape)
1✔
843
        ht_, bf_ev = self.binary_fn_(
1✔
844
            src1=buf1,
845
            src2=buf2,
846
            dst=out,
847
            sycl_queue=exec_q,
848
            depends=[copy1_ev, copy2_ev],
849
        )
850
        _manager.add_event_pair(ht_, bf_ev)
1✔
851
        return out
1✔
852

853
    def _inplace_op(self, o1, o2):
        """
        Evaluate the binary function in place, storing the result in `o1`.

        Args:
            o1 (usm_ndarray):
                Left-hand side operand, which also receives the result.
                It must be writable, its shape must equal the broadcast
                shape of both operands, and its data type must equal the
                resolved result data type.
            o2:
                Right-hand side operand. A ``usm_ndarray`` is used as is;
                any other object is converted with ``dpt.asarray`` on the
                execution queue.

        Returns:
            usm_ndarray:
                The same array object that was passed as `o1`.

        Raises:
            ValueError:
                If the function has no dedicated in-place implementation,
                if `o1` is read-only, if the operand shapes can not be
                broadcast together, if the broadcast shape differs from
                the shape of `o1`, if `o2` has an unsupported data type,
                if no supported combination of input types is found, or
                if the resolved result data type differs from the data
                type of `o1`.
            TypeError:
                If `o1` is not a ``usm_ndarray``, or if the shape inferred
                for `o2` is neither a list nor a tuple.
            ExecutionPlacementError:
                If a common execution queue can not be determined for the
                two operands.
        """
        if self.binary_inplace_fn_ is None:
            raise ValueError(
                "binary function does not have a dedicated in-place "
                "implementation"
            )
        if not isinstance(o1, dpt.usm_ndarray):
            raise TypeError(
                "Expected first argument to be "
                f"dpnp.tensor.usm_ndarray, got {type(o1)}"
            )
        if not o1.flags.writable:
            raise ValueError("provided left-hand side array is read-only")

        # Determine the execution queue and the USM type of the result.
        q1, o1_usm_type = o1.sycl_queue, o1.usm_type
        q2, o2_usm_type = _get_queue_usm_type(o2)
        if q2 is None:
            # The second operand carries no queue, so the one of `o1` is used
            exec_q = q1
            res_usm_type = o1_usm_type
        else:
            exec_q = dpt.get_execution_queue((q1, q2))
            if exec_q is None:
                raise dpt.ExecutionPlacementError(
                    "Execution placement can not be unambiguously inferred "
                    "from input arguments."
                )
            res_usm_type = dpt.get_coerced_usm_type(
                (
                    o1_usm_type,
                    o2_usm_type,
                )
            )
        dpt.validate_usm_type(res_usm_type, allow_none=False)

        # Validate shapes: the left-hand side is never broadcast, hence
        # the broadcast shape has to coincide with the shape of `o1`.
        o1_shape = o1.shape
        o2_shape = _get_shape(o2)
        if not isinstance(o2_shape, (tuple, list)):
            raise TypeError(
                "Shape of second argument can not be inferred. "
                "Expected list or tuple."
            )
        try:
            res_shape = _broadcast_shape_impl(
                [
                    o1_shape,
                    o2_shape,
                ]
            )
        except ValueError as exc:
            # Keep the original failure attached as the explicit cause
            raise ValueError(
                "operands could not be broadcast together with shapes "
                f"{o1_shape} and {o2_shape}"
            ) from exc

        if res_shape != o1_shape:
            raise ValueError(
                "The shape of the non-broadcastable left-hand "
                f"side {o1_shape} is inconsistent with the "
                f"broadcast shape {res_shape}."
            )

        # Resolve data types of the operands and of the result.
        sycl_dev = exec_q.sycl_device
        o1_dtype = o1.dtype
        o2_dtype = _get_dtype(o2, sycl_dev)
        if not _validate_dtype(o2_dtype):
            raise ValueError("Operand has an unsupported data type")

        o1_dtype, o2_dtype = self.weak_type_resolver_(
            o1_dtype, o2_dtype, sycl_dev
        )

        # `buf_dt` is the data type `o2` has to be copied to before the
        # call, or None when `o2` can be passed to the kernel directly.
        buf_dt, res_dt = _find_buf_dtype_in_place_op(
            o1_dtype,
            o2_dtype,
            self.result_type_resolver_fn_,
            sycl_dev,
        )

        if res_dt is None:
            raise ValueError(
                f"function '{self.name_}' does not support input types "
                f"({o1_dtype}, {o2_dtype}), "
                "and the inputs could not be safely coerced to any "
                "supported types according to the casting rule "
                "''same_kind''."
            )

        if res_dt != o1_dtype:
            raise ValueError(
                f"Output array of type {res_dt} is needed, got {o1_dtype}"
            )

        _manager = SequentialOrderManager[exec_q]
        if isinstance(o2, dpt.usm_ndarray):
            src2 = o2
            if (
                ti._array_overlap(o2, o1)
                and not ti._same_logical_tensors(o2, o1)
                and buf_dt is None
            ):
                # `o2` overlaps `o1` without being the same logical tensor:
                # request a temporary copy of `o2` in its own data type.
                # NOTE(review): presumably this prevents the kernel from
                # reading elements it is overwriting -- confirm.
                buf_dt = o2_dtype
        else:
            src2 = dpt.asarray(o2, dtype=o2_dtype, sycl_queue=exec_q)

        if buf_dt is None:
            # No temporary is needed, `src2` is passed to the kernel as is
            if src2.shape != res_shape:
                src2 = dpt.broadcast_to(src2, res_shape)
            dep_evs = _manager.submitted_events
            ht_, comp_ev = self.binary_inplace_fn_(
                lhs=o1,
                rhs=src2,
                sycl_queue=exec_q,
                depends=dep_evs,
            )
            _manager.add_event_pair(ht_, comp_ev)
        else:
            # Copy `src2` into a temporary of type `buf_dt` first; the
            # in-place kernel is made dependent on that copy.
            buf = dpt.empty_like(src2, dtype=buf_dt)
            dep_evs = _manager.submitted_events
            (
                ht_copy_ev,
                copy_ev,
            ) = ti._copy_usm_ndarray_into_usm_ndarray(
                src=src2,
                dst=buf,
                sycl_queue=exec_q,
                depends=dep_evs,
            )
            _manager.add_event_pair(ht_copy_ev, copy_ev)

            buf = dpt.broadcast_to(buf, res_shape)
            ht_, bf_ev = self.binary_inplace_fn_(
                lhs=o1,
                rhs=buf,
                sycl_queue=exec_q,
                depends=[copy_ev],
            )
            _manager.add_event_pair(ht_, bf_ev)

        return o1
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc