• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

IntelPython / dpctl / 14754782295

30 Apr 2025 12:39PM UTC coverage: 86.419%. Remained the same
14754782295

Pull #2068

github

web-flow
Merge c8700ceb2 into b7a6b67c7
Pull Request #2068: Correct a path to `cl.cfg` file

3020 of 3716 branches covered (81.27%)

Branch coverage included in aggregate %.

12195 of 13890 relevant lines covered (87.8%)

6998.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

76.42
/dpctl/_sycl_queue.pyx
1
#                      Data Parallel Control (dpctl)
1✔
2
#
3
# Copyright 2020-2025 Intel Corporation
4
#
5
# Licensed under the Apache License, Version 2.0 (the "License");
6
# you may not use this file except in compliance with the License.
7
# You may obtain a copy of the License at
8
#
9
#    http://www.apache.org/licenses/LICENSE-2.0
10
#
11
# Unless required by applicable law or agreed to in writing, software
12
# distributed under the License is distributed on an "AS IS" BASIS,
13
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
# See the License for the specific language governing permissions and
15
# limitations under the License.
16

17
# distutils: language = c++
18
# cython: language_level=3
19
# cython: linetrace=True
20

21
""" Implements SyclQueue Cython extension type.
22
"""
23

24
from ._backend cimport (  # noqa: E211
25
    DPCTLContext_Create,
26
    DPCTLContext_Delete,
27
    DPCTLDefaultSelector_Create,
28
    DPCTLDevice_Copy,
29
    DPCTLDevice_CreateFromSelector,
30
    DPCTLDevice_Delete,
31
    DPCTLDeviceMgr_GetCachedContext,
32
    DPCTLDeviceSelector_Delete,
33
    DPCTLEvent_Delete,
34
    DPCTLEvent_Wait,
35
    DPCTLFilterSelector_Create,
36
    DPCTLQueue_AreEq,
37
    DPCTLQueue_Copy,
38
    DPCTLQueue_Create,
39
    DPCTLQueue_Delete,
40
    DPCTLQueue_GetBackend,
41
    DPCTLQueue_GetContext,
42
    DPCTLQueue_GetDevice,
43
    DPCTLQueue_HasEnableProfiling,
44
    DPCTLQueue_Hash,
45
    DPCTLQueue_IsInOrder,
46
    DPCTLQueue_MemAdvise,
47
    DPCTLQueue_Memcpy,
48
    DPCTLQueue_MemcpyWithEvents,
49
    DPCTLQueue_Prefetch,
50
    DPCTLQueue_SubmitBarrierForEvents,
51
    DPCTLQueue_SubmitNDRange,
52
    DPCTLQueue_SubmitRange,
53
    DPCTLQueue_Wait,
54
    DPCTLSyclContextRef,
55
    DPCTLSyclDeviceSelectorRef,
56
    DPCTLSyclEventRef,
57
    DPCTLWorkGroupMemory_Available,
58
    DPCTLWorkGroupMemory_Create,
59
    DPCTLWorkGroupMemory_Delete,
60
    _arg_data_type,
61
    _backend_type,
62
    _md_local_accessor,
63
    _queue_property_type,
64
)
65
from .memory._memory cimport _Memory
66

67
import ctypes
1✔
68
import numbers
1✔
69

70
from .enum_types import backend_type
1✔
71

72
from cpython cimport pycapsule
73
from cpython.buffer cimport (
74
    Py_buffer,
75
    PyBUF_ANY_CONTIGUOUS,
76
    PyBUF_SIMPLE,
77
    PyBUF_WRITABLE,
78
    PyBuffer_Release,
79
    PyObject_CheckBuffer,
80
    PyObject_GetBuffer,
81
)
82
from cpython.ref cimport Py_INCREF, PyObject
83
from libc.stdlib cimport free, malloc
84

85
import collections.abc
1✔
86
import logging
1✔
87

88

89
cdef extern from "_host_task_util.hpp":
90
    DPCTLSyclEventRef async_dec_ref(
91
        DPCTLSyclQueueRef, PyObject **,
92
        size_t, DPCTLSyclEventRef *, size_t, int *
93
    ) nogil
94

95

96
__all__ = [
1✔
97
    "SyclQueue",
98
    "SyclKernelInvalidRangeError",
99
    "SyclKernelSubmitError",
100
    "SyclQueueCreationError",
101
]
102

103

104
_logger = logging.getLogger(__name__)
1✔
105

106

107
cdef class kernel_arg_type_attribute:
    """
    One named member of an enumeration-like container.

    Keeps the name of the owning container, the name of the member and the
    integer value associated with the member.
    """
    cdef str parent_name
    cdef str attr_name
    cdef int attr_value

    def __cinit__(self, str parent, str name, int value):
        self.parent_name, self.attr_name = parent, name
        self.attr_value = value

    def __repr__(self):
        return "<{}.{}: {}>".format(
            self.parent_name, self.attr_name, self.attr_value
        )

    def __str__(self):
        # Same text as produced by ``__repr__``.
        return "<{}.{}: {}>".format(
            self.parent_name, self.attr_name, self.attr_value
        )

    @property
    def name(self):
        """Name of the member."""
        return self.attr_name

    @property
    def value(self):
        """Integer value of the member."""
        return self.attr_value
1✔
130

131

132
cdef class LocalAccessor:
    """
    LocalAccessor(dtype, shape)

    Describes the element type and extents of a ``sycl::local_accessor``
    that is passed to a kernel as an argument.

    Args:
        dtype (str):
            element type of the local memory. Accepted values:

                `'i1'`, `'i2'`, `'i4'`, `'i8'`:
                    signed integers int8_t, int16_t, int32_t, int64_t
                `'u1'`, `'u2'`, `'u4'`, `'u8'`:
                    unsigned integers uint8_t, uint16_t, uint32_t,
                    uint64_t
                `'f4'`, `'f8'`:
                    floating-point types float and double
        shape (tuple, list):
            extent of every dimension. The number of entries (1, 2, or 3)
            gives the dimensionality of the accessor; every entry must be
            a non-negative integer.

    Raises:
        TypeError:
            If ``shape`` is neither a tuple nor a list.
        ValueError:
            If ``shape`` does not have between one and three entries.
        TypeError:
            If an entry of ``shape`` is not an integer.
        ValueError:
            If an entry of ``shape`` is negative.
        ValueError:
            If ``dtype`` is not one of the accepted strings.
    """
    cdef _md_local_accessor lacc

    def __cinit__(self, str dtype, shape):
        if not isinstance(shape, (list, tuple)):
            raise TypeError(
                f"`shape` must be a list or tuple, got {type(shape)}"
            )
        rank = len(shape)
        if not (1 <= rank <= 3):
            raise ValueError(
                "LocalAccessor must have dimension between one and three"
            )
        # Every extent is validated before anything is written to the struct.
        for extent in shape:
            if not isinstance(extent, numbers.Integral):
                raise TypeError(
                    "LocalAccessor shape must be a sequence of integers"
                )
            if extent < 0:
                raise ValueError(
                    "LocalAccessor dimensions must be non-negative"
                )

        # Trailing extents that were not supplied are recorded as 1.
        self.lacc.ndim = rank
        self.lacc.dim0 = <size_t> shape[0]
        self.lacc.dim1 = 1
        self.lacc.dim2 = 1
        if rank > 1:
            self.lacc.dim1 = <size_t> shape[1]
        if rank > 2:
            self.lacc.dim2 = <size_t> shape[2]

        # Translate the type string into the matching type identifier.
        if dtype == "i1":
            self.lacc.dpctl_type_id = _arg_data_type._INT8_T
        elif dtype == "i2":
            self.lacc.dpctl_type_id = _arg_data_type._INT16_T
        elif dtype == "i4":
            self.lacc.dpctl_type_id = _arg_data_type._INT32_T
        elif dtype == "i8":
            self.lacc.dpctl_type_id = _arg_data_type._INT64_T
        elif dtype == "u1":
            self.lacc.dpctl_type_id = _arg_data_type._UINT8_T
        elif dtype == "u2":
            self.lacc.dpctl_type_id = _arg_data_type._UINT16_T
        elif dtype == "u4":
            self.lacc.dpctl_type_id = _arg_data_type._UINT32_T
        elif dtype == "u8":
            self.lacc.dpctl_type_id = _arg_data_type._UINT64_T
        elif dtype == "f4":
            self.lacc.dpctl_type_id = _arg_data_type._FLOAT
        elif dtype == "f8":
            self.lacc.dpctl_type_id = _arg_data_type._DOUBLE
        else:
            raise ValueError(f"Unrecognized type value: '{dtype}'")

    def __repr__(self):
        return f"LocalAccessor({self.lacc.ndim})"

    cdef size_t addressof(self):
        """
        Returns the address of the ``_md_local_accessor`` struct stored in
        this object, cast to ``size_t``.
        """
        cdef size_t struct_address = <size_t>&self.lacc
        return struct_address
×
228

229

230
cdef class _kernel_arg_type:
    """
    An enumeration of supported kernel argument types in
    :func:`dpctl.SyclQueue.submit`

    Every property returns a freshly created
    ``kernel_arg_type_attribute`` that carries the name of this
    enumeration, the name of the property and the corresponding
    ``_arg_data_type`` value.
    """
    cdef str _name

    def __cinit__(self):
        self._name = "kernel_arg_type"

    cdef kernel_arg_type_attribute _member(self, str member_name, int value):
        # Single construction point shared by all the properties below.
        return kernel_arg_type_attribute(self._name, member_name, value)

    @property
    def __name__(self):
        return self._name

    def __repr__(self):
        return "<enum 'kernel_arg_type'>"

    def __str__(self):
        return "<enum 'kernel_arg_type'>"

    @property
    def dpctl_int8(self):
        return self._member("dpctl_int8", _arg_data_type._INT8_T)

    @property
    def dpctl_uint8(self):
        return self._member("dpctl_uint8", _arg_data_type._UINT8_T)

    @property
    def dpctl_int16(self):
        return self._member("dpctl_int16", _arg_data_type._INT16_T)

    @property
    def dpctl_uint16(self):
        return self._member("dpctl_uint16", _arg_data_type._UINT16_T)

    @property
    def dpctl_int32(self):
        return self._member("dpctl_int32", _arg_data_type._INT32_T)

    @property
    def dpctl_uint32(self):
        return self._member("dpctl_uint32", _arg_data_type._UINT32_T)

    @property
    def dpctl_int64(self):
        return self._member("dpctl_int64", _arg_data_type._INT64_T)

    @property
    def dpctl_uint64(self):
        return self._member("dpctl_uint64", _arg_data_type._UINT64_T)

    @property
    def dpctl_float32(self):
        return self._member("dpctl_float32", _arg_data_type._FLOAT)

    @property
    def dpctl_float64(self):
        return self._member("dpctl_float64", _arg_data_type._DOUBLE)

    @property
    def dpctl_void_ptr(self):
        return self._member("dpctl_void_ptr", _arg_data_type._VOID_PTR)

    @property
    def dpctl_local_accessor(self):
        return self._member(
            "dpctl_local_accessor", _arg_data_type._LOCAL_ACCESSOR
        )

    @property
    def dpctl_work_group_memory(self):
        return self._member(
            "dpctl_work_group_memory", _arg_data_type._WORK_GROUP_MEMORY
        )
366

367

368
kernel_arg_type = _kernel_arg_type()
1✔
369

370

371
cdef class SyclKernelSubmitError(Exception):
    """
    Exception raised when a :class:`.program.SyclKernel` could not be
    submitted to a :class:`.SyclQueue`.
    """
379

380

381
cdef class SyclKernelInvalidRangeError(Exception):
    """
    Exception raised when the range given for a kernel submission has
    fewer than one or more than three dimensions.
    """
387

388

389
cdef class SyclQueueCreationError(Exception):
    """
    Exception raised when a :class:`.SyclQueue` could not be created.

    Creation can fail when the filter string is invalid, or when the
    backend or device type values are not supported.
    """
399

400

401
cdef int _parse_queue_properties(object prop) except *:
    """
    Converts a queue property specification into an integer bit mask.

    Args:
        prop:
            An ``int`` (returned as is), a single property, or a tuple/list
            of properties. Each property is either an ``int`` that is OR-ed
            into the result, or one of the strings ``"in_order"``,
            ``"enable_profiling"``, ``"default"``.

    Returns:
        int: bitwise OR of the values of all the given properties.

    Raises:
        ValueError:
            If a property is a string other than the three recognized
            ones, or is neither an ``int`` nor a ``str``. The message names
            the offending property rather than the whole container.
    """
    cdef int res = 0
    cdef object props
    if isinstance(prop, int):
        return <int>prop
    # A lone property is handled as a one-element sequence.
    if not isinstance(prop, (tuple, list)):
        props = (prop, )
    else:
        props = prop
    for p in props:
        if isinstance(p, int):
            res = res | <int> p
        elif isinstance(p, str):
            if (p == "in_order"):
                res = res | _queue_property_type._IN_ORDER
            elif (p == "enable_profiling"):
                res = res | _queue_property_type._ENABLE_PROFILING
            elif (p == "default"):
                res = res | _queue_property_type._DEFAULT_PROPERTY
            else:
                raise ValueError(
                    (
                        "queue property '{}' is not understood, "
                        "expecting 'in_order', 'enable_profiling', or 'default'"
                    ).format(p)
                )
        else:
            raise ValueError(
                "queue property '{}' is not understood.".format(p)
            )
    return res
1✔
432

433

434
cdef void _queue_capsule_deleter(object o) noexcept:
    """
    Deletes the queue reference held by capsule ``o`` when the capsule is
    valid under the name "SyclQueueRef" or "used_SyclQueueRef"; does
    nothing for any other object.
    """
    cdef DPCTLSyclQueueRef queue_ref = NULL
    cdef const char *cap_name = NULL

    # Work out which of the two names, if any, the capsule carries.
    if pycapsule.PyCapsule_IsValid(o, "SyclQueueRef"):
        cap_name = "SyclQueueRef"
    elif pycapsule.PyCapsule_IsValid(o, "used_SyclQueueRef"):
        cap_name = "used_SyclQueueRef"

    if cap_name is NULL:
        return
    queue_ref = <DPCTLSyclQueueRef> pycapsule.PyCapsule_GetPointer(
        o, cap_name
    )
    DPCTLQueue_Delete(queue_ref)
1✔
446

447

448
cdef bint _is_buffer(object o):
    """Returns the result of ``PyObject_CheckBuffer`` for ``o``."""
    cdef bint supports_buffer = PyObject_CheckBuffer(o)
    return supports_buffer
1✔
450

451

452
cdef DPCTLSyclEventRef _memcpy_impl(
     SyclQueue q,
     object dst,
     object src,
     size_t byte_count,
     DPCTLSyclEventRef *dep_events,
     size_t dep_events_count
) except *:
    """
    Submits a copy of ``byte_count`` bytes from ``src`` to ``dst`` on
    queue ``q`` and returns the event reference produced by the submission.

    Args:
        q: queue the copy is submitted to.
        dst: destination; a ``_Memory`` instance or an object exposing a
            writable, contiguous Python buffer.
        src: source; a ``_Memory`` instance or an object exposing a
            contiguous Python buffer.
        byte_count: number of bytes to copy.
        dep_events: array of events the copy depends on; may be NULL.
        dep_events_count: number of entries in ``dep_events``.

    Raises:
        TypeError: if ``src`` or ``dst`` is neither a ``_Memory`` nor a
            buffer-protocol object.
        RuntimeError: if a buffer view could not be obtained.

    Every buffer view acquired here is released on all exit paths,
    including the error paths taken after the ``src`` view was acquired.

    NOTE(review): the views are released as soon as the copy has been
    submitted, not when it completes; callers presumably wait on the
    returned event before the host objects can go away - confirm.
    NOTE(review): ``byte_count`` is not checked against the length of the
    host buffers - presumably validated by callers; confirm.
    """
    cdef void *c_dst_ptr = NULL
    cdef void *c_src_ptr = NULL
    cdef DPCTLSyclEventRef ERef = NULL
    cdef Py_buffer src_buf_view
    cdef Py_buffer dst_buf_view
    cdef bint src_is_buf = False
    cdef bint dst_is_buf = False
    cdef int ret_code = 0

    try:
        if isinstance(src, _Memory):
            c_src_ptr = <void*>(<_Memory>src).get_data_ptr()
        elif _is_buffer(src):
            ret_code = PyObject_GetBuffer(
                src, &src_buf_view, PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS
            )
            if ret_code != 0:  # pragma: no cover
                raise RuntimeError("Could not access buffer")
            c_src_ptr = src_buf_view.buf
            src_is_buf = True
        else:
            raise TypeError(
                 "Parameter `src` should have either type "
                 "`dpctl.memory._Memory` or a type that "
                 "supports Python buffer protocol"
            )

        if isinstance(dst, _Memory):
            c_dst_ptr = <void*>(<_Memory>dst).get_data_ptr()
        elif _is_buffer(dst):
            # This call may itself raise (e.g. for a read-only object);
            # the ``finally`` clause below then releases the ``src`` view.
            ret_code = PyObject_GetBuffer(
                dst, &dst_buf_view,
                PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE
            )
            if ret_code != 0:  # pragma: no cover
                raise RuntimeError("Could not access buffer")
            c_dst_ptr = dst_buf_view.buf
            dst_is_buf = True
        else:
            raise TypeError(
                 "Parameter `dst` should have either type "
                 "`dpctl.memory._Memory` or a type that "
                 "supports Python buffer protocol"
            )

        if dep_events_count == 0 or dep_events is NULL:
            ERef = DPCTLQueue_Memcpy(
                q._queue_ref, c_dst_ptr, c_src_ptr, byte_count
            )
        else:
            ERef = DPCTLQueue_MemcpyWithEvents(
                q._queue_ref,
                c_dst_ptr,
                c_src_ptr,
                byte_count,
                dep_events,
                dep_events_count
            )
    finally:
        # The flags are set only after a view was successfully acquired,
        # so each view is released exactly once.
        if src_is_buf:
            PyBuffer_Release(&src_buf_view)
        if dst_is_buf:
            PyBuffer_Release(&dst_buf_view)

    return ERef
1✔
524

525

526
cdef class _SyclQueue:
    """ Minimal data-owning base class used by SyclQueue.
    """
    def __dealloc__(self):
        # self._context and self._device are Python objects and are
        # reclaimed by the interpreter; only the queue reference has to
        # be deleted explicitly.
        if self._queue_ref is not NULL:
            DPCTLQueue_Delete(self._queue_ref)
534

535

536
cdef class SyclQueue(_SyclQueue):
537
    """
538
    SyclQueue(*args, **kwargs)
539
    Python class representing ``sycl::queue``.
540

541
    There are multiple ways to create a :class:`dpctl.SyclQueue` object:
542

543
    - Invoking the constructor with no arguments creates a queue using
544
      the default selector.
545

546
    :Example:
547
        .. code-block:: python
548

549
            import dpctl
550

551
            # Create a default SyclQueue
552
            q = dpctl.SyclQueue()
553
            print(q.sycl_device)
554

555
    - Invoking the constructor with specific filter selector string that
556
      creates a queue for the device corresponding to the filter string.
557

558
    :Example:
559
        .. code-block:: python
560

561
            import dpctl
562

563
            # Create in-order SyclQueue for either gpu, or cpu device
564
            q = dpctl.SyclQueue("gpu,cpu", property="in_order")
565
            print([q.sycl_device.is_gpu, q.sycl_device.is_cpu])
566

567
    - Invoking the constructor with a :class:`dpctl.SyclDevice` object
568
      creates a queue for that device, automatically finding/creating
569
      a :class:`dpctl.SyclContext` for the given device.
570

571
    :Example:
572
        .. code-block:: python
573

574
            import dpctl
575

576
            d = dpctl.SyclDevice("gpu")
577
            q = dpctl.SyclQueue(d)
578
            ctx = q.sycl_context
579
            print(q.sycl_device == d)
580
            print(any([ d == ctx_d for ctx_d in ctx.get_devices()]))
581

582
    - Invoking the constructor with a :class:`dpctl.SyclContext` and a
583
      :class:`dpctl.SyclDevice` creates a queue for given context and
584
      device.
585

586
    :Example:
587
        .. code-block:: python
588

589
            import dpctl
590

591
            # Create a CPU device using the opencl driver
592
            cpu_d = dpctl.SyclDevice("opencl:cpu")
593
            # Partition the CPU device into sub-devices with two cores each.
594
            sub_devices = cpu_d.create_sub_devices(partition=2)
595
            # Create a context common to all the sub-devices.
596
            ctx = dpctl.SyclContext(sub_devices)
597
            # create a queue for each sub-device using the common context
598
            queues = [dpctl.SyclQueue(ctx, sub_d) for sub_d in sub_devices]
599

600
    - Invoking the constructor with a named ``PyCapsule`` with the name
601
      **"SyclQueueRef"** that carries a pointer to a ``sycl::queue``
602
      object. The capsule will be renamed upon successful consumption
603
      to ensure one-time use. A new named capsule can be constructed by
604
      using :func:`dpctl.SyclQueue._get_capsule` method.
605

606
    Args:
607
        ctx (:class:`dpctl.SyclContext`, optional): Sycl context to create
608
            :class:`dpctl.SyclQueue` from. If not specified, a single-device
609
            context will be created from the specified device.
610
        dev (str, :class:`dpctl.SyclDevice`, capsule, optional): Sycl device
611
             to create :class:`dpctl.SyclQueue` from. If not specified, sycl
612
             device selected by ``sycl::default_selector`` is used.
613
             The argument must be explicitly specified if `ctx` argument is
614
             provided.
615

616
             If `dev` is a named ``PyCapsule`` called **"SyclQueueRef"** and
617
             `ctx` is not specified, :class:`dpctl.SyclQueue` instance is
618
             created from foreign `sycl::queue` object referenced by the
619
             capsule.
620
        property (str, tuple(str), list(str), optional): Defaults to None.
621
                The argument can be either "default", "in_order",
622
                "enable_profiling", or a tuple containing these.
623

624
    Raises:
625
        SyclQueueCreationError: If the :class:`dpctl.SyclQueue` object
626
                                creation failed.
627
        TypeError: In case of incorrect arguments given to constructors,
628
                   unexpected types of input arguments, or in the case the input
629
                   capsule contained a null pointer or could not be renamed.
630

631
    """
632
    def __cinit__(self, *args, **kwargs):
        # Dispatches on the number and type of the positional arguments to
        # one of the ``_init_queue_*`` helpers, then converts a negative
        # status code returned by the helper into an exception:
        #   -1       filter selector string not understood
        #   -2, -8   device could not be created / obtained
        #   -3, -7   context could not be created / obtained
        #   -4, -6   queue could not be created / copied
        #   -5       capsule held a null pointer or could not be renamed
        #
        # Raises TypeError for a wrong number or type of arguments and for
        # unsupported keywords, ValueError (from _parse_queue_properties)
        # for an unrecognized ``property``, and SyclQueueCreationError when
        # the queue could not be created.
        cdef int len_args
        cdef int status = 0
        cdef const char *filter_c_str = NULL
        if len(args) > 2:
            raise TypeError(
                "SyclQueue constructor takes 0, 1, or 2 positional arguments, "
                "but {} were given.".format(len(args))
            )
        props = _parse_queue_properties(
            kwargs.pop("property", _queue_property_type._DEFAULT_PROPERTY)
        )
        if (kwargs):
            raise TypeError(
                f"Unsupported keyword arguments {kwargs} to "
                "SyclQueue constructor encountered."
            )
        len_args = len(args)
        # Bound up-front so that the error reporting below never reads an
        # unassigned local when no positional argument was given.
        arg = None
        if len_args == 0:
            status = self._init_queue_default(props)
        elif len_args == 1:
            arg = args[0]
            if type(arg) is str:
                # ``string`` keeps the encoded bytes alive for as long as
                # ``filter_c_str`` points into them.
                string = bytes(<str>arg, "utf-8")
                filter_c_str = string
                status = self._init_queue_from_filter_string(
                    filter_c_str, props)
            elif type(arg) is _SyclQueue:
                status = self._init_queue_from__SyclQueue(<_SyclQueue>arg)
            elif isinstance(arg, SyclDevice):
                status = self._init_queue_from_device(<SyclDevice>arg, props)
            elif pycapsule.PyCapsule_IsValid(arg, "SyclQueueRef"):
                status = self._init_queue_from_capsule(arg)
            else:
                raise TypeError(
                    "Positional argument {} is not a filter string or a "
                    "SyclDevice".format(arg)
                )
        else:
            ctx, dev = args
            if not isinstance(ctx, SyclContext):
                raise TypeError(
                    "SyclQueue constructor with two positional arguments "
                    "expected SyclContext as its first argument, but got {}."
                    .format(type(ctx))
                )
            if not isinstance(dev, SyclDevice):
                raise TypeError(
                    "SyclQueue constructor with two positional arguments "
                    "expected SyclDevice as its second argument, but got {}."
                    .format(type(dev))
                )
            status = self._init_queue_from_context_and_device(
                <SyclContext>ctx, <SyclDevice>dev, props
            )
        if status < 0:
            if status == -1:
                raise SyclQueueCreationError(
                    "Device filter selector string '{}' is not understood."
                    .format(arg)
                )
            elif status == -2 or status == -8:
                default_dev_error = (
                    "Default SYCL Device could not be created."
                )
                raise SyclQueueCreationError(
                    default_dev_error if (len_args == 0) else
                    "SYCL Device '{}' could not be created.".format(arg)
                )
            elif status == -3 or status == -7:
                # The conditional expression is parenthesized: ``+`` binds
                # tighter than ``if/else``, so without the parentheses the
                # common prefix is dropped from the second alternative.
                raise SyclQueueCreationError(
                    "SYCL Context could not be created " + (
                        "by default constructor" if len_args == 0 else
                        "from '{}'.".format(arg)
                    )
                )
            elif status == -4 or status == -6:
                if len_args == 0:
                    raise SyclQueueCreationError(
                        "SYCL Queue failed to be created for the default "
                        "device."
                    )
                if len_args == 2:
                    arg = args
                raise SyclQueueCreationError(
                    "SYCL Queue failed to be created from '{}'.".format(arg)
                )
            elif status == -5:
                raise TypeError(
                    "Input capsule {} contains a null pointer or could not "
                    "be renamed".format(arg)
                )
            else:
                # Never leave a half-initialized object behind for a status
                # code that is not recognized above.
                raise SyclQueueCreationError(
                    "SYCL Queue could not be created, unexpected status "
                    "code {}.".format(status)
                )
718

719
    cdef int _init_queue_from__SyclQueue(self, _SyclQueue other):
        """
        Initializes self from the data container ``other``: the queue
        reference is copied, the context and device objects are shared.

        Returns:
             0 : normal execution
            -4 : the queue reference could not be copied
        """
        cdef DPCTLSyclQueueRef copied_ref = DPCTLQueue_Copy(other._queue_ref)
        if copied_ref is NULL:
            return -4
        self._queue_ref = copied_ref
        self._context = other._context
        self._device = other._device
        return 0  # normal return
1✔
728

729
    cdef int _init_queue_from_DPCTLSyclDeviceRef(
        self, DPCTLSyclDeviceRef DRef, int props
    ):
        """
        Initializes self with a queue created for device ``DRef`` with the
        given properties. The context is first looked up in the DPCTL
        cache; a new one is created only when the look-up fails.

        On failure ``DRef`` (and the context, if one was obtained) is
        deleted before returning.

        Returns: 0 : normal execution
                -3 : Context creation/look-up failed
                -4 : queue could not be created from context, device and
                     properties
        """
        cdef DPCTLSyclContextRef ctx_ref
        cdef DPCTLSyclQueueRef queue_ref

        ctx_ref = DPCTLDeviceMgr_GetCachedContext(DRef)
        if ctx_ref is NULL:
            # look-up failed (was not a root device?), so create a
            # new context
            ctx_ref = DPCTLContext_Create(DRef, NULL, 0)
        if ctx_ref is NULL:
            DPCTLDevice_Delete(DRef)
            return -3

        queue_ref = DPCTLQueue_Create(ctx_ref, DRef, NULL, props)
        if queue_ref is NULL:
            DPCTLContext_Delete(ctx_ref)
            DPCTLDevice_Delete(DRef)
            return -4

        # Wrap both references before touching any attribute of self.
        device_obj = SyclDevice._create(DRef)
        context_obj = SyclContext._create(ctx_ref)
        self._device = device_obj
        self._context = context_obj
        self._queue_ref = queue_ref
        return 0  # normal return
1✔
770

771
    cdef int _init_queue_from_filter_string(self, const char *c_str, int props):
        """
        Initializes self from a filter selector string and properties: a
        device is created from the filter selector and handed over to
        ``_init_queue_from_DPCTLSyclDeviceRef``.

        Returns:
             0 : normal execution
            -1 : filter selector could not be created (malformed?)
            -2 : Device could not be created from filter selector
            -3 : Context creation/look-up failed
            -4 : queue could not be created from context, device and
                 properties
        """
        cdef DPCTLSyclDeviceSelectorRef selector_ref = NULL
        cdef DPCTLSyclDeviceRef device_ref = NULL

        selector_ref = DPCTLFilterSelector_Create(c_str)
        if selector_ref is NULL:
            return -1  # Filter selector failed to be created

        device_ref = DPCTLDevice_CreateFromSelector(selector_ref)
        # The selector is deleted right after the device was requested.
        DPCTLDeviceSelector_Delete(selector_ref)
        if device_ref is NULL:
            return -2  # Device could not be created

        return self._init_queue_from_DPCTLSyclDeviceRef(device_ref, props)
1✔
799

800
    cdef int _init_queue_from_device(self, SyclDevice dev, int props):
        """
        Initializes self from a copy of the device reference held by
        ``dev``.

        Returns -2 when the reference could not be copied, otherwise the
        status of ``_init_queue_from_DPCTLSyclDeviceRef``.
        """
        # The copied reference ends up stored in self._device and is freed
        # when self._device is garbage collected.
        cdef DPCTLSyclDeviceRef device_copy = DPCTLDevice_Copy(
            dev.get_device_ref()
        )
        if device_copy is NULL:
            return -2  # Device could not be created
        return self._init_queue_from_DPCTLSyclDeviceRef(device_copy, props)
1✔
809

810
    cdef int _init_queue_default(self, int props):
        """
        Initializes self for the device produced by the default selector.

        Returns -2 when no device could be created, otherwise the status
        of ``_init_queue_from_DPCTLSyclDeviceRef``.
        """
        cdef DPCTLSyclDeviceSelectorRef selector_ref = (
            DPCTLDefaultSelector_Create()
        )
        cdef DPCTLSyclDeviceRef device_ref = NULL

        # The device reference ends up stored in self._device and is freed
        # when self._device is garbage collected.
        device_ref = DPCTLDevice_CreateFromSelector(selector_ref)
        DPCTLDeviceSelector_Delete(selector_ref)
        if device_ref is NULL:
            return -2  # Device could not be created
        return self._init_queue_from_DPCTLSyclDeviceRef(device_ref, props)
1✔
822

823
    cdef int _init_queue_from_context_and_device(
        self, SyclContext ctxt, SyclDevice dev, int props
    ):
        """
        Initializes self with a queue created from the references held by
        ``ctxt`` and ``dev`` and the given properties; ``ctxt`` and ``dev``
        themselves are stored on self.

        Returns:
             0 : normal execution
            -4 : queue could not be created
        """
        cdef DPCTLSyclContextRef ctx_ref = ctxt.get_context_ref()
        cdef DPCTLSyclDeviceRef device_ref = dev.get_device_ref()
        cdef DPCTLSyclQueueRef queue_ref = DPCTLQueue_Create(
            ctx_ref, device_ref, NULL, props
        )
        if queue_ref is NULL:
            return -4
        self._device = dev
        self._context = ctxt
        self._queue_ref = queue_ref
        return 0  # normal return
1✔
843

844
    cdef int _init_queue_from_capsule(self, object cap):
        """
        Initializes self from a ``PyCapsule`` named "SyclQueueRef" whose
        pointer is interpreted as ``DPCTLSyclQueueRef``. The capsule is
        renamed to "used_SyclQueueRef" and self stores a copy of the queue
        reference.

        Returns:
               0 : normal execution
              -5 : capsule held a null pointer or could not be renamed
              -6 : queue reference could not be copied
              -7 : context could not be obtained from the queue
              -8 : device could not be obtained from the queue
            -128 : ``cap`` is not a valid "SyclQueueRef" capsule
        """
        cdef DPCTLSyclContextRef ctx_ref = NULL
        cdef DPCTLSyclDeviceRef device_ref = NULL
        cdef DPCTLSyclQueueRef capsule_ref = NULL
        cdef DPCTLSyclQueueRef owned_ref = NULL
        cdef int rename_status = 0

        if not pycapsule.PyCapsule_IsValid(cap, "SyclQueueRef"):
            # __cinit__ checks that capsule is valid, so one can be here
            # only if this method was called from elsewhere without that
            # check.
            return -128

        capsule_ref = <DPCTLSyclQueueRef> pycapsule.PyCapsule_GetPointer(
            cap, "SyclQueueRef"
        )
        if capsule_ref is NULL:
            return -5
        rename_status = pycapsule.PyCapsule_SetName(cap, "used_SyclQueueRef")
        if rename_status:
            return -5

        owned_ref = DPCTLQueue_Copy(capsule_ref)
        if owned_ref is NULL:
            return -6
        ctx_ref = DPCTLQueue_GetContext(owned_ref)
        if ctx_ref is NULL:
            DPCTLQueue_Delete(owned_ref)
            return -7
        device_ref = DPCTLQueue_GetDevice(owned_ref)
        if device_ref is NULL:
            # Undo everything acquired so far.
            DPCTLContext_Delete(ctx_ref)
            DPCTLQueue_Delete(owned_ref)
            return -8

        self._context = SyclContext._create(ctx_ref)
        self._device = SyclDevice._create(device_ref)
        self._queue_ref = owned_ref
        return 0
×
885

886
    @staticmethod
    cdef SyclQueue _create(DPCTLSyclQueueRef qref):
        """
        Creates a :class:`.SyclQueue` from the queue reference ``qref``.

        This function calls ``DPCTLQueue_Delete(qref)``.
        The user of this function must pass a copy to keep the
        qref argument alive.

        Raises:
            SyclQueueCreationError: if ``qref`` is NULL.
        """
        cdef _SyclQueue ret
        if qref is NULL:
            raise SyclQueueCreationError("Queue creation failed.")
        ret = _SyclQueue.__new__(_SyclQueue)
        # Hand ownership of qref to the temporary before anything else can
        # raise, so that a failure while wrapping the context or the device
        # does not leak the reference.
        # NOTE(review): relies on the temporary releasing its _queue_ref when
        # it is destroyed, as stated by the comment below - confirm against
        # _SyclQueue.__dealloc__.
        ret._queue_ref = qref
        ret._context = SyclContext._create(DPCTLQueue_GetContext(qref))
        ret._device = SyclDevice._create(DPCTLQueue_GetDevice(qref))
        # ret is a temporary, and will call DPCTLQueue_Delete(qref)
        return SyclQueue(ret)
901

902
    @staticmethod
    cdef SyclQueue _create_from_context_and_device(
        SyclContext ctx, SyclDevice dev, int props=0
    ):
        """
        Static factory method building a :class:`dpctl.SyclQueue` from the
        given :class:`dpctl.SyclContext` and :class:`dpctl.SyclDevice`.
        The optional integer ``props`` encodes the queue properties.

        Raises:
            SyclQueueCreationError: if ``DPCTLQueue_Create`` returns NULL.
        """
        cdef _SyclQueue tmp_queue = _SyclQueue.__new__(_SyclQueue)
        cdef DPCTLSyclContextRef ctx_ref = ctx.get_context_ref()
        cdef DPCTLSyclDeviceRef dev_ref = dev.get_device_ref()
        cdef DPCTLSyclQueueRef new_qref = DPCTLQueue_Create(
            ctx_ref, dev_ref, NULL, props
        )

        if new_qref is NULL:
            raise SyclQueueCreationError("Queue creation failed.")
        tmp_queue._context = ctx
        tmp_queue._device = dev
        tmp_queue._queue_ref = new_qref
        return SyclQueue(tmp_queue)
928

929
    cdef int _populate_args(
        self,
        list args,
        void **kargs,
        _arg_data_type *kargty
    ):
        """
        Fills ``kargs`` and ``kargty`` with a pointer and a type tag for
        every element of ``args``.

        ``ctypes`` scalars are passed by the address of their storage,
        ``_Memory`` by its ``_pointer``, ``WorkGroupMemory`` by its ``_ref``
        and ``LocalAccessor`` by the result of ``addressof()``.

        Returns ``0`` on success and ``-1`` if at least one argument has an
        unsupported type. Processing continues after an unsupported
        argument, and the corresponding slots are left unset.
        """
        cdef int status = 0
        for pos, karg in enumerate(args):
            if isinstance(karg, ctypes.c_char):
                kargty[pos] = _arg_data_type._INT8_T
                kargs[pos] = <void*><size_t>(ctypes.addressof(karg))
            elif isinstance(karg, ctypes.c_uint8):
                kargty[pos] = _arg_data_type._UINT8_T
                kargs[pos] = <void*><size_t>(ctypes.addressof(karg))
            elif isinstance(karg, ctypes.c_short):
                kargty[pos] = _arg_data_type._INT16_T
                kargs[pos] = <void*><size_t>(ctypes.addressof(karg))
            elif isinstance(karg, ctypes.c_ushort):
                kargty[pos] = _arg_data_type._UINT16_T
                kargs[pos] = <void*><size_t>(ctypes.addressof(karg))
            elif isinstance(karg, ctypes.c_int):
                kargty[pos] = _arg_data_type._INT32_T
                kargs[pos] = <void*><size_t>(ctypes.addressof(karg))
            elif isinstance(karg, ctypes.c_uint):
                kargty[pos] = _arg_data_type._UINT32_T
                kargs[pos] = <void*><size_t>(ctypes.addressof(karg))
            elif isinstance(karg, ctypes.c_longlong):
                kargty[pos] = _arg_data_type._INT64_T
                kargs[pos] = <void*><size_t>(ctypes.addressof(karg))
            elif isinstance(karg, ctypes.c_ulonglong):
                kargty[pos] = _arg_data_type._UINT64_T
                kargs[pos] = <void*><size_t>(ctypes.addressof(karg))
            elif isinstance(karg, ctypes.c_float):
                kargty[pos] = _arg_data_type._FLOAT
                kargs[pos] = <void*><size_t>(ctypes.addressof(karg))
            elif isinstance(karg, ctypes.c_double):
                kargty[pos] = _arg_data_type._DOUBLE
                kargs[pos] = <void*><size_t>(ctypes.addressof(karg))
            elif isinstance(karg, _Memory):
                kargty[pos] = _arg_data_type._VOID_PTR
                kargs[pos] = <void*>(<size_t>karg._pointer)
            elif isinstance(karg, WorkGroupMemory):
                kargty[pos] = _arg_data_type._WORK_GROUP_MEMORY
                kargs[pos] = <void*>(<size_t>karg._ref)
            elif isinstance(karg, LocalAccessor):
                kargty[pos] = _arg_data_type._LOCAL_ACCESSOR
                kargs[pos] = <void*>((<LocalAccessor>karg).addressof())
            else:
                # Unsupported argument type: remember the failure.
                status = -1
        return status
979

980
    cdef int _populate_range(self, size_t Range[3], list S, size_t nS):
        """
        Copies the first ``nS`` elements of ``S`` into ``Range`` and pads
        the remaining entries (up to three) with ``1``.

        Returns ``0`` on success, or ``-1`` (leaving ``Range`` untouched)
        when ``nS`` is not 1, 2 or 3.
        """
        cdef size_t dim

        if nS < 1 or nS > 3:
            return -1

        for dim in range(3):
            if dim < nS:
                Range[dim] = <size_t>S[dim]
            else:
                Range[dim] = 1

        return 0
1000

1001
    cdef cpp_bool equals(self, SyclQueue q):
        """
        Compares the ``._queue_ref`` of this :class:`.SyclQueue` with that of
        ``q`` using ``DPCTLQueue_AreEq`` and returns the result.
        """
        cdef DPCTLSyclQueueRef other_ref = q.get_queue_ref()
        return DPCTLQueue_AreEq(self._queue_ref, other_ref)
1006

1007
    def __eq__(self, other):
        """
        Compares this queue with ``other``.

        Returns:
            bool:
                ``True`` if ``other`` is a :class:`dpctl.SyclQueue` and
                :meth:`equals` reports that both refer to the same
                ``DPCTLSyclQueueRef`` object, otherwise ``False``.
        """
        if not isinstance(other, SyclQueue):
            return False
        return self.equals(<SyclQueue> other)
1022

1023
    @property
    def backend(self):
        """ The ``backend_type`` enum value for this queue.

        Returns:
            backend_type:
                The backend for the queue.

        Raises:
            ValueError:
                If the backend reported for the queue is not one of
                OpenCL, Level Zero, CUDA or HIP.
        """
        cdef _backend_type be_kind = DPCTLQueue_GetBackend(self._queue_ref)
        if be_kind == _backend_type._OPENCL:
            return backend_type.opencl
        if be_kind == _backend_type._LEVEL_ZERO:
            return backend_type.level_zero
        if be_kind == _backend_type._CUDA:
            return backend_type.cuda
        if be_kind == _backend_type._HIP:
            return backend_type.hip
        raise ValueError("Unknown backend type.")
1042

1043
    @property
    def sycl_context(self):
        """
        The :class:`SyclContext` stored in this queue.

        Returns:
            :class:`SyclContext`
                SYCL context underlying this queue
        """
        return self._context
1053

1054
    @property
    def sycl_device(self):
        """
        The :class:`.SyclDevice` stored in this queue.

        Returns:
            :class:`SyclDevice`
                SYCL device targeted by this queue
        """
        return self._device
1064

1065
    cpdef SyclContext get_sycl_context(self):
        """Returns the :class:`SyclContext` stored in this queue."""
        return self._context
1067

1068
    cpdef SyclDevice get_sycl_device(self):
        """Returns the :class:`SyclDevice` stored in this queue."""
        return self._device
1070

1071
    cdef DPCTLSyclQueueRef get_queue_ref(self):
        """
        Returns the ``DPCTLSyclQueueRef`` stored in this object. The
        reference is returned as is, without making a copy.
        """
        return self._queue_ref
1073

1074
    def addressof_ref(self):
        """
        Exposes the C API ``DPCTLSyclQueueRef`` pointer held by this object
        as an integer.

        Returns:
            int:
                The ``DPCTLSyclQueueRef`` pointer of this
                :class:`dpctl.SyclQueue` object cast to ``size_t`` type.
        """
        cdef size_t ref_address = <size_t>self._queue_ref
        return ref_address
1085

1086
    cpdef SyclEvent _submit_keep_args_alive(
        self,
        object args,
        list dEvents
    ):
        """ SyclQueue._submit_keep_args_alive(args, dEvents)

        Keeps objects in ``args`` alive until tasks associated with events
        complete.

        Args:
            args(object):
                Python object to keep alive.
                Typically a tuple with arguments to offloaded tasks
            dEvents(List[dpctl.SyclEvent]):
                Gating events.
                The list of events associated with tasks
                working on Python objects collected in ``args``.
                ``None`` is treated like an empty list.
        Returns:
            dpctl.SyclEvent
               The event associated with the submission of host task.

        Raises:
            MemoryError:
                If the array of dependent events could not be allocated.
            TypeError:
                If an element of ``dEvents`` is not a
                :class:`dpctl.SyclEvent`.
            RuntimeError:
                If the host task could not be submitted.

        Increments reference count of ``args`` and schedules asynchronous
        ``host_task`` to decrement the count once dependent events are
        complete.

        .. note::
            The ``host_task`` attempts to acquire Python GIL, and it is
            known to be unsafe during interpreter shutdown sequence. It is
            thus strongly advised to ensure that all submitted ``host_task``
            complete before the end of the Python script.
        """
        # A ``list``-typed argument also admits None; treat it as "no
        # dependencies", consistently with ``submit_async``.
        cdef size_t nDE = len(dEvents) if dEvents is not None else 0
        cdef DPCTLSyclEventRef *depEvents = NULL
        cdef PyObject *args_raw = NULL
        cdef DPCTLSyclEventRef htERef = NULL
        cdef int status = -1

        # Create the array of dependent events if any
        if nDE > 0:
            depEvents = (
                <DPCTLSyclEventRef*>malloc(nDE*sizeof(DPCTLSyclEventRef))
            )
            if not depEvents:
                raise MemoryError()
            for idx, de in enumerate(dEvents):
                if not isinstance(de, SyclEvent):
                    free(depEvents)
                    raise TypeError(
                        "A sequence of dpctl.SyclEvent is expected"
                    )
                depEvents[idx] = (<SyclEvent>de).get_event_ref()

        # increment reference counts to list of arguments
        Py_INCREF(args)

        # schedule decrement
        args_raw = <PyObject *>args

        htERef = async_dec_ref(
            self.get_queue_ref(),
            &args_raw, 1,
            depEvents, nDE, &status
        )

        # The array is only needed for the duration of the submission call.
        free(depEvents)
        if (status != 0):
            # NOTE(review): the reference taken by Py_INCREF above is not
            # released on this path; if async_dec_ref does not decrement on
            # failure, ``args`` is leaked - confirm against its
            # implementation.
            with nogil:
                DPCTLEvent_Wait(htERef)
            DPCTLEvent_Delete(htERef)
            raise RuntimeError("Could not submit keep_args_alive host_task")

        return SyclEvent._create(htERef)
1161

1162
    cpdef SyclEvent submit_async(
        self,
        SyclKernel kernel,
        list args,
        list gS,
        list lS=None,
        list dEvents=None
    ):
        """
        Asynchronously submit :class:`dpctl.program.SyclKernel` for execution.

        Args:
            kernel (dpctl.program.SyclKernel):
                SYCL kernel object
            args (List[object]):
                List of kernel arguments
            gS (List[int]):
                Global iteration range. Must be a list of length 1, 2, or 3.
            lS (List[int], optional):
                Local iteration range. Must be ``None`` or have the same
                length as ``gS`` and each element of ``gS`` must be divisible
                by respective element of ``lS``.
            dEvents (List[dpctl.SyclEvent], optional):
                List of events indicating ordering of this task relative
                to tasks associated with specified events.

        Returns:
            dpctl.SyclEvent:
                An event associated with submission of the kernel.

        Raises:
            MemoryError:
                If temporary argument arrays could not be allocated.
            TypeError:
                If ``dEvents`` contains an object that is not a
                :class:`dpctl.SyclEvent`, or a kernel argument has an
                unsupported type.
            SyclKernelInvalidRangeError:
                If ``gS`` or ``lS`` does not have between one and three
                elements.
            ValueError:
                If ``gS`` and ``lS`` have different lengths.
            SyclKernelSubmitError:
                If the submission returned a NULL event.

        .. note::
            One must ensure that the lifetime of all kernel arguments
            extends after the submitted task completes. It is not a concern for
            scalar arguments since they are passed by value, but for
            objects representing USM allocations which are passed to the kernel
            as unified address space pointers.

            One way of accomplishing this is to use
            :meth:`dpctl.SyclQueue._submit_keep_args_alive`.
        """
        cdef void **kargs = NULL
        cdef _arg_data_type *kargty = NULL
        cdef DPCTLSyclEventRef *depEvents = NULL
        cdef DPCTLSyclEventRef Eref = NULL
        cdef size_t gRange[3]
        cdef size_t lRange[3]
        cdef size_t nGS = len(gS)
        cdef size_t nLS = len(lS) if lS is not None else 0
        cdef size_t nDE = len(dEvents) if dEvents is not None else 0
        cdef size_t nArgs = len(args)
        # malloc(0) is allowed to return NULL, which would be misreported as
        # an out-of-memory condition for a kernel without arguments, so at
        # least one element is always requested.
        cdef size_t nAlloc = nArgs if nArgs > 0 else 1

        # All temporary arrays are released in the ``finally`` clause, so
        # every error path (including exceptions raised by helpers) frees
        # them exactly once; free(NULL) is a no-op.
        try:
            # Allocate the arrays to be sent to DPCTLQueue_Submit
            kargs = <void**>malloc(nAlloc * sizeof(void*))
            if not kargs:
                raise MemoryError()
            kargty = (
                <_arg_data_type*>malloc(nAlloc * sizeof(_arg_data_type))
            )
            if not kargty:
                raise MemoryError()

            # Create the array of dependent events if any
            if nDE > 0:
                depEvents = (
                    <DPCTLSyclEventRef*>malloc(nDE*sizeof(DPCTLSyclEventRef))
                )
                if not depEvents:
                    raise MemoryError()
                for idx, de in enumerate(dEvents):
                    if not isinstance(de, SyclEvent):
                        raise TypeError(
                            "A sequence of dpctl.SyclEvent is expected"
                        )
                    depEvents[idx] = (<SyclEvent>de).get_event_ref()

            # populate the args and argstype arrays
            if self._populate_args(args, kargs, kargty) == -1:
                raise TypeError("Unsupported type for a kernel argument")

            if self._populate_range(gRange, gS, nGS) == -1:
                raise SyclKernelInvalidRangeError(
                    "Range with {} dimensions is not allowed. Range can "
                    "only have between one and three "
                    "dimensions.".format(nGS)
                )

            if lS is None:
                Eref = DPCTLQueue_SubmitRange(
                    kernel.get_kernel_ref(),
                    self.get_queue_ref(),
                    kargs,
                    kargty,
                    nArgs,
                    gRange,
                    nGS,
                    depEvents,
                    nDE
                )
            else:
                if self._populate_range(lRange, lS, nLS) == -1:
                    raise SyclKernelInvalidRangeError(
                        "Range with {} dimensions is not allowed. Range "
                        "can only have between one and three "
                        "dimensions.".format(nLS)
                    )
                if nGS != nLS:
                    raise ValueError(
                        "Local and global ranges need to have same "
                        "number of dimensions."
                    )
                Eref = DPCTLQueue_SubmitNDRange(
                    kernel.get_kernel_ref(),
                    self.get_queue_ref(),
                    kargs,
                    kargty,
                    nArgs,
                    gRange,
                    lRange,
                    nGS,
                    depEvents,
                    nDE
                )
        finally:
            free(kargs)
            free(kargty)
            free(depEvents)

        if Eref is NULL:
            raise SyclKernelSubmitError(
                "Kernel submission to Sycl queue failed."
            )

        return SyclEvent._create(Eref)
1322

1323
    cpdef SyclEvent submit(
        self,
        SyclKernel kernel,
        list args,
        list gS,
        list lS=None,
        list dEvents=None
    ):
        """
        Submit :class:`dpctl.program.SyclKernel` for execution and wait
        for the returned event.

        Args:
            kernel (dpctl.program.SyclKernel):
                SYCL kernel object
            args (List[object]):
                List of kernel arguments
            gS (List[int]):
                Global iteration range. Must be a list of length 1, 2, or 3.
            lS (List[int], optional):
                Local iteration range. Must be ``None`` or have the same
                length as ``gS`` and each element of ``gS`` must be divisible
                by respective element of ``lS``.
            dEvents (List[dpctl.SyclEvent], optional):
                List of events indicating ordering of this task relative
                to tasks associated with specified events.

        Returns:
            dpctl.SyclEvent:
                An event which is always complete. May be ignored.

        .. note::
            :meth:`dpctl.SyclQueue.submit` is a synchronizing method: it
            forwards all arguments to :meth:`dpctl.SyclQueue.submit_async`
            and waits on the event it returns.
            Use :meth:`dpctl.SyclQueue.submit_async` to avoid synchronization.
        """
        cdef SyclEvent done_ev = self.submit_async(
            kernel, args, gS, lS, dEvents
        )
        done_ev.wait()
        return done_ev
1360

1361
    cpdef void wait(self):
        """
        Calls ``DPCTLQueue_Wait`` on the underlying queue reference. The
        GIL is released for the duration of the call.
        """
        with nogil:
            DPCTLQueue_Wait(self._queue_ref)
1364

1365
    cpdef memcpy(self, dest, src, size_t count):
        """
        Copy memory from ``src`` to ``dest`` and wait for the copy to
        finish. ``count`` is passed on to the copy implementation.

        Raises:
            RuntimeError: if the copy operation returned a NULL event.
        """
        cdef DPCTLSyclEventRef copy_ev = _memcpy_impl(
            <SyclQueue>self, dest, src, count, NULL, 0
        )

        if copy_ev is NULL:
            raise RuntimeError(
                "SyclQueue.memcpy operation encountered an error"
            )
        # Wait without holding the GIL, then release the event.
        with nogil:
            DPCTLEvent_Wait(copy_ev)
        DPCTLEvent_Delete(copy_ev)
1377

1378
    cpdef SyclEvent memcpy_async(
        self, dest, src, size_t count, list dEvents=None
    ):
        """
        Copy memory from ``src`` to ``dest`` without waiting for the copy
        to finish.

        Args:
            dest: destination of the copy, forwarded to the copy
                implementation.
            src: source of the copy, forwarded to the copy implementation.
            count (int): forwarded to the copy implementation.
            dEvents (List[dpctl.SyclEvent], optional):
                Events the copy depends on. ``None`` and an empty list
                both mean "no dependencies".

        Returns:
            dpctl.SyclEvent:
                The event associated with the copy operation.

        Raises:
            MemoryError:
                If the array of dependent events could not be allocated.
            TypeError:
                If an element of ``dEvents`` is not a
                :class:`dpctl.SyclEvent`.
            RuntimeError:
                If the copy operation returned a NULL event.
        """
        cdef DPCTLSyclEventRef ERef = NULL
        cdef DPCTLSyclEventRef *depEvents = NULL
        cdef size_t nDE = 0

        if dEvents is not None:
            nDE = len(dEvents)

        # The array is released in ``finally`` so it cannot leak if
        # validation or the copy implementation raises; free(NULL) is a
        # no-op.
        try:
            # Only allocate for a non-empty list: malloc(0) may return NULL,
            # which must not be reported as an out-of-memory condition.
            if nDE > 0:
                depEvents = (
                    <DPCTLSyclEventRef*>malloc(nDE*sizeof(DPCTLSyclEventRef))
                )
                if depEvents is NULL:
                    raise MemoryError()
                for idx, de in enumerate(dEvents):
                    if not isinstance(de, SyclEvent):
                        raise TypeError(
                            "A sequence of dpctl.SyclEvent is expected"
                        )
                    depEvents[idx] = (<SyclEvent>de).get_event_ref()
            ERef = _memcpy_impl(
                <SyclQueue>self, dest, src, count, depEvents, nDE
            )
        finally:
            free(depEvents)

        if (ERef is NULL):
            raise RuntimeError(
                "SyclQueue.memcpy operation encountered an error"
            )

        return SyclEvent._create(ERef)
1413

1414
    cpdef prefetch(self, mem, size_t count=0):
        """
        Submits ``DPCTLQueue_Prefetch`` for the data pointer of ``mem`` and
        waits for the resulting event.

        Args:
            mem (_Memory): object whose data pointer is prefetched.
            count (int, optional): value passed to the prefetch call. When
                it is zero or larger than ``mem.nbytes``, ``mem.nbytes`` is
                used instead.

        Raises:
            TypeError: if ``mem`` is not a ``_Memory`` instance.
            RuntimeError: if the prefetch call returned a NULL event.
        """
        cdef void *data_ptr
        cdef DPCTLSyclEventRef pf_ev = NULL

        if not isinstance(mem, _Memory):
            raise TypeError("Parameter `mem` should have type _Memory")
        data_ptr = <void*>(<_Memory>mem).get_data_ptr()

        # ``count`` is unsigned, so "not positive" means exactly zero.
        if count == 0 or count > mem.nbytes:
            count = mem.nbytes

        pf_ev = DPCTLQueue_Prefetch(self._queue_ref, data_ptr, count)
        if pf_ev is NULL:
            raise RuntimeError("SyclQueue.prefetch encountered an error")
        with nogil:
            DPCTLEvent_Wait(pf_ev)
        DPCTLEvent_Delete(pf_ev)
1432

1433
    cpdef mem_advise(self, mem, size_t count, int advice):
        """
        Submits ``DPCTLQueue_MemAdvise`` for the data pointer of ``mem``
        and waits for the resulting event.

        Args:
            mem (_Memory): object whose data pointer the advice applies to.
            count (int): value passed to the advise call. When it is zero
                or larger than ``mem.nbytes``, ``mem.nbytes`` is used
                instead.
            advice (int): advice value forwarded unchanged.

        Raises:
            TypeError: if ``mem`` is not a ``_Memory`` instance.
            RuntimeError: if the advise call returned a NULL event.
        """
        cdef void *data_ptr
        cdef DPCTLSyclEventRef adv_ev = NULL

        if not isinstance(mem, _Memory):
            raise TypeError("Parameter `mem` should have type _Memory")
        data_ptr = <void*>(<_Memory>mem).get_data_ptr()

        # ``count`` is unsigned, so "not positive" means exactly zero.
        if count == 0 or count > mem.nbytes:
            count = mem.nbytes

        adv_ev = DPCTLQueue_MemAdvise(
            self._queue_ref, data_ptr, count, advice
        )
        if adv_ev is NULL:
            raise RuntimeError(
                "SyclQueue.mem_advise operation encountered an error"
            )
        with nogil:
            DPCTLEvent_Wait(adv_ev)
        DPCTLEvent_Delete(adv_ev)
1453

1454
    @property
    def is_in_order(self):
        """``True`` if :class:`.SyclQueue` is in-order,
        ``False`` if it is out-of-order.

        :Example:

            .. code-block:: python

                >>> import dpctl
                >>> q = dpctl.SyclQueue("cpu")
                >>> q.is_in_order
                False
                >>> q = dpctl.SyclQueue("cpu", property="in_order")
                >>> q.is_in_order
                True

        Returns:
            bool:
                Indicates whether this :class:`.SyclQueue` was created
                with ``property="in_order"``.

        .. note::
            Unless requested otherwise, :class:`.SyclQueue` is constructed
            to support out-of-order execution.
        """
        return DPCTLQueue_IsInOrder(self._queue_ref)
1481

1482
    @property
    def has_enable_profiling(self):
        """
        ``True`` if :class:`.SyclQueue` was constructed with
        ``"enable_profiling"`` property, ``False`` otherwise.

        :Example:

            .. code-block:: python

                >>> import dpctl
                >>> q = dpctl.SyclQueue("cpu")
                >>> q.has_enable_profiling
                False
                >>> q = dpctl.SyclQueue("cpu", property="enable_profiling")
                >>> q.has_enable_profiling
                True

        Returns:
            bool:
                Whether profiling information for tasks submitted
                to this :class:`.SyclQueue` is being collected.

        .. note::
            Profiling information can be accessed using
            properties
            :attr:`dpctl.SyclEvent.profiling_info_submit`,
            :attr:`dpctl.SyclEvent.profiling_info_start`, and
            :attr:`dpctl.SyclEvent.profiling_info_end`. It is
            also necessary for proper working of
            :class:`dpctl.SyclTimer`.

            Collection of profiling information is not enabled
            by default.
        """
        return DPCTLQueue_HasEnableProfiling(self._queue_ref)
1518

1519
    @property
    def __name__(self):
        """The name of :class:`dpctl.SyclQueue` object"""
        return "SyclQueue"
1523

1524
    def __repr__(self):
        """
        Builds a string of the form ``<dpctl.NAME at ADDRESS>``; the list
        of enabled properties ("in_order", "enable_profiling") is appended
        as ``property=[...]`` when at least one of them is set.
        """
        cdef cpp_bool in_order = DPCTLQueue_IsInOrder(self._queue_ref)
        cdef cpp_bool en_prof = DPCTLQueue_HasEnableProfiling(self._queue_ref)

        enabled = []
        if in_order:
            enabled.append("in_order")
        if en_prof:
            enabled.append("enable_profiling")

        prefix = "<dpctl." + self.__name__
        if enabled:
            return prefix + " at {}, property={}>".format(
                hex(id(self)), enabled
            )
        return prefix + " at {}>".format(hex(id(self)))
1540

1541
    def __hash__(self):
        """
        Hashes the underlying ``sycl::queue`` object via
        ``DPCTLQueue_Hash``.

        Returns:
            int:
                Hash value of this :class:`.SyclQueue` instance
        """
        queue_hash = DPCTLQueue_Hash(self._queue_ref)
        return queue_hash
1550

1551
    def _get_capsule(self):
        """
        Returns a new PyCapsule named "SyclQueueRef" that holds a copy of
        the queue reference of this object, with ``_queue_capsule_deleter``
        registered as the capsule destructor.

        Raises:
            ValueError: if the queue reference could not be copied.
        """
        cdef DPCTLSyclQueueRef qref_copy = DPCTLQueue_Copy(self._queue_ref)
        if qref_copy is NULL:
            raise ValueError("SyclQueue copy failed.")
        return pycapsule.PyCapsule_New(
            <void *>qref_copy, "SyclQueueRef", &_queue_capsule_deleter
        )
1559

1560
    cpdef SyclEvent submit_barrier(self, dependent_events=None):
        """
        Submits a barrier to this queue.

        Args:
            dependent_events:
                List[dpctl.SyclEvent]:
                    List or tuple of events that must complete
                    before this task may begin execution.

        Returns:
            dpctl.SyclEvent:
                Event associated with the submitted task

        Raises:
            TypeError:
                If ``dependent_events`` is neither ``None`` nor a sequence
                whose elements all have exactly the type
                :class:`dpctl.SyclEvent`.
            MemoryError:
                If the array of dependent events could not be allocated.
            SyclKernelSubmitError:
                If the barrier submission returned a NULL event.
        """
        cdef DPCTLSyclEventRef *depEvents = NULL
        cdef DPCTLSyclEventRef ERef = NULL
        cdef size_t nDE = 0
        # Create the array of dependent events if any
        # NOTE(review): the list comprehension is kept on purpose rather than
        # a generator expression - presumably to avoid a closure inside a
        # cpdef function; confirm before changing.
        if (dependent_events is None or
            (isinstance(dependent_events, collections.abc.Sequence) and
             all([type(de) is SyclEvent for de in dependent_events]))):
            nDE = 0 if dependent_events is None else len(dependent_events)
        else:
            raise TypeError(
                "dependent_events must be either None, or a sequence of "
                ":class:`dpctl.SyclEvent` objects")
        if nDE > 0:
            depEvents = (
                <DPCTLSyclEventRef*>malloc(nDE*sizeof(DPCTLSyclEventRef))
            )
            if not depEvents:
                raise MemoryError()
            else:
                # Element types were validated above, so the cast is safe.
                for idx, de in enumerate(dependent_events):
                    depEvents[idx] = (<SyclEvent>de).get_event_ref()

        ERef = DPCTLQueue_SubmitBarrierForEvents(
            self.get_queue_ref(), depEvents, nDE)
        # The array is only needed for the duration of the submission call.
        if (depEvents is not NULL):
            free(depEvents)
        if ERef is NULL:
            raise SyclKernelSubmitError(
                "Barrier submission to Sycl queue failed."
            )

        return SyclEvent._create(ERef)
1606

1607
    @property
    def name(self):
        """The ``name`` attribute of the device associated
        with this queue.

        Returns:
            str:
                The name of the device as a string.
        """
        queue_device = self.sycl_device
        return queue_device.name
1617

1618
    @property
    def driver_version(self):
        """The ``driver_version`` attribute of the device associated
        with this queue.

        Returns:
            str:
                The driver version of the device as a string.
        """
        queue_device = self.sycl_device
        return queue_device.driver_version
1628

1629
    def print_device_info(self):
        """ Delegates to ``print_device_info()`` of the SYCL device
        associated with this queue.
        """
        queue_device = self.sycl_device
        queue_device.print_device_info()
1634

1635

1636
cdef api DPCTLSyclQueueRef SyclQueue_GetQueueRef(SyclQueue q):
    """
    C-API function returning the opaque queue reference held by the
    given :class:`dpctl.SyclQueue` instance.

    The reference is obtained from ``q.get_queue_ref()`` and returned
    as is.
    """
    cdef DPCTLSyclQueueRef QRef = q.get_queue_ref()
    return QRef
1✔
1642

1643

1644
cdef api SyclQueue SyclQueue_Make(DPCTLSyclQueueRef QRef):
    """
    C-API function that builds a :class:`dpctl.SyclQueue` instance
    for the given opaque queue reference.

    The new instance is created from the result of
    ``DPCTLQueue_Copy(QRef)`` rather than from ``QRef`` itself.
    """
    return SyclQueue._create(DPCTLQueue_Copy(QRef))
1✔
1651

1652
cdef class _WorkGroupMemory:
    """Holder of a ``_mem_ref`` handle that is deleted on deallocation."""
    def __dealloc__(self):
        # Nothing to release when no handle was ever stored.
        if not self._mem_ref:
            return
        DPCTLWorkGroupMemory_Delete(self._mem_ref)
1✔
1656

1657
cdef class WorkGroupMemory:
    """
    WorkGroupMemory(nbytes)
    Python class representing the ``work_group_memory`` class from the
    Workgroup Memory oneAPI SYCL extension for low-overhead allocation of local
    memory shared by the workitems in a workgroup.

    This class is intended to be used as a kernel argument when launching
    kernels.

    This is based on a DPC++ SYCL extension and only available in newer
    versions. Use ``is_available()`` to check availability in your build.

    There are multiple ways to create a `WorkGroupMemory`.

    - If the constructor is invoked with just a single argument, this argument
      is interpreted as the number of bytes to allocate in the shared local
      memory.

    - If the constructor is invoked with two arguments, the first argument is
      interpreted as the datatype of the local memory, using the numpy type
      naming scheme (a kind character ``"i"``, ``"u"`` or ``"f"`` followed
      by the size of one element in bytes, e.g. ``"i4"`` or ``"f8"``).
      The second argument is interpreted as the number of elements to allocate.
      The number of bytes to allocate is then computed from the byte size of
      the data type and the element count.

    Args:
        args:
            Variadic argument, see class documentation.

    Raises:
        RuntimeError: If the workgroup memory extension is not available.
        TypeError: In case of incorrect arguments given to constructors,
                   unexpected types of input arguments.
    """
    def __cinit__(self, *args):
        cdef size_t nbytes
        if not DPCTLWorkGroupMemory_Available():
            raise RuntimeError("Workgroup memory extension not available")

        if not (0 < len(args) < 3):
            raise TypeError("WorkGroupMemory constructor takes 1 or 2 "
                            f"arguments, but {len(args)} were given")

        if len(args) == 1:
            if not isinstance(args[0], numbers.Integral):
                # Build one message string; a comma between the pieces
                # would pass them to TypeError as separate arguments.
                raise TypeError(
                    "WorkGroupMemory single argument constructor "
                    "expects first argument to be `int`, "
                    f"but got {type(args[0])}"
                )
            nbytes = <size_t>(args[0])
        else:
            if not isinstance(args[0], str):
                raise TypeError(
                    "WorkGroupMemory constructor expects first "
                    f"argument to be `str`, but got {type(args[0])}"
                )
            if not isinstance(args[1], numbers.Integral):
                raise TypeError(
                    "WorkGroupMemory constructor expects second "
                    f"argument to be `int`, but got {type(args[1])}"
                )
            dtype = <str>(args[0])
            count = <size_t>(args[1])
            # An empty string has no kind character to inspect; report it
            # as an unrecognized type instead of failing with IndexError.
            if not dtype or dtype[0] not in ("i", "u", "f"):
                raise TypeError(f"Unrecognized type value: '{dtype}'")
            try:
                # In the numpy naming scheme the numeric suffix is the
                # size of one element in bytes, not in bits.
                itemsize = int(dtype[1:])
            except ValueError:
                raise TypeError(f"Unrecognized type value: '{dtype}'")
            if itemsize < 0:
                # e.g. "i-4": ``int`` accepts the sign, but a negative
                # element size is not a valid type description.
                raise TypeError(f"Unrecognized type value: '{dtype}'")

            byte_size = <size_t>itemsize
            nbytes = count * byte_size

        self._mem_ref = DPCTLWorkGroupMemory_Create(nbytes)

    @staticmethod
    def is_available():
        """Check whether the ``work_group_memory`` extension is available.

        Returns:
            The value reported by ``DPCTLWorkGroupMemory_Available()``.
        """
        return DPCTLWorkGroupMemory_Available()

    @property
    def _ref(self):
        """Returns the address of the C API ``DPCTLWorkGroupMemoryRef``
        pointer as a ``size_t``.
        """
        return <size_t>self._mem_ref
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc