16126000227

Committed 07 Jul 2025 07:24PM UTC coverage: 22.684% (-49.4%) from 72.051%

Build # 16126000227

Build Type

Pull #2519

github

Committed by

web-flow

Commit Message

Merge bd753a3a3 into 624f14f20

Pull Request Pull Request #2519: tmp changes

Coverage Stats

889 of 9756 branches covered (9.11%)

Branch coverage included in aggregate %.

6317 of 22011 relevant lines covered (28.7%)

35.96 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

6.01

/dpnp/backend/extensions/lapack/getri_batch.cpp

//*****************************************************************************
// Copyright (c) 2024-2025, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
// - Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// - Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
// THE POSSIBILITY OF SUCH DAMAGE.
//*****************************************************************************

#include <stdexcept>

#include <pybind11/pybind11.h>

// dpctl tensor headers
#include "utils/memory_overlap.hpp"
#include "utils/sycl_alloc_utils.hpp"
#include "utils/type_utils.hpp"

#include "getri.hpp"
#include "types_matrix.hpp"

#include "dpnp_utils.hpp"

namespace dpnp::extensions::lapack
{
namespace mkl_lapack = oneapi::mkl::lapack;
namespace py = pybind11;
namespace type_utils = dpctl::tensor::type_utils;

// Pointer to a type-specific getri_batch implementation. The parameter list
// mirrors getri_batch_impl<T>:
//   (exec_q, n, in_a, lda, stride_a, ipiv, stride_ipiv, batch_size,
//    dev_info, host_task_events, depends)
using getri_batch_impl_fn_ptr_t =
    sycl::event (*)(sycl::queue &,
                    std::int64_t,
                    char *,
                    std::int64_t,
                    std::int64_t,
                    std::int64_t *,
                    std::int64_t,
                    std::int64_t,
                    py::list,
                    std::vector<sycl::event> &,
                    const std::vector<sycl::event> &);

// Table of implementations indexed by the type lookup id of the input array.
// It is filled in by init_getri_batch_dispatch_vector(); an entry is nullptr
// when no implementation is defined for that type.
static getri_batch_impl_fn_ptr_t
    getri_batch_dispatch_vector[dpctl_td_ns::num_types];

/**
 * @brief Type-specific driver for the batched oneMKL `getri_batch` routine.
 *
 * Allocates a device scratchpad, submits `mkl_lapack::getri_batch` for the
 * batch of matrices pointed to by `in_a`, and schedules a host task that
 * releases the scratchpad once the computation event has completed.
 *
 * @param exec_q            Queue used for allocation and submission.
 * @param n                 Order of each square matrix in the batch.
 * @param in_a              Untyped pointer to the batch of matrices; it is
 *                          reinterpreted as `T *`.
 * @param lda               Leading dimension of each matrix.
 * @param stride_a          Stride between consecutive matrices.
 * @param ipiv              Pointer to the pivot indices.
 * @param stride_ipiv       Stride between consecutive pivot arrays.
 * @param batch_size        Number of matrices in the batch.
 * @param dev_info          Python list; the entry of every matrix reported by
 *                          a `batch_error` is set to 1.
 * @param host_task_events  Receives the scratchpad clean-up event.
 * @param depends           Events the computation must wait for.
 *
 * @return The event returned by `mkl_lapack::getri_batch`.
 *
 * @throws py::value_error     if a `batch_error` reports more failed matrices
 *                             than `dev_info` has entries.
 * @throws std::runtime_error  if the scratchpad cannot be allocated, or if an
 *                             MKL LAPACK or SYCL exception is caught.
 */
template <typename T>
static sycl::event getri_batch_impl(sycl::queue &exec_q,
                                    std::int64_t n,
                                    char *in_a,
                                    std::int64_t lda,
                                    std::int64_t stride_a,
                                    std::int64_t *ipiv,
                                    std::int64_t stride_ipiv,
                                    std::int64_t batch_size,
                                    py::list dev_info,
                                    std::vector<sycl::event> &host_task_events,
                                    const std::vector<sycl::event> &depends)
{
    type_utils::validate_type_for_device<T>(exec_q);

    T *a = reinterpret_cast<T *>(in_a);

    const std::int64_t scratchpad_size =
        mkl_lapack::getri_batch_scratchpad_size<T>(exec_q, n, lda, stride_a,
                                                   stride_ipiv, batch_size);
    T *scratchpad = nullptr;

    std::stringstream error_msg;
    bool is_exception_caught = false;

    sycl::event getri_batch_event;
    try {
        scratchpad = sycl::malloc_device<T>(scratchpad_size, exec_q);

        // A failed allocation must not reach MKL as a null scratchpad.
        // Nothing has been allocated at this point, so there is nothing to
        // release before throwing.
        if (scratchpad == nullptr && scratchpad_size > 0) {
            throw std::runtime_error(
                "Device allocation for scratchpad failed");
        }

        getri_batch_event = mkl_lapack::getri_batch(
            exec_q,
            n, // The order of each square matrix in the batch; (0 ≤ n).
               // It must be a non-negative integer.
            a, // Pointer to the batch of square matrices, each of size (n x n).
            lda,      // The leading dimension of each matrix in the batch.
            stride_a, // Stride between consecutive matrices in the batch.
            ipiv, // Pointer to the array of pivot indices for each matrix in
                  // the batch.
            stride_ipiv, // Stride between pivot indices: Spacing between pivot
                         // arrays in 'ipiv'.
            batch_size,  // Total number of matrices in the batch.
            scratchpad,  // Pointer to scratchpad memory to be used by MKL
                         // routine for storing intermediate results.
            scratchpad_size, depends);
    } catch (mkl_lapack::batch_error const &be) {
        // Get the indices of matrices within the batch that encountered an
        // error
        auto error_matrices_ids = be.ids();

        const auto error_matrices_ids_size = error_matrices_ids.size();
        const auto dev_info_size =
            static_cast<std::size_t>(py::len(dev_info));
        if (error_matrices_ids_size > dev_info_size) {
            // This throw leaves the function before the clean-up host task
            // below is submitted, so the scratchpad has to be released here
            // or it would leak.
            if (scratchpad != nullptr) {
                dpctl::tensor::alloc_utils::sycl_free_noexcept(scratchpad,
                                                               exec_q);
            }

            throw py::value_error("The size of `dev_info` must be greater than"
                                  " or equal to " +
                                  std::to_string(error_matrices_ids_size) +
                                  ", but currently it is " +
                                  std::to_string(dev_info_size) + ".");
        }

        // OneMKL batched functions throw a single `batch_error`
        // instead of per-matrix exceptions or an info array.
        // This is interpreted as a computation_error (singular matrix),
        // consistent with non-batched LAPACK behavior.
        // Set dev_info[...] to any positive value for each failed index.
        for (std::size_t i = 0; i < error_matrices_ids_size; ++i) {
            dev_info[error_matrices_ids[i]] = 1;
        }
        // Not flagged as an unexpected exception: execution continues to the
        // regular clean-up path below.
    } catch (mkl_lapack::exception const &e) {
        is_exception_caught = true;
        const std::int64_t info = e.info();

        if (info < 0) {
            error_msg << "Parameter number " << -info
                      << " had an illegal value.";
        }
        else if (info == scratchpad_size && e.detail() != 0) {
            error_msg
                << "Insufficient scratchpad size. Required size is at least "
                << e.detail();
        }
        else {
            error_msg << "Unexpected MKL exception caught during getri_batch() "
                         "call:\nreason: "
                      << e.what() << "\ninfo: " << e.info();
        }
    } catch (sycl::exception const &e) {
        is_exception_caught = true;
        error_msg
            << "Unexpected SYCL exception caught during getri_batch() call:\n"
            << e.what();
    }

    if (is_exception_caught) // an unexpected error occurs
    {
        if (scratchpad != nullptr) {
            dpctl::tensor::alloc_utils::sycl_free_noexcept(scratchpad, exec_q);
        }

        throw std::runtime_error(error_msg.str());
    }

    // Release the scratchpad only after the computation event has completed.
    sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) {
        cgh.depends_on(getri_batch_event);
        auto ctx = exec_q.get_context();
        cgh.host_task([ctx, scratchpad]() {
            dpctl::tensor::alloc_utils::sycl_free_noexcept(scratchpad, ctx);
        });
    });
    host_task_events.push_back(clean_up_event);
    return getri_batch_event;
}

/**
 * @brief Validates the arguments and dispatches to the type-specific
 *        getri_batch implementation.
 *
 * @param exec_q       Execution queue.
 * @param a_array      Input array; must have ndim >= 3, be C-contiguous and
 *                     have a type with a registered implementation.
 * @param ipiv_array   Pivot indices; must be a 2-dimensional, C-contiguous
 *                     int64 array that does not overlap `a_array`.
 * @param dev_info     Python list whose length must equal `batch_size`.
 * @param n            Order of each matrix; also used to derive `lda`.
 * @param stride_a     Stride between consecutive matrices.
 * @param stride_ipiv  Stride between consecutive pivot arrays.
 * @param batch_size   Number of matrices in the batch.
 * @param depends      Events the computation must wait for.
 *
 * @return A pair: first, the event returned by `keep_args_alive` for the two
 *         arrays; second, the event returned by the implementation.
 *
 * @throws py::value_error if any of the checks above fails.
 */
std::pair<sycl::event, sycl::event>
    getri_batch(sycl::queue &exec_q,
                const dpctl::tensor::usm_ndarray &a_array,
                const dpctl::tensor::usm_ndarray &ipiv_array,
                py::list dev_info,
                std::int64_t n,
                std::int64_t stride_a,
                std::int64_t stride_ipiv,
                std::int64_t batch_size,
                const std::vector<sycl::event> &depends)
{
    const int a_array_nd = a_array.get_ndim();
    const int ipiv_array_nd = ipiv_array.get_ndim();

    if (a_array_nd < 3) {
        throw py::value_error(
            "The input array has ndim=" + std::to_string(a_array_nd) +
            ", but an array with ndim >= 3 is expected.");
    }

    if (ipiv_array_nd != 2) {
        throw py::value_error("The array of pivot indices has ndim=" +
                              std::to_string(ipiv_array_nd) +
                              ", but a 2-dimensional array is expected.");
    }

    // Compare in 64-bit signed arithmetic: narrowing the length to `int`
    // could truncate it before the comparison with `batch_size`.
    const auto dev_info_size = static_cast<std::int64_t>(py::len(dev_info));
    if (dev_info_size != batch_size) {
        throw py::value_error("The size of 'dev_info' (" +
                              std::to_string(dev_info_size) +
                              ") does not match the expected batch size (" +
                              std::to_string(batch_size) + ").");
    }

    // check compatibility of execution queue and allocation queue
    if (!dpctl::utils::queues_are_compatible(exec_q, {a_array, ipiv_array})) {
        throw py::value_error(
            "Execution queue is not compatible with allocation queues");
    }

    auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
    if (overlap(a_array, ipiv_array)) {
        throw py::value_error("The input array and the array of pivot indices "
                              "are overlapping segments of memory");
    }

    if (!a_array.is_c_contiguous()) {
        throw py::value_error("The input array "
                              "must be C-contiguous");
    }
    if (!ipiv_array.is_c_contiguous()) {
        throw py::value_error("The array of pivot indices "
                              "must be C-contiguous");
    }

    // One lookup helper serves both arrays.
    auto array_types = dpctl_td_ns::usm_ndarray_types();
    const int a_array_type_id =
        array_types.typenum_to_lookup_id(a_array.get_typenum());

    getri_batch_impl_fn_ptr_t getri_batch_fn =
        getri_batch_dispatch_vector[a_array_type_id];
    if (getri_batch_fn == nullptr) {
        throw py::value_error(
            "No getri_batch implementation defined for the provided type "
            "of the input matrix.");
    }

    const int ipiv_array_type_id =
        array_types.typenum_to_lookup_id(ipiv_array.get_typenum());

    if (ipiv_array_type_id != static_cast<int>(dpctl_td_ns::typenum_t::INT64)) {
        throw py::value_error("The type of 'ipiv_array' must be int64.");
    }

    char *a_array_data = a_array.get_data();
    // lda = max(1, n), computed in signed arithmetic. An unsigned comparison
    // would wrap a negative `n` to a huge value and yield a negative `lda`.
    const std::int64_t lda = std::max<std::int64_t>(1, n);

    char *ipiv_array_data = ipiv_array.get_data();
    std::int64_t *d_ipiv = reinterpret_cast<std::int64_t *>(ipiv_array_data);

    std::vector<sycl::event> host_task_events;
    sycl::event getri_batch_ev = getri_batch_fn(
        exec_q, n, a_array_data, lda, stride_a, d_ipiv, stride_ipiv, batch_size,
        dev_info, host_task_events, depends);

    // Keep both arrays referenced until the recorded host tasks have run.
    sycl::event args_ev = dpctl::utils::keep_args_alive(
        exec_q, {a_array, ipiv_array}, host_task_events);

    return std::make_pair(args_ev, getri_batch_ev);
}

/**
 * @brief Factory yielding the getri_batch implementation for element type T.
 *
 * `get()` returns `getri_batch_impl<T>` when
 * `types::GetriBatchTypePairSupportFactory<T>::is_defined` is true, and
 * `nullptr` otherwise.
 */
template <typename fnT, typename T>
struct GetriBatchContigFactory
{
    fnT get()
    {
        constexpr bool has_impl =
            types::GetriBatchTypePairSupportFactory<T>::is_defined;

        // The implementation is named only inside the constexpr branch, so it
        // is not instantiated for types without support.
        if constexpr (!has_impl) {
            return nullptr;
        }
        else {
            return getri_batch_impl<T>;
        }
    }
};

void init_getri_batch_dispatch_vector(void)
{
    dpctl_td_ns::DispatchVectorBuilder<getri_batch_impl_fn_ptr_t,
                                       GetriBatchContigFactory,
                                       dpctl_td_ns::num_types>
        contig;
    contig.populate_dispatch_vector(getri_batch_dispatch_vector);
}
} // namespace dpnp::extensions::lapack

1	//*****************************************************************************
2	// Copyright (c) 2024-2025, Intel Corporation
3	// All rights reserved.
4	//
5	// Redistribution and use in source and binary forms, with or without
6	// modification, are permitted provided that the following conditions are met:
7	// - Redistributions of source code must retain the above copyright notice,
8	// this list of conditions and the following disclaimer.
9	// - Redistributions in binary form must reproduce the above copyright notice,
10	// this list of conditions and the following disclaimer in the documentation
11	// and/or other materials provided with the distribution.
12	//
13	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14	// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16	// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
17	// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18	// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19	// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20	// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21	// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22	// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23	// THE POSSIBILITY OF SUCH DAMAGE.
24	//*****************************************************************************
25
26	#include <stdexcept>
27
28	#include <pybind11/pybind11.h>
29
30	// dpctl tensor headers
31	#include "utils/memory_overlap.hpp"
32	#include "utils/sycl_alloc_utils.hpp"
33	#include "utils/type_utils.hpp"
34
35	#include "getri.hpp"
36	#include "types_matrix.hpp"
37
38	#include "dpnp_utils.hpp"
39
40	namespace dpnp::extensions::lapack
41	{
42	namespace mkl_lapack = oneapi::mkl::lapack;
43	namespace py = pybind11;
44	namespace type_utils = dpctl::tensor::type_utils;
45
46	typedef sycl::event (*getri_batch_impl_fn_ptr_t)(
47	sycl::queue &,
48	std::int64_t,
49	char *,
50	std::int64_t,
51	std::int64_t,
52	std::int64_t *,
53	std::int64_t,
54	std::int64_t,
55	py::list,
56	std::vector<sycl::event> &,
57	const std::vector<sycl::event> &);
58
59	static getri_batch_impl_fn_ptr_t
60	getri_batch_dispatch_vector[dpctl_td_ns::num_types];
61
62	template <typename T>
63	static sycl::event getri_batch_impl(sycl::queue &exec_q,
64	std::int64_t n,
65	char *in_a,
66	std::int64_t lda,
67	std::int64_t stride_a,
68	std::int64_t *ipiv,
69	std::int64_t stride_ipiv,
70	std::int64_t batch_size,
71	py::list dev_info,
72	std::vector<sycl::event> &host_task_events,
73	const std::vector<sycl::event> &depends)
74	{	×
75	type_utils::validate_type_for_device<T>(exec_q);	×
76
77	T *a = reinterpret_cast<T *>(in_a);	×
78
79	const std::int64_t scratchpad_size =	×
80	mkl_lapack::getri_batch_scratchpad_size<T>(exec_q, n, lda, stride_a,	×
81	stride_ipiv, batch_size);	×
82	T *scratchpad = nullptr;	×
83
84	std::stringstream error_msg;	×
85	std::int64_t info = 0;	×
86	bool is_exception_caught = false;	×
87
88	sycl::event getri_batch_event;	×
89	try {	×
90	scratchpad = sycl::malloc_device<T>(scratchpad_size, exec_q);	×
91
92	getri_batch_event = mkl_lapack::getri_batch(	×
93	exec_q,	×
94	n, // The order of each square matrix in the batch; (0 ≤ n).	×
95	// It must be a non-negative integer.
96	a, // Pointer to the batch of square matrices, each of size (n x n).	×
97	lda, // The leading dimension of each matrix in the batch.	×
98	stride_a, // Stride between consecutive matrices in the batch.	×
99	ipiv, // Pointer to the array of pivot indices for each matrix in	×
100	// the batch.
101	stride_ipiv, // Stride between pivot indices: Spacing between pivot	×
102	// arrays in 'ipiv'.
103	batch_size, // Total number of matrices in the batch.	×
104	scratchpad, // Pointer to scratchpad memory to be used by MKL	×
105	// routine for storing intermediate results.
106	scratchpad_size, depends);	×
107	} catch (mkl_lapack::batch_error const &be) {	×
108	// Get the indices of matrices within the batch that encountered an
109	// error
110	auto error_matrices_ids = be.ids();	×
111
112	auto error_matrices_ids_size = error_matrices_ids.size();	×
113	auto dev_info_size = static_cast<std::size_t>(py::len(dev_info));	×
114	if (error_matrices_ids_size > dev_info_size) {	×
115	throw py::value_error("The size of `dev_info` must be greater than"	×
116	" or equal to " +	×
117	std::to_string(error_matrices_ids_size) +	×
118	", but currently it is " +	×
119	std::to_string(dev_info_size) + ".");	×
120	}	×
121
122	// OneMKL batched functions throw a single `batch_error`
123	// instead of per-matrix exceptions or an info array.
124	// This is interpreted as a computation_error (singular matrix),
125	// consistent with non-batched LAPACK behavior.
126	// Set dev_info[...] to any positive value for each failed index.
127	for (size_t i = 0; i < error_matrices_ids.size(); ++i) {	×
128	dev_info[error_matrices_ids[i]] = 1;	×
129	}	×
130	} catch (mkl_lapack::exception const &e) {	×
131	is_exception_caught = true;	×
132	info = e.info();	×
133
134	if (info < 0) {	×
135	error_msg << "Parameter number " << -info	×
136	<< " had an illegal value.";	×
137	}	×
138	else if (info == scratchpad_size && e.detail() != 0) {	×
139	error_msg	×
140	<< "Insufficient scratchpad size. Required size is at least "	×
141	<< e.detail();	×
142	}	×
143	else {	×
144	error_msg << "Unexpected MKL exception caught during getri_batch() "	×
145	"call:\nreason: "	×
146	<< e.what() << "\ninfo: " << e.info();	×
147	}	×
148	} catch (sycl::exception const &e) {	×
149	is_exception_caught = true;	×
150	error_msg	×
151	<< "Unexpected SYCL exception caught during getri_batch() call:\n"	×
152	<< e.what();	×
153	}	×
154
155	if (is_exception_caught) // an unexpected error occurs	×
156	{	×
157	if (scratchpad != nullptr) {	×
158	dpctl::tensor::alloc_utils::sycl_free_noexcept(scratchpad, exec_q);	×
159	}	×
160
161	throw std::runtime_error(error_msg.str());	×
162	}	×
163
164	sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) {	×
165	cgh.depends_on(getri_batch_event);	×
166	auto ctx = exec_q.get_context();	×
167	cgh.host_task([ctx, scratchpad]() {	×
168	dpctl::tensor::alloc_utils::sycl_free_noexcept(scratchpad, ctx);	×
169	});	×
170	});	×
171	host_task_events.push_back(clean_up_event);	×
172	return getri_batch_event;	×
173	}	×
174
175	std::pair<sycl::event, sycl::event>
176	getri_batch(sycl::queue &exec_q,
177	const dpctl::tensor::usm_ndarray &a_array,
178	const dpctl::tensor::usm_ndarray &ipiv_array,
179	py::list dev_info,
180	std::int64_t n,
181	std::int64_t stride_a,
182	std::int64_t stride_ipiv,
183	std::int64_t batch_size,
184	const std::vector<sycl::event> &depends)
185	{	×
186	const int a_array_nd = a_array.get_ndim();	×
187	const int ipiv_array_nd = ipiv_array.get_ndim();	×
188
189	if (a_array_nd < 3) {	×
190	throw py::value_error(	×
191	"The input array has ndim=" + std::to_string(a_array_nd) +	×
192	", but an array with ndim >= 3 is expected.");	×
193	}	×
194
195	if (ipiv_array_nd != 2) {	×
196	throw py::value_error("The array of pivot indices has ndim=" +	×
197	std::to_string(ipiv_array_nd) +	×
198	", but a 2-dimensional array is expected.");	×
199	}	×
200
201	const int dev_info_size = py::len(dev_info);	×
202	if (dev_info_size != batch_size) {	×
203	throw py::value_error("The size of 'dev_info' (" +	×
204	std::to_string(dev_info_size) +	×
205	") does not match the expected batch size (" +	×
206	std::to_string(batch_size) + ").");	×
207	}	×
208
209	// check compatibility of execution queue and allocation queue
210	if (!dpctl::utils::queues_are_compatible(exec_q, {a_array, ipiv_array})) {	×
211	throw py::value_error(	×
212	"Execution queue is not compatible with allocation queues");	×
213	}	×
214
215	auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();	×
216	if (overlap(a_array, ipiv_array)) {	×
217	throw py::value_error("The input array and the array of pivot indices "	×
218	"are overlapping segments of memory");	×
219	}	×
220
221	bool is_a_array_c_contig = a_array.is_c_contiguous();	×
222	bool is_ipiv_array_c_contig = ipiv_array.is_c_contiguous();	×
223	if (!is_a_array_c_contig) {	×
224	throw py::value_error("The input array "	×
225	"must be C-contiguous");	×
226	}	×
227	if (!is_ipiv_array_c_contig) {	×
228	throw py::value_error("The array of pivot indices "	×
229	"must be C-contiguous");	×
230	}	×
231
232	auto array_types = dpctl_td_ns::usm_ndarray_types();	×
233	int a_array_type_id =	×
234	array_types.typenum_to_lookup_id(a_array.get_typenum());	×
235
236	getri_batch_impl_fn_ptr_t getri_batch_fn =	×
237	getri_batch_dispatch_vector[a_array_type_id];	×
238	if (getri_batch_fn == nullptr) {	×
239	throw py::value_error(	×
240	"No getri_batch implementation defined for the provided type "	×
241	"of the input matrix.");	×
242	}	×
243
244	auto ipiv_types = dpctl_td_ns::usm_ndarray_types();	×
245	int ipiv_array_type_id =	×
246	ipiv_types.typenum_to_lookup_id(ipiv_array.get_typenum());	×
247
248	if (ipiv_array_type_id != static_cast<int>(dpctl_td_ns::typenum_t::INT64)) {	×
249	throw py::value_error("The type of 'ipiv_array' must be int64.");	×
250	}	×
251
252	char *a_array_data = a_array.get_data();	×
253	const std::int64_t lda = std::max<size_t>(1UL, n);	×
254
255	char *ipiv_array_data = ipiv_array.get_data();	×
256	std::int64_t *d_ipiv = reinterpret_cast<std::int64_t *>(ipiv_array_data);	×
257
258	std::vector<sycl::event> host_task_events;	×
259	sycl::event getri_batch_ev = getri_batch_fn(	×
260	exec_q, n, a_array_data, lda, stride_a, d_ipiv, stride_ipiv, batch_size,	×
261	dev_info, host_task_events, depends);	×
262
263	sycl::event args_ev = dpctl::utils::keep_args_alive(	×
264	exec_q, {a_array, ipiv_array}, host_task_events);	×
265
266	return std::make_pair(args_ev, getri_batch_ev);	×
267	}	×
268
269	template <typename fnT, typename T>
270	struct GetriBatchContigFactory
271	{
272	fnT get()
273	{	28✔
274	if constexpr (types::GetriBatchTypePairSupportFactory<T>::is_defined) {	28✔
275	return getri_batch_impl<T>;	8✔
276	}
277	else {	20✔
278	return nullptr;	20✔
279	}	20✔
280	}	28✔
281	};
282
283	void init_getri_batch_dispatch_vector(void)
284	{	2✔
285	dpctl_td_ns::DispatchVectorBuilder<getri_batch_impl_fn_ptr_t,	2✔
286	GetriBatchContigFactory,	2✔
287	dpctl_td_ns::num_types>	2✔
288	contig;	2✔
289	contig.populate_dispatch_vector(getri_batch_dispatch_vector);	2✔
290	}	2✔
291	} // namespace dpnp::extensions::lapack

IntelPython / dpnp / 16126000227

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous