14977901736

Committed 12 May 2025 04:48PM UTC coverage: 71.781% (+0.007%) from 71.774%

Build # 14977901736

Build Type

Pull #2440

github

Committed by

web-flow

Commit Message

Merge 9ae87c975 into a37fadb8d

Pull Request Pull Request #2440: Move all logic with `cyl_bessel_i0` support in `i0.hpp`

Run Details

4718 of 9606 branches covered (49.12%)

Branch coverage included in aggregate %.

0 of 1 new or added line in 1 file covered. (0.0%)

2 existing lines in 1 file now uncovered.

17728 of 21664 relevant lines covered (81.83%)

19740.97 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0

/dpnp/backend/kernels/elementwise_functions/i0.hpp

//*****************************************************************************
// Copyright (c) 2024-2025, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
// - Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// - Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
// THE POSSIBILITY OF SUCH DAMAGE.
//*****************************************************************************

#pragma once

#include <sycl/sycl.hpp>

/**
 * Version of SYCL DPC++ 2025.1 compiler where an issue with
 * sycl::ext::intel::math::cyl_bessel_i0(x) is fully resolved.
 */
#ifndef __SYCL_COMPILER_BESSEL_I0_SUPPORT
#define __SYCL_COMPILER_BESSEL_I0_SUPPORT 20241208L
#endif

/**
 * Include <sycl/ext/intel/math.hpp> only when targeting to Intel devices.
 * This header relies on intel-specific types like _iml_half_internal,
 * which are not suppose to work with other targets (e.g., CUDA, AMD).
 */
#if defined(__SPIR__) && defined(__INTEL_LLVM_COMPILER)
#define __SYCL_EXT_INTEL_MATH_SUPPORT
#endif

#if defined(__SYCL_EXT_INTEL_MATH_SUPPORT) &&                                  \
    (__SYCL_COMPILER_VERSION >= __SYCL_COMPILER_BESSEL_I0_SUPPORT)
#include <sycl/ext/intel/math.hpp>
#endif

namespace dpnp::kernels::i0
{
#if defined(__SYCL_EXT_INTEL_MATH_SUPPORT) &&                                  \
    (__SYCL_COMPILER_VERSION >= __SYCL_COMPILER_BESSEL_I0_SUPPORT)
using sycl::ext::intel::math::cyl_bessel_i0;

#else

/**
 * The below implementation of Bessel function of order 0
 * is based on the source code from https://github.com/gcc-mirror/gcc
 */
namespace impl
{
/**
 * @brief This routine returns the cylindrical Bessel functions
 *        of order 0 by series expansion.
 *
 * @param x The argument of the Bessel function.
 * @return The output Bessel function.
 */
template <typename Tp>
inline Tp cyl_bessel_ij_0_series(const Tp x, const unsigned int max_iter)
{
    const Tp x2 = x / Tp(2);
    const Tp fact = sycl::exp(-sycl::lgamma(Tp(1)));

    const Tp xx4 = x2 * x2;
    Tp Jn = Tp(1);
    Tp term = Tp(1);
    constexpr Tp eps = std::numeric_limits<Tp>::epsilon();

    for (unsigned int i = 1; i < max_iter; ++i) {
        term *= xx4 / (Tp(i) * Tp(i));
        Jn += term;
        if (sycl::fabs(term / Jn) < eps) {
            break;
        }
    }
    return fact * Jn;
}

/**
 * @brief Compute the modified Bessel functions.
 *
 * @param x The argument of the Bessel functions.
 * @return The output Bessel function.
 */
template <typename Tp>
inline Tp bessel_ik_0(Tp x)
{
    constexpr Tp eps = std::numeric_limits<Tp>::epsilon();
    constexpr Tp fp_min = Tp(10) * eps;
    constexpr int max_iter = 15000;
    constexpr Tp x_min = Tp(2);

    const Tp mu = Tp(0);
    const Tp mu2 = mu * mu;
    const Tp xi = Tp(1) / x;
    const Tp xi2 = Tp(2) * xi;
    Tp h = fp_min;

    Tp b = Tp(0);
    Tp d = Tp(0);
    Tp c = h;
    int i;
    for (i = 1; i <= max_iter; ++i) {
        b += xi2;
        d = Tp(1) / (b + d);
        c = b + Tp(1) / c;

        const Tp del = c * d;
        h *= del;
        if (sycl::fabs(del - Tp(1)) < eps) {
            break;
        }
    }
    if (i > max_iter) {
        // argument `x` is too large
        return std::numeric_limits<Tp>::infinity();
    }

    Tp Inul = fp_min;
    const Tp Inul1 = Inul;
    const Tp Ipnul = h * Inul;

    constexpr Tp pi = static_cast<Tp>(3.1415926535897932384626433832795029L);
    Tp f = Ipnul / Inul;
    Tp Kmu, Knu1;
    if (x < x_min) {
        const Tp x2 = x / Tp(2);
        const Tp pimu = pi * mu;
        const Tp fact =
            (sycl::fabs(pimu) < eps ? Tp(1) : pimu / sycl::sin(pimu));

        Tp d = -sycl::log(x2);
        Tp e = mu * d;
        const Tp fact2 = (sycl::fabs(e) < eps ? Tp(1) : sycl::sinh(e) / e);

        // compute the gamma functions required by the Temme series expansions
        constexpr Tp gam1 =
            -static_cast<Tp>(0.5772156649015328606065120900824024L);
        const Tp gam2 = Tp(1) / sycl::tgamma(Tp(1));

        Tp ff = fact * (gam1 * sycl::cosh(e) + gam2 * fact2 * d);
        Tp sum = ff;
        e = sycl::exp(e);

        Tp p = e / (Tp(2) * gam2);
        Tp q = Tp(1) / (Tp(2) * e * gam2);
        Tp c = Tp(1);
        d = x2 * x2;
        Tp sum1 = p;
        int i;
        for (i = 1; i <= max_iter; ++i) {
            ff = (i * ff + p + q) / (i * i - mu2);
            c *= d / i;
            p /= i - mu;
            q /= i + mu;
            const Tp del = c * ff;
            sum += del;
            const Tp __del1 = c * (p - i * ff);
            sum1 += __del1;
            if (sycl::fabs(del) < eps * sycl::fabs(sum)) {
                break;
            }
        }
        if (i > max_iter) {
            // Bessel k series failed to converge
            return std::numeric_limits<Tp>::quiet_NaN();
        }
        Kmu = sum;
        Knu1 = sum1 * xi2;
    }
    else {
        Tp b = Tp(2) * (Tp(1) + x);
        Tp d = Tp(1) / b;
        Tp delh = d;
        Tp h = delh;
        Tp q1 = Tp(0);
        Tp q2 = Tp(1);
        Tp a1 = Tp(0.25L) - mu2;
        Tp q = c = a1;
        Tp a = -a1;
        Tp s = Tp(1) + q * delh;
        int i;
        for (i = 2; i <= max_iter; ++i) {
            a -= 2 * (i - 1);
            c = -a * c / i;
            const Tp qnew = (q1 - b * q2) / a;
            q1 = q2;
            q2 = qnew;
            q += c * qnew;
            b += Tp(2);
            d = Tp(1) / (b + a * d);
            delh = (b * d - Tp(1)) * delh;
            h += delh;
            const Tp dels = q * delh;
            s += dels;
            if (sycl::fabs(dels / s) < eps) {
                break;
            }
        }
        if (i > max_iter) {
            // Steed's method failed
            return std::numeric_limits<Tp>::quiet_NaN();
        }
        h = a1 * h;
        Kmu = sycl::sqrt(pi / (Tp(2) * x)) * sycl::exp(-x) / s;
        Knu1 = Kmu * (mu + x + Tp(0.5L) - h) * xi;
    }

    Tp Kpmu = mu * xi * Kmu - Knu1;
    Tp Inumu = xi / (f * Kmu - Kpmu);
    return Inumu * Inul1 / Inul;
}

/**
 * @brief Return the regular modified Bessel function of order 0.
 *
 * @param x The argument of the regular modified Bessel function.
 * @return The output regular modified Bessel function.
 */
template <typename Tp>
inline Tp cyl_bessel_i0(Tp x)
{
    if (sycl::isnan(x)) {
        return std::numeric_limits<Tp>::quiet_NaN();
    }

    if (sycl::isinf(x)) {
        // return +inf per any input infinity
        return std::numeric_limits<Tp>::infinity();
    }

    if (x == Tp(0)) {
        return Tp(1);
    }

    if (x * x < Tp(10)) {
        return cyl_bessel_ij_0_series<Tp>(x, 200);
    }
    return bessel_ik_0(sycl::fabs(x));
}
} // namespace impl

using impl::cyl_bessel_i0;

#endif

template <typename argT, typename resT>
struct I0Functor
{
    // is function constant for given argT
    using is_constant = typename std::false_type;
    // constant value, if constant
    // constexpr resT constant_value = resT{};
    // is function defined for sycl::vec
    using supports_vec = typename std::false_type;
    // do both argT and resT support subgroup store/load operation
    using supports_sg_loadstore = typename std::true_type;

    resT operator()(const argT &x) const
    {
        if constexpr (std::is_same_v<resT, sycl::half>) {
            return static_cast<resT>(cyl_bessel_i0<float>(float(x)));
        }
        else {
            return cyl_bessel_i0(x);
        }
    }
};
} // namespace dpnp::kernels::i0

1	//*****************************************************************************
2	// Copyright (c) 2024-2025, Intel Corporation
3	// All rights reserved.
4	//
5	// Redistribution and use in source and binary forms, with or without
6	// modification, are permitted provided that the following conditions are met:
7	// - Redistributions of source code must retain the above copyright notice,
8	// this list of conditions and the following disclaimer.
9	// - Redistributions in binary form must reproduce the above copyright notice,
10	// this list of conditions and the following disclaimer in the documentation
11	// and/or other materials provided with the distribution.
12	//
13	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14	// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16	// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
17	// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18	// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19	// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20	// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21	// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22	// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23	// THE POSSIBILITY OF SUCH DAMAGE.
24	//*****************************************************************************
25
26	#pragma once
27
28	#include <sycl/sycl.hpp>
29
30	/**
31	* Version of SYCL DPC++ 2025.1 compiler where an issue with
32	* sycl::ext::intel::math::cyl_bessel_i0(x) is fully resolved.
33	*/
34	#ifndef __SYCL_COMPILER_BESSEL_I0_SUPPORT
35	#define __SYCL_COMPILER_BESSEL_I0_SUPPORT 20241208L
36	#endif
37
38	/**
39	* Include <sycl/ext/intel/math.hpp> only when targeting to Intel devices.
40	* This header relies on intel-specific types like _iml_half_internal,
41	* which are not suppose to work with other targets (e.g., CUDA, AMD).
42	*/
43	#if defined(__SPIR__) && defined(__INTEL_LLVM_COMPILER)
44	#define __SYCL_EXT_INTEL_MATH_SUPPORT
45	#endif
46
47	#if defined(__SYCL_EXT_INTEL_MATH_SUPPORT) && \
48	(__SYCL_COMPILER_VERSION >= __SYCL_COMPILER_BESSEL_I0_SUPPORT)
49	#include <sycl/ext/intel/math.hpp>
50	#endif
51
52	namespace dpnp::kernels::i0
53	{
54	#if defined(__SYCL_EXT_INTEL_MATH_SUPPORT) && \
55	(__SYCL_COMPILER_VERSION >= __SYCL_COMPILER_BESSEL_I0_SUPPORT)
56	using sycl::ext::intel::math::cyl_bessel_i0;
57
58	#else
59
60	/**
61	* The below implementation of Bessel function of order 0
62	* is based on the source code from https://github.com/gcc-mirror/gcc
63	*/
64	namespace impl
65	{
66	/**
67	* @brief This routine returns the cylindrical Bessel functions
68	* of order 0 by series expansion.
69	*
70	* @param x The argument of the Bessel function.
71	* @return The output Bessel function.
72	*/
73	template <typename Tp>
74	inline Tp cyl_bessel_ij_0_series(const Tp x, const unsigned int max_iter)
75	{	×
76	const Tp x2 = x / Tp(2);	×
77	const Tp fact = sycl::exp(-sycl::lgamma(Tp(1)));	×
78
79	const Tp xx4 = x2 * x2;	×
80	Tp Jn = Tp(1);	×
81	Tp term = Tp(1);	×
82	constexpr Tp eps = std::numeric_limits<Tp>::epsilon();	×
83
84	for (unsigned int i = 1; i < max_iter; ++i) {	×
85	term = xx4 / (Tp(i) Tp(i));	×
86	Jn += term;	×
87	if (sycl::fabs(term / Jn) < eps) {	×
88	break;	×
89	}	×
90	}	×
91	return fact * Jn;	×
92	}	×
93
94	/**
95	* @brief Compute the modified Bessel functions.
96	*
97	* @param x The argument of the Bessel functions.
98	* @return The output Bessel function.
99	*/
100	template <typename Tp>
101	inline Tp bessel_ik_0(Tp x)
102	{	×
103	constexpr Tp eps = std::numeric_limits<Tp>::epsilon();	×
104	constexpr Tp fp_min = Tp(10) * eps;	×
105	constexpr int max_iter = 15000;	×
106	constexpr Tp x_min = Tp(2);	×
107
108	const Tp mu = Tp(0);	×
109	const Tp mu2 = mu * mu;	×
110	const Tp xi = Tp(1) / x;	×
111	const Tp xi2 = Tp(2) * xi;	×
112	Tp h = fp_min;	×
113
114	Tp b = Tp(0);	×
115	Tp d = Tp(0);	×
116	Tp c = h;	×
117	int i;	×
118	for (i = 1; i <= max_iter; ++i) {	×
119	b += xi2;	×
120	d = Tp(1) / (b + d);	×
121	c = b + Tp(1) / c;	×
122
123	const Tp del = c * d;	×
124	h *= del;	×
125	if (sycl::fabs(del - Tp(1)) < eps) {	×
126	break;	×
127	}	×
128	}	×
129	if (i > max_iter) {	×
130	// argument `x` is too large
131	return std::numeric_limits<Tp>::infinity();	×
132	}	×
133
134	Tp Inul = fp_min;	×
135	const Tp Inul1 = Inul;	×
136	const Tp Ipnul = h * Inul;	×
137
138	constexpr Tp pi = static_cast<Tp>(3.1415926535897932384626433832795029L);	×
139	Tp f = Ipnul / Inul;	×
140	Tp Kmu, Knu1;	×
141	if (x < x_min) {	×
142	const Tp x2 = x / Tp(2);	×
143	const Tp pimu = pi * mu;	×
144	const Tp fact =	×
145	(sycl::fabs(pimu) < eps ? Tp(1) : pimu / sycl::sin(pimu));	×
146
147	Tp d = -sycl::log(x2);	×
148	Tp e = mu * d;	×
149	const Tp fact2 = (sycl::fabs(e) < eps ? Tp(1) : sycl::sinh(e) / e);	×
150
151	// compute the gamma functions required by the Temme series expansions
152	constexpr Tp gam1 =	×
153	-static_cast<Tp>(0.5772156649015328606065120900824024L);	×
154	const Tp gam2 = Tp(1) / sycl::tgamma(Tp(1));	×
155
156	Tp ff = fact * (gam1 * sycl::cosh(e) + gam2 * fact2 * d);	×
157	Tp sum = ff;	×
158	e = sycl::exp(e);	×
159
160	Tp p = e / (Tp(2) * gam2);	×
161	Tp q = Tp(1) / (Tp(2) * e * gam2);	×
162	Tp c = Tp(1);	×
163	d = x2 * x2;	×
164	Tp sum1 = p;	×
165	int i;	×
166	for (i = 1; i <= max_iter; ++i) {	×
167	ff = (i * ff + p + q) / (i * i - mu2);	×
168	c *= d / i;	×
169	p /= i - mu;	×
170	q /= i + mu;	×
171	const Tp del = c * ff;	×
172	sum += del;	×
173	const Tp __del1 = c * (p - i * ff);	×
174	sum1 += __del1;	×
175	if (sycl::fabs(del) < eps * sycl::fabs(sum)) {	×
176	break;	×
177	}	×
178	}	×
179	if (i > max_iter) {	×
180	// Bessel k series failed to converge
181	return std::numeric_limits<Tp>::quiet_NaN();	×
182	}	×
183	Kmu = sum;	×
184	Knu1 = sum1 * xi2;	×
185	}	×
186	else {	×
187	Tp b = Tp(2) * (Tp(1) + x);	×
188	Tp d = Tp(1) / b;	×
189	Tp delh = d;	×
190	Tp h = delh;	×
191	Tp q1 = Tp(0);	×
192	Tp q2 = Tp(1);	×
193	Tp a1 = Tp(0.25L) - mu2;	×
194	Tp q = c = a1;	×
195	Tp a = -a1;	×
196	Tp s = Tp(1) + q * delh;	×
197	int i;	×
198	for (i = 2; i <= max_iter; ++i) {	×
199	a -= 2 * (i - 1);	×
200	c = -a * c / i;	×
201	const Tp qnew = (q1 - b * q2) / a;	×
202	q1 = q2;	×
203	q2 = qnew;	×
204	q += c * qnew;	×
205	b += Tp(2);	×
206	d = Tp(1) / (b + a * d);	×
207	delh = (b * d - Tp(1)) * delh;	×
208	h += delh;	×
209	const Tp dels = q * delh;	×
210	s += dels;	×
211	if (sycl::fabs(dels / s) < eps) {	×
212	break;	×
213	}	×
214	}	×
215	if (i > max_iter) {	×
216	// Steed's method failed
217	return std::numeric_limits<Tp>::quiet_NaN();	×
218	}	×
219	h = a1 * h;	×
220	Kmu = sycl::sqrt(pi / (Tp(2) * x)) * sycl::exp(-x) / s;	×
221	Knu1 = Kmu * (mu + x + Tp(0.5L) - h) * xi;	×
222	}	×
223
224	Tp Kpmu = mu * xi * Kmu - Knu1;	×
225	Tp Inumu = xi / (f * Kmu - Kpmu);	×
226	return Inumu * Inul1 / Inul;	×
227	}	×
228
229	/**
230	* @brief Return the regular modified Bessel function of order 0.
231	*
232	* @param x The argument of the regular modified Bessel function.
233	* @return The output regular modified Bessel function.
234	*/
235	template <typename Tp>
236	inline Tp cyl_bessel_i0(Tp x)
237	{	×
238	if (sycl::isnan(x)) {	×
239	return std::numeric_limits<Tp>::quiet_NaN();	×
240	}	×
241
242	if (sycl::isinf(x)) {	×
243	// return +inf per any input infinity
244	return std::numeric_limits<Tp>::infinity();	×
245	}	×
246
247	if (x == Tp(0)) {	×
248	return Tp(1);	×
249	}	×
250
251	if (x * x < Tp(10)) {	×
252	return cyl_bessel_ij_0_series<Tp>(x, 200);	×
253	}	×
254	return bessel_ik_0(sycl::fabs(x));	×
255	}	×
256	} // namespace impl
257
258	using impl::cyl_bessel_i0;
259
260	#endif
261
262	template <typename argT, typename resT>
263	struct I0Functor
264	{
265	// is function constant for given argT
266	using is_constant = typename std::false_type;
267	// constant value, if constant
268	// constexpr resT constant_value = resT{};
269	// is function defined for sycl::vec
270	using supports_vec = typename std::false_type;
271	// do both argT and resT support subgroup store/load operation
272	using supports_sg_loadstore = typename std::true_type;
273
274	resT operator()(const argT &x) const
275	{	×
UNCOV 276	if constexpr (std::is_same_v<resT, sycl::half>) {	×
UNCOV 277	return static_cast<resT>(cyl_bessel_i0<float>(float(x)));	×
278	}
279	else {	×
280	return cyl_bessel_i0(x);	×
281	}	×
282	}	×
283	};
284	} // namespace dpnp::kernels::i0

IntelPython / dpnp / 14977901736

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous