• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 28036549049

23 Jun 2026 03:20PM UTC coverage: 61.52% (-0.3%) from 61.779%
28036549049

Pull #808

github

web-flow
Merge 2133a817f into 6ee760cce
Pull Request #808: enable batched gemm and memcpy on AMD GPUs analogous to the existing CUDA implementation

60 of 332 new or added lines in 7 files covered. (18.07%)

5 existing lines in 2 files now uncovered.

37177 of 60431 relevant lines covered (61.52%)

1008.85 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

72.33
/python/docc/compiler/docc_program.py
1
from abc import ABC, abstractmethod
4✔
2
import sys
4✔
3
from typing import Any, Dict, Optional
4✔
4
import json
4✔
5
import os
4✔
6
import re
4✔
7

8
from docc.sdfg import StructuredSDFG, TargetOptions
4✔
9
from docc.sdfg._sdfg import (
4✔
10
    _enable_statistics,
11
    _statistics_enabled_by_env,
12
    _statistics_summary,
13
)
14
from docc.compiler.compiled_sdfg import CompiledSDFG
4✔
15
from docc.compiler.target_registry import (
4✔
16
    get_target_schedule_fn,
17
    get_target_compile_fn,
18
    get_target_expand_fn,
19
    register_target_overrides,
20
)
21

22

23
def _parse_docc_debug() -> dict[str, str]:
    """Parse the ``DOCC_DEBUG`` environment variable into a flag dictionary.

    The variable holds entries separated by ``;`` or ``:``. Each entry is
    either a bare ``key`` (stored with an empty-string value) or
    ``key=value``; only the first ``=`` splits, so values may contain ``=``.
    Keys and values are stripped of surrounding whitespace.

    Entries whose key is empty after stripping (blank or whitespace-only
    entries, or entries starting with ``=``) are ignored so that stray
    separators or trailing spaces never register a bogus ``""`` flag.

    Returns:
        Mapping of flag name to its (possibly empty) string value. Empty
        when ``DOCC_DEBUG`` is unset or empty.
    """
    debug_dict: dict[str, str] = {}
    for entry in re.split(r"[;:]", os.environ.get("DOCC_DEBUG", "")):
        key, _, value = entry.partition("=")
        key = key.strip()
        if not key:
            continue
        debug_dict[key] = value.strip()
    return debug_dict
4✔
35

36

37
def _is_debug_dump(flags: dict[str, str]) -> bool:
    """Report whether the ``dump`` flag is present in the parsed debug flags.

    Only the presence of the key matters; its value is not inspected.
    """
    dump_requested = "dump" in flags
    return dump_requested
4✔
39

40

41
def _is_debug_compile(flags: dict[str, str]) -> bool:
    """Report whether the ``build`` flag is present in the parsed debug flags.

    Only the presence of the key matters; its value is not inspected.
    """
    debug_build_requested = "build" in flags
    return debug_build_requested
4✔
43

44

45
def _get_build_thread_count(flags: dict[str, str]) -> int:
    """Return the build thread count requested via the ``build_threads`` flag.

    Args:
        flags: Parsed debug flags (flag name -> string value).

    Returns:
        The integer value of ``build_threads``, or ``0`` when the flag is
        absent or given without a value (e.g. a bare ``build_threads``
        entry, which is stored as an empty string).
        NOTE(review): 0 presumably means "let the build pick a default";
        confirm against the compile backend.

    Raises:
        ValueError: If the flag value is present but not an integer.
    """
    raw_value = flags.get("build_threads", "").strip()
    if not raw_value:
        # A bare flag has no value; treat it like "not configured" rather
        # than failing on int("").
        return 0
    try:
        return int(raw_value)
    except ValueError as err:
        raise ValueError(
            f"DOCC_DEBUG build_threads must be an integer, got {raw_value!r}"
        ) from err
4✔
47

48

49
class DoccProgram(ABC):
    """Abstract base for programs lowered to a ``StructuredSDFG`` and compiled.

    Subclasses provide SDFG construction (``to_sdfg``), compilation
    (``compile``), invocation (``__call__``) and argument/result conversion.
    This base class stores the shared configuration, resolves compile-time
    options (including the ``DOCC_DEBUG`` and ``DOCC_CI`` environment
    variables) and implements the lowering pipeline in ``sdfg_pipe``.
    """

    def __init__(
        self,
        name: str,
        target: str = "none",
        category: str = "server",
        instrumentation_mode: Optional[str] = None,
        capture_args: Optional[bool] = None,
        remote_tuning: bool = False,
    ):
        """Store the configuration and read debug/CI environment settings.

        Args:
            name: Program name.
            target: Target identifier; ``"none"`` skips normalization in
                ``sdfg_pipe``.
            category: Forwarded to ``TargetOptions`` and custom target hooks.
            instrumentation_mode: Explicit instrumentation mode. When
                ``None``, ``DOCC_CI`` may supply ``"ols"``.
            capture_args: Explicit argument-capture switch. When ``None``,
                ``DOCC_CI`` may enable it.
            remote_tuning: Default remote-tuning setting for this program.
        """
        self.name = name
        self.target = target
        self.category = category
        self.remote_tuning = remote_tuning
        self.last_sdfg: Optional[StructuredSDFG] = None
        self._device_resident: bool = False
        self._device_backend: Optional[str] = None
        self.cache: dict = {}
        debug_flags = _parse_docc_debug()
        self.debug_dump: bool = _is_debug_dump(debug_flags)
        self.debug_build: bool = _is_debug_compile(debug_flags)
        self.build_thread_count: int = _get_build_thread_count(debug_flags)

        # DOCC_CI only fills in options the caller left unset (None).
        instrumentation_mode, capture_args = self._apply_docc_ci_defaults(
            instrumentation_mode, capture_args
        )
        self.instrumentation_mode = instrumentation_mode
        self.capture_args = capture_args

    @staticmethod
    def _apply_docc_ci_defaults(
        instrumentation_mode: Optional[str],
        capture_args: Optional[bool],
    ) -> tuple[Optional[str], Optional[bool]]:
        """Fill unset options from the ``DOCC_CI`` environment variable.

        ``DOCC_CI=regions`` defaults the instrumentation mode to ``"ols"``,
        ``DOCC_CI=arg-capture`` defaults argument capture to ``True``, and
        any other non-empty value is treated as full mode (both). Values
        that are already set (not ``None``) are never overridden; with
        ``DOCC_CI`` unset or empty both values pass through unchanged.
        """
        docc_ci = os.environ.get("DOCC_CI", "")
        if not docc_ci:
            return instrumentation_mode, capture_args

        wants_instrumentation = docc_ci != "arg-capture"
        wants_capture = docc_ci != "regions"
        if wants_instrumentation and instrumentation_mode is None:
            instrumentation_mode = "ols"
        if wants_capture and capture_args is None:
            capture_args = True
        return instrumentation_mode, capture_args

    @abstractmethod
    def __call__(self, *args: Any) -> Any:
        """Invoke the program with ``args``; implemented by subclasses."""
        pass

    @abstractmethod
    def compile(self, *args: Any, output_folder: Optional[str] = None) -> CompiledSDFG:
        """Compile the program for ``args``; implemented by subclasses."""
        pass

    def _resolve_compile_options(
        self,
        instrumentation_mode: Optional[str] = None,
        capture_args: Optional[bool] = None,
        remote_tuning: Optional[bool] = None,
    ) -> tuple[str, bool, bool]:
        """Resolve compile-time options, falling back to instance defaults and env vars.

        Resolution order for each option: explicit argument, then the
        instance attribute, then ``DOCC_CI`` (instrumentation mode and
        argument capture only), then the hard defaults ``""`` / ``False``.

        Returns:
            ``(instrumentation_mode, capture_args, remote_tuning)``.
        """
        if instrumentation_mode is None:
            instrumentation_mode = self.instrumentation_mode
        if capture_args is None:
            capture_args = self.capture_args
        if remote_tuning is None:
            remote_tuning = self.remote_tuning

        # The environment is consulted again here (not only in __init__) so
        # the value of DOCC_CI at compile time is honoured.
        instrumentation_mode, capture_args = self._apply_docc_ci_defaults(
            instrumentation_mode, capture_args
        )

        # Defaults
        if instrumentation_mode is None:
            instrumentation_mode = ""
        if capture_args is None:
            capture_args = False

        return instrumentation_mode, capture_args, remote_tuning

    def sdfg_pipe(
        self,
        sdfg: StructuredSDFG,
        output_folder: Optional[str],
        instrumentation_mode: str,
        capture_args: bool,
        remote_tuning: Optional[bool] = None,
    ) -> str:
        """Run the lowering pipeline on ``sdfg`` and compile it.

        Stages, in order: validate, einsum detection, expansion,
        simplification, normalization (skipped for target ``"none"``),
        scheduling, device-residency promotion (CUDA only) and compilation.
        Targets with registered expand/schedule/compile hooks use those
        instead of the built-in SDFG methods. With the ``dump`` debug flag
        set, the SDFG is dumped after each stage.

        Args:
            sdfg: The SDFG to lower; also stored in ``self.last_sdfg``.
            output_folder: Folder passed to dumps and to the compile step.
            instrumentation_mode: Instrumentation mode; a non-empty value
                also forces the ``py4.norm`` dump.
            capture_args: Whether to capture arguments; ``True`` also forces
                the ``py4.norm`` dump.
            remote_tuning: Overrides ``self.remote_tuning`` when not ``None``.

        Returns:
            The library path returned by the compile step.
        """

        def dump_stage(stage: str) -> None:
            # Per-stage debug dump; no-op unless the "dump" flag is set.
            if self.debug_dump:
                sdfg.dump(output_folder, stage, dump_dot=True)

        dump_stage("py0.parsed")

        # Enable statistics if envvar is set
        if _statistics_enabled_by_env():
            _enable_statistics()

        sdfg.validate()

        if remote_tuning is None:
            remote_tuning = self.remote_tuning

        target_options = TargetOptions()
        target_options.target = self.target
        target_options.category = self.category
        target_options.remote_tuning = remote_tuning

        # Einsum detection
        sdfg.einsum()
        dump_stage("py1.einsum")

        # Tensor targets keep tensor nodes
        custom_expand_fn = get_target_expand_fn(self.target)
        if custom_expand_fn is not None:
            custom_expand_fn(sdfg, self.category, {})
        else:
            sdfg.expand(target_options)
        dump_stage("py2.expanded")

        # Simplify pipelines
        sdfg.simplify()
        dump_stage("py3.opt")

        # Normalization for scheduling
        if self.target != "none":
            sdfg.normalize()

        # Unlike the other stage dumps, this one is also written (as JSON)
        # whenever instrumentation or argument capture is active.
        if self.debug_dump or instrumentation_mode or capture_args:
            sdfg.dump(
                output_folder,
                "py4.norm",
                dump_dot=self.debug_dump,
                dump_json=True,
                record_for_instrumentation=True,
            )

        # Schedule if target is specified
        custom_schedule_fn = get_target_schedule_fn(self.target)
        if custom_schedule_fn is not None:
            custom_schedule_fn(sdfg, self.category, {"remote_tuning": remote_tuning})
        else:
            sdfg.schedule(target_options)

        dump_stage("py5.post_sched")

        # Promote pointer arguments to device residency when the whole program keeps
        # data on device. Only enabled for the CUDA target: the device-resident
        # runtime path marshals arguments via cupy (CUDA), so promoting on other
        # backends (e.g. rocm) produces a device-resident ABI binary that the
        # runtime cannot feed correctly, causing GPU memory faults. Communicated
        # explicitly via the pass return value (bool), not through SDFG metadata.
        self._device_resident = False
        self._device_backend = None
        if self.target == "cuda":
            if sdfg.promote_device_residency(False):
                self._device_resident = True
                self._device_backend = self.target

        self.last_sdfg = sdfg

        custom_compile_fn = get_target_compile_fn(self.target)
        if custom_compile_fn is not None:
            lib_path = custom_compile_fn(
                sdfg,
                output_folder,
                instrumentation_mode,
                capture_args,
                {"debug_build": self.debug_build, "threads": self.build_thread_count},
            )
        else:
            lib_path = sdfg._compile(
                output_folder=output_folder,
                target=self.target,
                instrumentation_mode=instrumentation_mode,
                capture_args=capture_args,
                debug_build=self.debug_build,
                threads=self.build_thread_count,
            )

        # Dump statistics after compile
        if _statistics_enabled_by_env():
            print(_statistics_summary(), file=sys.stderr)

        # Record the device-residency decision in the persisted (py4.norm) SDFG
        # metadata. It is computed here (not stored in metadata by the pass) and
        # decides host vs device argument marshalling. Binary-reuse paths
        # (DOCC_REUSE_BINARIES) load only the cached .so + normalized SDFG and
        # never re-run scheduling/promotion, so without this they default to
        # host execution and feed host pointers into a device-resident binary
        # -> heap corruption / double free.
        if output_folder:
            self._persist_device_residency(output_folder, sdfg)

        return lib_path

    def _persist_device_residency(
        self, output_folder: str, sdfg: StructuredSDFG
    ) -> None:
        """Stamp the device-residency decision into the persisted SDFG metadata.

        Patches only the ``metadata`` object of the already-written
        ``py4.norm.json`` (the file the reuse path loads), leaving the SDFG
        structure and element IDs untouched so instrumentation references stay
        valid.

        This is best-effort and never raises for I/O or parse problems. A
        missing file is silently ignored (``sdfg_pipe`` only requests the
        ``py4.norm`` dump for debug, instrumentation or capture builds); any
        other failure is reported on stderr, because a stale or missing
        stamp changes how a reused binary is fed its arguments.
        """
        json_path = os.path.join(output_folder, f"{sdfg.name}.py4.norm.json")

        def warn(reason: object) -> None:
            print(
                f"docc: could not persist device residency to {json_path}: {reason}",
                file=sys.stderr,
            )

        try:
            with open(json_path) as f:
                data = json.load(f)
        except FileNotFoundError:
            # Nothing was dumped for this build, so there is nothing to patch.
            return
        except (OSError, ValueError) as err:
            warn(err)
            return

        metadata = data.setdefault("metadata", {}) if isinstance(data, dict) else None
        if not isinstance(metadata, dict):
            warn("unexpected JSON layout (expected an object with a 'metadata' object)")
            return

        metadata["device_resident"] = "1" if self._device_resident else "0"
        metadata["device_backend"] = self._device_backend or ""

        try:
            # Serialize before opening for write so a serialization failure
            # cannot leave a truncated file behind.
            payload = json.dumps(data)
            with open(json_path, "w") as f:
                f.write(payload)
        except (OSError, ValueError) as err:
            warn(err)

    @abstractmethod
    def to_sdfg(self, *args: Any) -> StructuredSDFG:
        """Build the ``StructuredSDFG`` for ``args``; implemented by subclasses."""
        pass

    @abstractmethod
    def _convert_inputs(self, args: tuple) -> tuple:
        """Convert call arguments before execution; implemented by subclasses."""
        pass

    @abstractmethod
    def _convert_outputs(self, result: Any, original_args: tuple) -> Any:
        """Convert the execution result for the caller; implemented by subclasses."""
        pass

    def _get_cache_key(self, *args: Any) -> str:
        """Return the cache key for ``args``.

        The base implementation ignores ``args`` and returns an empty string.
        NOTE(review): presumably overridden by subclasses that use
        ``self.cache`` — confirm against the concrete programs.
        """
        return ""
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc