• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 14630481637

23 Apr 2025 07:04PM UTC coverage: 72.178% (-0.002%) from 72.18%
14630481637

push

github

DaanDeMeyer
mkosi: Run clangd within the tools tree instead of the build container

Running within the build sandbox has a number of disadvantages:
- We have a separate clangd cache for each distribution/release combo
- It requires building the full image before clangd can be used
- It breaks every time the image becomes out of date and requires a
  rebuild
- We can't look at system headers as we don't have the knowledge to map
  them from inside the build sandbox to the corresponding path on the host

Instead, let's have mkosi.clangd run clangd within the tools tree. We
already require building systemd for both the host and the target anyway,
and all the dependencies to build systemd are installed in the tools tree
already for that, as well as clangd since it's installed together with the
other clang tooling we install in the tools tree. Unlike the previous approach,
this approach only requires the mkosi tools tree to be built upfront, which has
a much higher chance of not invalidating its cache. We can also trivially map
system header lookups from within the sandbox to the path within mkosi.tools
on the host so that starts working as well.

297054 of 411557 relevant lines covered (72.18%)

686269.58 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.85
/src/basic/cgroup-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <errno.h>
4
#include <limits.h>
5
#include <signal.h>
6
#include <stddef.h>
7
#include <stdlib.h>
8
#include <sys/types.h>
9
#include <sys/utsname.h>
10
#include <sys/xattr.h>
11
#include <threads.h>
12
#include <unistd.h>
13

14
#include "alloc-util.h"
15
#include "capsule-util.h"
16
#include "cgroup-util.h"
17
#include "constants.h"
18
#include "dirent-util.h"
19
#include "extract-word.h"
20
#include "fd-util.h"
21
#include "fileio.h"
22
#include "format-util.h"
23
#include "fs-util.h"
24
#include "log.h"
25
#include "login-util.h"
26
#include "macro.h"
27
#include "missing_fs.h"
28
#include "missing_magic.h"
29
#include "mkdir.h"
30
#include "parse-util.h"
31
#include "path-util.h"
32
#include "process-util.h"
33
#include "set.h"
34
#include "special.h"
35
#include "stat-util.h"
36
#include "stdio-util.h"
37
#include "string-table.h"
38
#include "string-util.h"
39
#include "strv.h"
40
#include "unit-name.h"
41
#include "user-util.h"
42
#include "xattr-util.h"
43

44
int cg_path_open(const char *controller, const char *path) {
786✔
45
        _cleanup_free_ char *fs = NULL;
786✔
46
        int r;
786✔
47

48
        r = cg_get_path(controller, path, /* item=*/ NULL, &fs);
786✔
49
        if (r < 0)
786✔
50
                return r;
51

52
        return RET_NERRNO(open(fs, O_DIRECTORY|O_CLOEXEC));
786✔
53
}
54

55
int cg_cgroupid_open(int cgroupfs_fd, uint64_t id) {
8✔
56
        _cleanup_close_ int fsfd = -EBADF;
8✔
57

58
        if (cgroupfs_fd < 0) {
8✔
59
                fsfd = open("/sys/fs/cgroup", O_CLOEXEC|O_DIRECTORY);
7✔
60
                if (fsfd < 0)
7✔
61
                        return -errno;
×
62

63
                cgroupfs_fd = fsfd;
64
        }
65

66
        cg_file_handle fh = CG_FILE_HANDLE_INIT;
8✔
67
        CG_FILE_HANDLE_CGROUPID(fh) = id;
8✔
68

69
        return RET_NERRNO(open_by_handle_at(cgroupfs_fd, &fh.file_handle, O_DIRECTORY|O_CLOEXEC));
15✔
70
}
71

72
int cg_path_from_cgroupid(int cgroupfs_fd, uint64_t id, char **ret) {
×
73
        _cleanup_close_ int cgfd = -EBADF;
×
74
        int r;
×
75

76
        cgfd = cg_cgroupid_open(cgroupfs_fd, id);
×
77
        if (cgfd < 0)
×
78
                return cgfd;
79

80
        _cleanup_free_ char *path = NULL;
×
81
        r = fd_get_path(cgfd, &path);
×
82
        if (r < 0)
×
83
                return r;
84

85
        if (!path_startswith(path, "/sys/fs/cgroup/"))
×
86
                return -EXDEV; /* recognizable error */
87

88
        if (ret)
×
89
                *ret = TAKE_PTR(path);
×
90
        return 0;
91
}
92

93
int cg_get_cgroupid_at(int dfd, const char *path, uint64_t *ret) {
4,036✔
94
        cg_file_handle fh = CG_FILE_HANDLE_INIT;
4,036✔
95
        int mnt_id;
4,036✔
96

97
        assert(dfd >= 0 || (dfd == AT_FDCWD && path_is_absolute(path)));
8,039✔
98
        assert(ret);
4,036✔
99

100
        /* This is cgroupfs so we know the size of the handle, thus no need to loop around like
101
         * name_to_handle_at_loop() does in mountpoint-util.c */
102
        if (name_to_handle_at(dfd, strempty(path), &fh.file_handle, &mnt_id, isempty(path) ? AT_EMPTY_PATH : 0) < 0) {
8,072✔
103
                assert(errno != EOVERFLOW);
×
104
                return -errno;
×
105
        }
106

107
        *ret = CG_FILE_HANDLE_CGROUPID(fh);
4,036✔
108
        return 0;
4,036✔
109
}
110

111
static int cg_enumerate_items(const char *controller, const char *path, FILE **ret, const char *item) {
22,368✔
112
        _cleanup_free_ char *fs = NULL;
22,368✔
113
        FILE *f;
22,368✔
114
        int r;
22,368✔
115

116
        assert(ret);
22,368✔
117

118
        r = cg_get_path(controller, path, item, &fs);
22,368✔
119
        if (r < 0)
22,368✔
120
                return r;
121

122
        f = fopen(fs, "re");
22,368✔
123
        if (!f)
22,368✔
124
                return -errno;
14,450✔
125

126
        *ret = f;
7,918✔
127
        return 0;
7,918✔
128
}
129

130
/* Convenience wrapper around cg_enumerate_items() that opens the "cgroup.procs" attribute of the
 * given cgroup. The stream is returned via 'ret'; the return value is passed through unchanged. */
int cg_enumerate_processes(const char *controller, const char *path, FILE **ret) {
        int k;

        k = cg_enumerate_items(controller, path, ret, "cgroup.procs");
        return k;
}
133

134
/* Reads the next PID from a cgroup process list stream.
 *
 * Returns 1 and stores the PID in *ret if one was read, 0 (with *ret set to 0) on end of file,
 * -EIO if the number read exceeds PID_T_MAX, and the value of errno_or_else(EIO) if parsing fails
 * for any reason other than end of file.
 *
 * Note that cgroup.procs might contain duplicates! See cgroups.txt for details. */
int cg_read_pid(FILE *f, pid_t *ret, CGroupFlags flags) {
        assert(f);
        assert(ret);

        const bool keep_unmapped = FLAGS_SET(flags, CGROUP_DONT_SKIP_UNMAPPED);

        for (;;) {
                unsigned long value;

                errno = 0;
                if (fscanf(f, "%lu", &value) != 1) {
                        if (!feof(f))
                                return errno_or_else(EIO);

                        *ret = 0;
                        return 0;
                }

                if (value > PID_T_MAX)
                        return -EIO;

                /* In some circumstances (e.g. WSL), cgroups might contain unmappable PIDs from other
                 * contexts. These show up as zeros and are, depending on what the caller asked for,
                 * either silently skipped or returned as-is. */
                if (value != 0 || keep_unmapped) {
                        *ret = (pid_t) value;
                        return 1;
                }
        }
}
167

168
/* Like cg_read_pid(), but returns the next process as a PidRef.
 *
 * Returns 1 with *ret filled in, 0 with *ret set to PIDREF_NULL when the list is exhausted,
 * -EREMOTE if a zero PID is returned by cg_read_pid(), or another negative value on failure.
 *
 * With CGROUP_NO_PIDFD set the reference is built from the plain PID via PIDREF_MAKE_FROM_PID();
 * otherwise pidref_set_pid() is used, and entries for which it reports -ESRCH are skipped. */
int cg_read_pidref(FILE *f, PidRef *ret, CGroupFlags flags) {
        assert(f);
        assert(ret);

        for (;;) {
                pid_t pid;
                int k;

                k = cg_read_pid(f, &pid, flags);
                if (k < 0)
                        return log_debug_errno(k, "Failed to read pid from cgroup item: %m");
                if (k == 0) {
                        *ret = PIDREF_NULL;
                        return 0;
                }

                if (pid == 0)
                        return -EREMOTE;

                if (FLAGS_SET(flags, CGROUP_NO_PIDFD)) {
                        *ret = PIDREF_MAKE_FROM_PID(pid);
                        return 1;
                }

                k = pidref_set_pid(ret, pid);
                if (k == -ESRCH)
                        continue; /* gone by now? just skip over it and read the next one */
                if (k < 0)
                        return k;

                return 1;
        }
}
202

203
int cg_read_event(
6,566✔
204
                const char *controller,
205
                const char *path,
206
                const char *event,
207
                char **ret) {
208

209
        _cleanup_free_ char *events = NULL, *content = NULL;
6,566✔
210
        int r;
6,566✔
211

212
        r = cg_get_path(controller, path, "cgroup.events", &events);
6,566✔
213
        if (r < 0)
6,566✔
214
                return r;
215

216
        r = read_full_virtual_file(events, &content, NULL);
6,566✔
217
        if (r < 0)
6,566✔
218
                return r;
219

220
        for (const char *p = content;;) {
259✔
221
                _cleanup_free_ char *line = NULL, *key = NULL;
259✔
222
                const char *q;
259✔
223

224
                r = extract_first_word(&p, &line, "\n", 0);
259✔
225
                if (r < 0)
259✔
226
                        return r;
227
                if (r == 0)
259✔
228
                        return -ENOENT;
229

230
                q = line;
259✔
231
                r = extract_first_word(&q, &key, " ", 0);
259✔
232
                if (r < 0)
259✔
233
                        return r;
234
                if (r == 0)
259✔
235
                        return -EINVAL;
236

237
                if (!streq(key, event))
259✔
238
                        continue;
×
239

240
                return strdup_to(ret, q);
259✔
241
        }
242
}
243

244
/* Reports whether the "cgroup.kill" attribute is available. The answer is computed once per thread
 * and cached afterwards.
 *
 * It is false whenever cg_all_unified() returns zero or a negative value. Otherwise the existence of
 * /sys/fs/cgroup/init.scope/cgroup.kill is probed with access(); any probe failure is treated as
 * "not supported", and failures other than ENOENT are additionally logged at debug level. */
bool cg_kill_supported(void) {
        static thread_local int cached = -1;

        if (cached < 0) {
                if (cg_all_unified() <= 0)
                        cached = false;
                else if (access("/sys/fs/cgroup/init.scope/cgroup.kill", F_OK) >= 0)
                        cached = true;
                else {
                        if (errno != ENOENT)
                                log_debug_errno(errno, "Failed to check whether cgroup.kill is available, assuming not: %m");
                        cached = false;
                }
        }

        return cached;
}
259

260
int cg_enumerate_subgroups(const char *controller, const char *path, DIR **ret) {
21,206✔
261
        _cleanup_free_ char *fs = NULL;
21,206✔
262
        DIR *d;
21,206✔
263
        int r;
21,206✔
264

265
        assert(ret);
21,206✔
266

267
        /* This is not recursive! */
268

269
        r = cg_get_path(controller, path, NULL, &fs);
21,206✔
270
        if (r < 0)
21,206✔
271
                return r;
272

273
        d = opendir(fs);
21,206✔
274
        if (!d)
21,206✔
275
                return -errno;
12,359✔
276

277
        *ret = d;
8,847✔
278
        return 0;
8,847✔
279
}
280

281
int cg_read_subgroup(DIR *d, char **ret) {
14,259✔
282
        assert(d);
14,259✔
283
        assert(ret);
14,259✔
284

285
        FOREACH_DIRENT_ALL(de, d, return -errno) {
427,810✔
286
                if (de->d_type != DT_DIR)
418,733✔
287
                        continue;
395,397✔
288

289
                if (dot_or_dot_dot(de->d_name))
23,336✔
290
                        continue;
18,154✔
291

292
                return strdup_to_full(ret, de->d_name);
5,182✔
293
        }
294

295
        *ret = NULL;
9,077✔
296
        return 0;
9,077✔
297
}
298

299
static int cg_kill_items(
21,856✔
300
                const char *path,
301
                const char *item,
302
                int sig,
303
                CGroupFlags flags,
304
                Set *s,
305
                cg_kill_log_func_t log_kill,
306
                void *userdata) {
307

308
        _cleanup_set_free_ Set *allocated_set = NULL;
21,856✔
309
        int r, ret = 0;
21,856✔
310

311
        assert(path);
21,856✔
312
        assert(item);
21,856✔
313
        assert(sig >= 0);
21,856✔
314

315
         /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence
316
          * don't send SIGCONT on SIGKILL. */
317
        if (IN_SET(sig, SIGCONT, SIGKILL))
21,856✔
318
                flags &= ~CGROUP_SIGCONT;
8,510✔
319

320
        /* This goes through the tasks list and kills them all. This is repeated until no further processes
321
         * are added to the tasks list, to properly handle forking processes.
322
         *
323
         * When sending SIGKILL, prefer cg_kill_kernel_sigkill(), which is fully atomic. */
324

325
        if (!s) {
21,856✔
326
                s = allocated_set = set_new(NULL);
1,494✔
327
                if (!s)
1,494✔
328
                        return -ENOMEM;
329
        }
330

331
        bool done;
21,955✔
332
        do {
21,955✔
333
                _cleanup_fclose_ FILE *f = NULL;
14,450✔
334
                int ret_log_kill;
21,955✔
335

336
                done = true;
21,955✔
337

338
                r = cg_enumerate_items(SYSTEMD_CGROUP_CONTROLLER, path, &f, item);
21,955✔
339
                if (r == -ENOENT)
21,955✔
340
                        break;
341
                if (r < 0)
7,505✔
342
                        return RET_GATHER(ret, log_debug_errno(r, "Failed to enumerate cgroup items: %m"));
×
343

344
                for (;;) {
10,421✔
345
                        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
10,421✔
346

347
                        r = cg_read_pidref(f, &pidref, flags);
10,421✔
348
                        if (r < 0)
10,421✔
349
                                return RET_GATHER(ret, log_debug_errno(r, "Failed to read pidref from cgroup '%s': %m", path));
×
350
                        if (r == 0)
10,421✔
351
                                break;
352

353
                        if ((flags & CGROUP_IGNORE_SELF) && pidref_is_self(&pidref))
2,916✔
354
                                continue;
1,496✔
355

356
                        if (set_contains(s, PID_TO_PTR(pidref.pid)))
1,420✔
357
                                continue;
1,027✔
358

359
                        /* Ignore kernel threads to mimic the behavior of cgroup.kill. */
360
                        if (pidref_is_kernel_thread(&pidref) > 0) {
393✔
361
                                log_debug("Ignoring kernel thread with pid " PID_FMT " in cgroup '%s'", pidref.pid, path);
×
362
                                continue;
×
363
                        }
364

365
                        if (log_kill)
393✔
366
                                ret_log_kill = log_kill(&pidref, sig, userdata);
92✔
367

368
                        /* If we haven't killed this process yet, kill it */
369
                        r = pidref_kill(&pidref, sig);
393✔
370
                        if (r < 0 && r != -ESRCH)
393✔
371
                                RET_GATHER(ret, log_debug_errno(r, "Failed to kill process with pid " PID_FMT " from cgroup '%s': %m", pidref.pid, path));
×
372
                        if (r >= 0) {
393✔
373
                                if (flags & CGROUP_SIGCONT)
393✔
374
                                        (void) pidref_kill(&pidref, SIGCONT);
298✔
375

376
                                if (ret == 0) {
393✔
377
                                        if (log_kill)
170✔
378
                                                ret = ret_log_kill;
379
                                        else
380
                                                ret = 1;
78✔
381
                                }
382
                        }
383

384
                        done = false;
393✔
385

386
                        r = set_put(s, PID_TO_PTR(pidref.pid));
393✔
387
                        if (r < 0)
393✔
388
                                return RET_GATHER(ret, r);
×
389
                }
390

391
                /* To avoid racing against processes which fork quicker than we can kill them, we repeat this
392
                 * until no new pids need to be killed. */
393

394
        } while (!done);
7,505✔
395

396
        return ret;
397
}
398

399
int cg_kill(
17,601✔
400
                const char *path,
401
                int sig,
402
                CGroupFlags flags,
403
                Set *s,
404
                cg_kill_log_func_t log_kill,
405
                void *userdata) {
406

407
        int r, ret;
17,601✔
408

409
        assert(path);
17,601✔
410

411
        ret = cg_kill_items(path, "cgroup.procs", sig, flags, s, log_kill, userdata);
17,601✔
412
        if (ret < 0)
17,601✔
413
                return log_debug_errno(ret, "Failed to kill processes in cgroup '%s' item cgroup.procs: %m", path);
×
414
        if (sig != SIGKILL)
17,601✔
415
                return ret;
416

417
        /* Only in case of killing with SIGKILL and when using cgroupsv2, kill remaining threads manually as
418
           a workaround for kernel bug. It was fixed in 5.2-rc5 (c03cd7738a83), backported to 4.19.66
419
           (4340d175b898) and 4.14.138 (feb6b123b7dd). */
420
        r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
4,255✔
421
        if (r < 0)
4,255✔
422
                return r;
423
        if (r == 0)
4,255✔
424
                return ret;
425

426
        /* Opening pidfds for non thread group leaders only works from 6.9 onwards with PIDFD_THREAD. On
427
         * older kernels or without PIDFD_THREAD pidfd_open() fails with EINVAL. Since we might read non
428
         * thread group leader IDs from cgroup.threads, we set CGROUP_NO_PIDFD to avoid trying open pidfd's
429
         * for them and instead use the regular pid. */
430
        r = cg_kill_items(path, "cgroup.threads", sig, flags|CGROUP_NO_PIDFD, s, log_kill, userdata);
4,255✔
431
        if (r < 0)
4,255✔
432
                return log_debug_errno(r, "Failed to kill processes in cgroup '%s' item cgroup.threads: %m", path);
×
433

434
        return r > 0 || ret > 0;
4,255✔
435
}
436

437
int cg_kill_recursive(
16,852✔
438
                const char *path,
439
                int sig,
440
                CGroupFlags flags,
441
                Set *s,
442
                cg_kill_log_func_t log_kill,
443
                void *userdata) {
444

445
        _cleanup_set_free_ Set *allocated_set = NULL;
×
446
        _cleanup_closedir_ DIR *d = NULL;
16,852✔
447
        int r, ret;
16,852✔
448

449
        assert(path);
16,852✔
450
        assert(sig >= 0);
16,852✔
451

452
        if (!s) {
16,852✔
453
                s = allocated_set = set_new(NULL);
16,229✔
454
                if (!s)
16,229✔
455
                        return -ENOMEM;
456
        }
457

458
        ret = cg_kill(path, sig, flags, s, log_kill, userdata);
16,852✔
459

460
        r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
16,852✔
461
        if (r < 0) {
16,852✔
462
                if (r != -ENOENT)
12,359✔
463
                        RET_GATHER(ret, log_debug_errno(r, "Failed to enumerate cgroup '%s' subgroups: %m", path));
×
464

465
                return ret;
12,359✔
466
        }
467

468
        for (;;) {
4,625✔
469
                _cleanup_free_ char *fn = NULL, *p = NULL;
4,559✔
470

471
                r = cg_read_subgroup(d, &fn);
4,559✔
472
                if (r < 0) {
4,559✔
473
                        RET_GATHER(ret, log_debug_errno(r, "Failed to read subgroup from cgroup '%s': %m", path));
×
474
                        break;
475
                }
476
                if (r == 0)
4,559✔
477
                        break;
478

479
                p = path_join(empty_to_root(path), fn);
66✔
480
                if (!p)
66✔
481
                        return -ENOMEM;
×
482

483
                r = cg_kill_recursive(p, sig, flags, s, log_kill, userdata);
66✔
484
                if (r < 0)
66✔
485
                        log_debug_errno(r, "Failed to recursively kill processes in cgroup '%s': %m", p);
×
486
                if (r != 0 && ret >= 0)
66✔
487
                        ret = r;
15✔
488
        }
489

490
        return ret;
4,493✔
491
}
492

493
int cg_kill_kernel_sigkill(const char *path) {
×
494
        _cleanup_free_ char *killfile = NULL;
×
495
        int r;
×
496

497
        /* Kills the cgroup at `path` directly by writing to its cgroup.kill file.  This sends SIGKILL to all
498
         * processes in the cgroup and has the advantage of being completely atomic, unlike cg_kill_items(). */
499

500
        assert(path);
×
501

502
        if (!cg_kill_supported())
×
503
                return -EOPNOTSUPP;
504

505
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "cgroup.kill", &killfile);
×
506
        if (r < 0)
×
507
                return r;
508

509
        r = write_string_file(killfile, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
×
510
        if (r < 0)
×
511
                return log_debug_errno(r, "Failed to write to cgroup.kill for cgroup '%s': %m", path);
×
512

513
        return 0;
514
}
515

516
static const char *controller_to_dirname(const char *controller) {
×
517
        assert(controller);
×
518

519
        /* Converts a controller name to the directory name below /sys/fs/cgroup/ we want to mount it
520
         * to. Effectively, this just cuts off the name= prefixed used for named hierarchies, if it is
521
         * specified. */
522

523
        if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
×
524
                if (cg_hybrid_unified() > 0)
×
525
                        controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
526
                else
527
                        controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
×
528
        }
529

530
        return startswith(controller, "name=") ?: controller;
×
531
}
532

533
static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **ret) {
×
534
        const char *dn;
×
535
        char *t = NULL;
×
536

537
        assert(ret);
×
538
        assert(controller);
×
539

540
        dn = controller_to_dirname(controller);
×
541

542
        if (isempty(path) && isempty(suffix))
×
543
                t = path_join("/sys/fs/cgroup", dn);
×
544
        else if (isempty(path))
×
545
                t = path_join("/sys/fs/cgroup", dn, suffix);
×
546
        else if (isempty(suffix))
×
547
                t = path_join("/sys/fs/cgroup", dn, path);
×
548
        else
549
                t = path_join("/sys/fs/cgroup", dn, path, suffix);
×
550
        if (!t)
×
551
                return -ENOMEM;
552

553
        *ret = t;
×
554
        return 0;
×
555
}
556

557
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]" for the unified hierarchy layout. Empty or NULL
 * 'path' and 'suffix' components are simply left out.
 *
 * Returns 0 and stores the newly allocated string in *ret, or -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **ret) {
        char *joined;

        assert(ret);

        const bool no_path = isempty(path), no_suffix = isempty(suffix);

        if (no_path)
                joined = no_suffix ?
                        strdup("/sys/fs/cgroup") :
                        path_join("/sys/fs/cgroup", suffix);
        else
                joined = no_suffix ?
                        path_join("/sys/fs/cgroup", path) :
                        path_join("/sys/fs/cgroup", path, suffix);
        if (!joined)
                return -ENOMEM;

        *ret = joined;
        return 0;
}
576

577
/* Computes the path for a cgroup (and optionally an attribute file 'suffix' inside of it).
 *
 * If 'controller' is NULL, the path *below* the controllers is returned, without any prefix: just
 * 'path' and 'suffix' joined. In that mode at least one of the two must be non-empty, otherwise
 * -EINVAL is returned.
 *
 * Otherwise the controller name is validated (-EINVAL if invalid) and the path is assembled with
 * join_path_unified() or join_path_legacy(), depending on whether cg_all_unified() returns a
 * positive value.
 *
 * In both modes the result is run through path_simplify(). Returns 0 and a newly allocated string
 * in *ret on success, a negative value on failure. */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **ret) {
        int unified, k;

        assert(ret);

        if (!controller) {
                char *bare;

                const bool no_path = isempty(path), no_suffix = isempty(suffix);

                if (no_path && no_suffix)
                        return -EINVAL;

                if (no_suffix)
                        bare = strdup(path);
                else if (no_path)
                        bare = strdup(suffix);
                else
                        bare = path_join(path, suffix);
                if (!bare)
                        return -ENOMEM;

                *ret = path_simplify(bare);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        unified = cg_all_unified();
        if (unified < 0)
                return unified;

        k = unified > 0 ?
                join_path_unified(path, suffix, ret) :
                join_path_legacy(controller, path, suffix, ret);
        if (k < 0)
                return k;

        path_simplify(*ret);
        return 0;
}
620

621
/* Checks whether the given controller's hierarchy is accessible below /sys/fs/cgroup/.
 *
 * If 'root' is NULL only the existence (F_OK) of the controller's directory is tested. If 'root' is
 * specified, we check that:
 * - a possible subcgroup is created at root,
 * - we can modify the hierarchy,
 * by testing the "cgroup.procs" file of that subcgroup for write access (W_OK).
 *
 * Returns the result of access_nofollow(). */
static int controller_is_v1_accessible(const char *root, const char *controller) {
        const char *subdir, *probe;
        int mode;

        assert(controller);

        subdir = controller_to_dirname(controller);

        /* With a NULL 'root' the argument list of strjoina() ends up with NULL in third position */
        probe = strjoina("/sys/fs/cgroup/", subdir, root, root ? "/cgroup.procs" : NULL);
        mode = root ? W_OK : F_OK;

        return access_nofollow(probe, mode);
}
635

636
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **ret) {
18,627✔
637
        int r;
18,627✔
638

639
        assert(controller);
18,627✔
640
        assert(ret);
18,627✔
641

642
        if (!cg_controller_is_valid(controller))
18,627✔
643
                return -EINVAL;
644

645
        r = cg_all_unified();
18,627✔
646
        if (r < 0)
18,627✔
647
                return r;
648
        if (r > 0) {
18,627✔
649
                /* In the unified hierarchy all controllers are considered accessible,
650
                 * except for the named hierarchies */
651
                if (startswith(controller, "name="))
18,627✔
652
                        return -EOPNOTSUPP;
653
        } else {
654
                /* Check if the specified controller is actually accessible */
655
                r = controller_is_v1_accessible(NULL, controller);
×
656
                if (r < 0)
×
657
                        return r;
658
        }
659

660
        return cg_get_path(controller, path, suffix, ret);
18,627✔
661
}
662

663
int cg_set_xattr(const char *path, const char *name, const void *value, size_t size, int flags) {
4,450✔
664
        _cleanup_free_ char *fs = NULL;
4,450✔
665
        int r;
4,450✔
666

667
        assert(path);
4,450✔
668
        assert(name);
4,450✔
669
        assert(value || size <= 0);
4,450✔
670

671
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
4,450✔
672
        if (r < 0)
4,450✔
673
                return r;
674

675
        return RET_NERRNO(setxattr(fs, name, value, size, flags));
4,450✔
676
}
677

678
int cg_get_xattr_malloc(const char *path, const char *name, char **ret, size_t *ret_size) {
15,514✔
679
        _cleanup_free_ char *fs = NULL;
15,514✔
680
        int r;
15,514✔
681

682
        assert(path);
15,514✔
683
        assert(name);
15,514✔
684

685
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
15,514✔
686
        if (r < 0)
15,514✔
687
                return r;
688

689
        return lgetxattr_malloc(fs, name, ret, ret_size);
15,514✔
690
}
691

692
int cg_get_xattr_bool(const char *path, const char *name) {
147✔
693
        _cleanup_free_ char *fs = NULL;
147✔
694
        int r;
147✔
695

696
        assert(path);
147✔
697
        assert(name);
147✔
698

699
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
147✔
700
        if (r < 0)
147✔
701
                return r;
702

703
        return getxattr_at_bool(AT_FDCWD, fs, name, /* flags= */ 0);
147✔
704
}
705

706
int cg_remove_xattr(const char *path, const char *name) {
24,710✔
707
        _cleanup_free_ char *fs = NULL;
24,710✔
708
        int r;
24,710✔
709

710
        assert(path);
24,710✔
711
        assert(name);
24,710✔
712

713
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
24,710✔
714
        if (r < 0)
24,710✔
715
                return r;
716

717
        return RET_NERRNO(removexattr(fs, name));
49,420✔
718
}
719

720
int cg_pid_get_path(const char *controller, pid_t pid, char **ret_path) {
39,885✔
721
        _cleanup_fclose_ FILE *f = NULL;
39,885✔
722
        const char *fs, *controller_str = NULL;  /* avoid false maybe-uninitialized warning */
39,885✔
723
        int unified, r;
39,885✔
724

725
        assert(pid >= 0);
39,885✔
726
        assert(ret_path);
39,885✔
727

728
        if (controller) {
39,885✔
729
                if (!cg_controller_is_valid(controller))
39,700✔
730
                        return -EINVAL;
731
        } else
732
                controller = SYSTEMD_CGROUP_CONTROLLER;
733

734
        unified = cg_unified_controller(controller);
39,885✔
735
        if (unified < 0)
39,885✔
736
                return unified;
737
        if (unified == 0) {
39,885✔
738
                if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
×
739
                        controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
740
                else
741
                        controller_str = controller;
×
742
        }
743

744
        fs = procfs_file_alloca(pid, "cgroup");
39,885✔
745
        r = fopen_unlocked(fs, "re", &f);
39,885✔
746
        if (r == -ENOENT)
39,885✔
747
                return -ESRCH;
748
        if (r < 0)
36,722✔
749
                return r;
750

751
        for (;;) {
36,722✔
752
                _cleanup_free_ char *line = NULL;
36,722✔
753
                char *e;
36,722✔
754

755
                r = read_line(f, LONG_LINE_MAX, &line);
36,722✔
756
                if (r < 0)
36,722✔
757
                        return r;
758
                if (r == 0)
36,713✔
759
                        return -ENODATA;
760

761
                if (unified) {
36,713✔
762
                        e = startswith(line, "0:");
36,713✔
763
                        if (!e)
36,713✔
764
                                continue;
×
765

766
                        e = strchr(e, ':');
36,713✔
767
                        if (!e)
36,713✔
768
                                continue;
×
769
                } else {
770
                        char *l;
×
771

772
                        l = strchr(line, ':');
×
773
                        if (!l)
×
774
                                continue;
×
775

776
                        l++;
×
777
                        e = strchr(l, ':');
×
778
                        if (!e)
×
779
                                continue;
×
780
                        *e = 0;
×
781

782
                        assert(controller_str);
×
783
                        r = string_contains_word(l, ",", controller_str);
×
784
                        if (r < 0)
×
785
                                return r;
786
                        if (r == 0)
×
787
                                continue;
×
788
                }
789

790
                _cleanup_free_ char *path = strdup(e + 1);
36,713✔
791
                if (!path)
36,713✔
792
                        return -ENOMEM;
793

794
                /* Refuse cgroup paths from outside our cgroup namespace */
795
                if (startswith(path, "/../"))
36,713✔
796
                        return -EUNATCH;
797

798
                /* Truncate suffix indicating the process is a zombie */
799
                e = endswith(path, " (deleted)");
36,713✔
800
                if (e)
36,713✔
801
                        *e = 0;
294✔
802

803
                *ret_path = TAKE_PTR(path);
36,713✔
804
                return 0;
36,713✔
805
        }
806
}
807

808
int cg_pidref_get_path(const char *controller, const PidRef *pidref, char **ret_path) {
10,811✔
809
        _cleanup_free_ char *path = NULL;
10,811✔
810
        int r;
10,811✔
811

812
        assert(ret_path);
10,811✔
813

814
        if (!pidref_is_set(pidref))
10,811✔
815
                return -ESRCH;
816
        if (pidref_is_remote(pidref))
21,622✔
817
                return -EREMOTE;
818

819
        // XXX: Ideally we'd use pidfd_get_cgroupid() + cg_path_from_cgroupid() here, to extract this
820
        // bit of information from pidfd directly. However, the latter requires privilege and it's
821
        // not entirely clear how to handle cgroups from outer namespace.
822

823
        r = cg_pid_get_path(controller, pidref->pid, &path);
10,811✔
824
        if (r < 0)
10,811✔
825
                return r;
826

827
        /* Before we return the path, make sure the procfs entry for this pid still matches the pidref */
828
        r = pidref_verify(pidref);
10,809✔
829
        if (r < 0)
10,809✔
830
                return r;
831

832
        *ret_path = TAKE_PTR(path);
10,809✔
833
        return 0;
10,809✔
834
}
835

836
int cg_is_empty(const char *controller, const char *path) {
4✔
837
        _cleanup_fclose_ FILE *f = NULL;
4✔
838
        pid_t pid;
4✔
839
        int r;
4✔
840

841
        assert(path);
4✔
842

843
        r = cg_enumerate_processes(controller, path, &f);
4✔
844
        if (r == -ENOENT)
4✔
845
                return true;
846
        if (r < 0)
4✔
847
                return r;
848

849
        r = cg_read_pid(f, &pid, CGROUP_DONT_SKIP_UNMAPPED);
4✔
850
        if (r < 0)
4✔
851
                return r;
852

853
        return r == 0;
4✔
854
}
855

856
int cg_is_empty_recursive(const char *controller, const char *path) {
6,566✔
857
        int r;
6,566✔
858

859
        assert(path);
6,566✔
860

861
        /* The root cgroup is always populated */
862
        if (controller && empty_or_root(path))
6,566✔
863
                return false;
864

865
        r = cg_unified_controller(controller);
6,566✔
866
        if (r < 0)
6,566✔
867
                return r;
868
        if (r > 0) {
6,566✔
869
                _cleanup_free_ char *t = NULL;
6,566✔
870

871
                /* On the unified hierarchy we can check empty state
872
                 * via the "populated" attribute of "cgroup.events". */
873

874
                r = cg_read_event(controller, path, "populated", &t);
6,566✔
875
                if (r == -ENOENT)
6,566✔
876
                        return true;
877
                if (r < 0)
259✔
878
                        return r;
879

880
                return streq(t, "0");
259✔
881
        } else {
882
                _cleanup_closedir_ DIR *d = NULL;
×
883
                char *fn;
×
884

885
                r = cg_is_empty(controller, path);
×
886
                if (r <= 0)
×
887
                        return r;
888

889
                r = cg_enumerate_subgroups(controller, path, &d);
×
890
                if (r == -ENOENT)
×
891
                        return true;
892
                if (r < 0)
×
893
                        return r;
894

895
                while ((r = cg_read_subgroup(d, &fn)) > 0) {
×
896
                        _cleanup_free_ char *p = NULL;
×
897

898
                        p = path_join(path, fn);
×
899
                        free(fn);
×
900
                        if (!p)
×
901
                                return -ENOMEM;
902

903
                        r = cg_is_empty_recursive(controller, p);
×
904
                        if (r <= 0)
×
905
                                return r;
906
                }
907
                if (r < 0)
×
908
                        return r;
909

910
                return true;
×
911
        }
912
}
913

914
int cg_split_spec(const char *spec, char **ret_controller, char **ret_path) {
23✔
915
        _cleanup_free_ char *controller = NULL, *path = NULL;
23✔
916
        int r;
23✔
917

918
        assert(spec);
23✔
919

920
        if (*spec == '/') {
23✔
921
                if (!path_is_normalized(spec))
15✔
922
                        return -EINVAL;
923

924
                if (ret_path) {
15✔
925
                        r = path_simplify_alloc(spec, &path);
15✔
926
                        if (r < 0)
15✔
927
                                return r;
928
                }
929

930
        } else {
931
                const char *e;
8✔
932

933
                e = strchr(spec, ':');
8✔
934
                if (e) {
8✔
935
                        controller = strndup(spec, e-spec);
6✔
936
                        if (!controller)
6✔
937
                                return -ENOMEM;
938
                        if (!cg_controller_is_valid(controller))
6✔
939
                                return -EINVAL;
940

941
                        if (!isempty(e + 1)) {
3✔
942
                                path = strdup(e+1);
2✔
943
                                if (!path)
2✔
944
                                        return -ENOMEM;
945

946
                                if (!path_is_normalized(path) ||
2✔
947
                                    !path_is_absolute(path))
2✔
948
                                        return -EINVAL;
949

950
                                path_simplify(path);
1✔
951
                        }
952

953
                } else {
954
                        if (!cg_controller_is_valid(spec))
2✔
955
                                return -EINVAL;
956

957
                        if (ret_controller) {
1✔
958
                                controller = strdup(spec);
1✔
959
                                if (!controller)
1✔
960
                                        return -ENOMEM;
961
                        }
962
                }
963
        }
964

965
        if (ret_controller)
18✔
966
                *ret_controller = TAKE_PTR(controller);
18✔
967
        if (ret_path)
18✔
968
                *ret_path = TAKE_PTR(path);
18✔
969
        return 0;
970
}
971

972
int cg_mangle_path(const char *path, char **ret) {
465✔
973
        _cleanup_free_ char *c = NULL, *p = NULL;
465✔
974
        int r;
465✔
975

976
        assert(path);
465✔
977
        assert(ret);
465✔
978

979
        /* First, check if it already is a filesystem path */
980
        if (path_startswith(path, "/sys/fs/cgroup"))
465✔
981
                return path_simplify_alloc(path, ret);
461✔
982

983
        /* Otherwise, treat it as cg spec */
984
        r = cg_split_spec(path, &c, &p);
4✔
985
        if (r < 0)
4✔
986
                return r;
987

988
        return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, ret);
8✔
989
}
990

991
int cg_get_root_path(char **ret_path) {
13,461✔
992
        char *p, *e;
13,461✔
993
        int r;
13,461✔
994

995
        assert(ret_path);
13,461✔
996

997
        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
13,461✔
998
        if (r < 0)
13,461✔
999
                return r;
13,461✔
1000

1001
        e = endswith(p, "/" SPECIAL_INIT_SCOPE);
13,461✔
1002
        if (e)
13,461✔
1003
                *e = 0;
13,431✔
1004

1005
        *ret_path = p;
13,461✔
1006
        return 0;
13,461✔
1007
}
1008

1009
int cg_shift_path(const char *cgroup, const char *root, const char **ret_shifted) {
11,009✔
1010
        _cleanup_free_ char *rt = NULL;
11,009✔
1011
        char *p;
11,009✔
1012
        int r;
11,009✔
1013

1014
        assert(cgroup);
11,009✔
1015
        assert(ret_shifted);
11,009✔
1016

1017
        if (!root) {
11,009✔
1018
                /* If the root was specified let's use that, otherwise
1019
                 * let's determine it from PID 1 */
1020

1021
                r = cg_get_root_path(&rt);
1,990✔
1022
                if (r < 0)
1,990✔
1023
                        return r;
1024

1025
                root = rt;
1,990✔
1026
        }
1027

1028
        p = path_startswith(cgroup, root);
11,009✔
1029
        if (p && p > cgroup)
11,009✔
1030
                *ret_shifted = p - 1;
2✔
1031
        else
1032
                *ret_shifted = cgroup;
11,007✔
1033

1034
        return 0;
1035
}
1036

1037
int cg_pid_get_path_shifted(pid_t pid, const char *root, char **ret_cgroup) {
13,999✔
1038
        _cleanup_free_ char *raw = NULL;
13,999✔
1039
        const char *c;
13,999✔
1040
        int r;
13,999✔
1041

1042
        assert(pid >= 0);
13,999✔
1043
        assert(ret_cgroup);
13,999✔
1044

1045
        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
13,999✔
1046
        if (r < 0)
13,999✔
1047
                return r;
1048

1049
        r = cg_shift_path(raw, root, &c);
10,829✔
1050
        if (r < 0)
10,829✔
1051
                return r;
1052

1053
        if (c == raw) {
10,829✔
1054
                *ret_cgroup = TAKE_PTR(raw);
10,829✔
1055
                return 0;
10,829✔
1056
        }
1057

1058
        return strdup_to(ret_cgroup, c);
×
1059
}
1060

1061
int cg_path_decode_unit(const char *cgroup, char **ret_unit) {
32,660✔
1062
        assert(cgroup);
32,660✔
1063
        assert(ret_unit);
32,660✔
1064

1065
        size_t n = strcspn(cgroup, "/");
32,660✔
1066
        if (n < 3)
32,660✔
1067
                return -ENXIO;
1068

1069
        char *c = strndupa_safe(cgroup, n);
32,651✔
1070
        c = cg_unescape(c);
32,651✔
1071

1072
        if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
32,651✔
1073
                return -ENXIO;
1074

1075
        return strdup_to(ret_unit, c);
32,643✔
1076
}
1077

1078
static bool valid_slice_name(const char *p, size_t n) {
115,759✔
1079

1080
        if (!p)
115,759✔
1081
                return false;
1082

1083
        if (n < STRLEN("x.slice"))
115,744✔
1084
                return false;
1085

1086
        if (memcmp(p + n - 6, ".slice", 6) == 0) {
115,724✔
1087
                char buf[n+1], *c;
58,742✔
1088

1089
                memcpy(buf, p, n);
58,742✔
1090
                buf[n] = 0;
58,742✔
1091

1092
                c = cg_unescape(buf);
58,742✔
1093

1094
                return unit_name_is_valid(c, UNIT_NAME_PLAIN);
58,742✔
1095
        }
1096

1097
        return false;
1098
}
1099

1100
/* Skips over all leading slice components of the path, and returns a pointer to the first component
 * that is not a valid slice name (leading slashes of that component are skipped too). */
static const char *skip_slices(const char *p) {
        assert(p);

        const char *cur = p + strspn(p, "/");

        for (;;) {
                size_t len = strcspn(cur, "/");

                if (!valid_slice_name(cur, len))
                        return cur;

                /* Jump over the slice and the separators following it */
                cur += len;
                cur += strspn(cur, "/");
        }
}
1117

1118
int cg_path_get_unit(const char *path, char **ret) {
16,953✔
1119
        _cleanup_free_ char *unit = NULL;
16,953✔
1120
        const char *e;
16,953✔
1121
        int r;
16,953✔
1122

1123
        assert(path);
16,953✔
1124
        assert(ret);
16,953✔
1125

1126
        e = skip_slices(path);
16,953✔
1127

1128
        r = cg_path_decode_unit(e, &unit);
16,953✔
1129
        if (r < 0)
16,953✔
1130
                return r;
1131

1132
        /* We skipped over the slices, don't accept any now */
1133
        if (endswith(unit, ".slice"))
16,940✔
1134
                return -ENXIO;
1135

1136
        *ret = TAKE_PTR(unit);
16,940✔
1137
        return 0;
16,940✔
1138
}
1139

1140
int cg_path_get_unit_path(const char *path, char **ret) {
8,855✔
1141
        _cleanup_free_ char *path_copy = NULL;
8,855✔
1142
        char *unit_name;
8,855✔
1143

1144
        assert(path);
8,855✔
1145
        assert(ret);
8,855✔
1146

1147
        path_copy = strdup(path);
8,855✔
1148
        if (!path_copy)
8,855✔
1149
                return -ENOMEM;
1150

1151
        unit_name = (char *)skip_slices(path_copy);
8,855✔
1152
        unit_name[strcspn(unit_name, "/")] = 0;
8,855✔
1153

1154
        if (!unit_name_is_valid(cg_unescape(unit_name), UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
8,855✔
1155
                return -ENXIO;
1156

1157
        *ret = TAKE_PTR(path_copy);
8,852✔
1158

1159
        return 0;
8,852✔
1160
}
1161

1162
int cg_pid_get_unit(pid_t pid, char **ret_unit) {
612✔
1163
        _cleanup_free_ char *cgroup = NULL;
612✔
1164
        int r;
612✔
1165

1166
        assert(ret_unit);
612✔
1167

1168
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
612✔
1169
        if (r < 0)
612✔
1170
                return r;
1171

1172
        return cg_path_get_unit(cgroup, ret_unit);
608✔
1173
}
1174

1175
int cg_pidref_get_unit(const PidRef *pidref, char **ret) {
539✔
1176
        _cleanup_free_ char *unit = NULL;
539✔
1177
        int r;
539✔
1178

1179
        assert(ret);
539✔
1180

1181
        if (!pidref_is_set(pidref))
539✔
1182
                return -ESRCH;
1183
        if (pidref_is_remote(pidref))
1,078✔
1184
                return -EREMOTE;
1185

1186
        r = cg_pid_get_unit(pidref->pid, &unit);
539✔
1187
        if (r < 0)
539✔
1188
                return r;
1189

1190
        r = pidref_verify(pidref);
535✔
1191
        if (r < 0)
535✔
1192
                return r;
1193

1194
        *ret = TAKE_PTR(unit);
535✔
1195
        return 0;
535✔
1196
}
1197

1198
/**
 * Skips a leading session-*.scope component. The component is required: if the path does not begin
 * with a valid one, NULL is returned. Otherwise returns a pointer to what follows it.
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", STRLEN("session-")) != 0 ||
            memcmp(p + n - STRLEN(".scope"), ".scope", STRLEN(".scope")) != 0)
                return NULL;

        /* Extract what is between prefix and suffix: the session ID */
        size_t id_len = n - STRLEN("session-") - STRLEN(".scope");
        char id[id_len + 1];

        memcpy(id, p + STRLEN("session-"), id_len);
        id[id_len] = 0;

        /* Session scopes cannot clash with the kernel's own names, hence they are never escaped and
         * there is no need to call cg_unescape() here. */
        if (!session_id_valid(id))
                return NULL;

        p += n;
        return p + strspn(p, "/");
}
1234

1235
/**
1236
 * Skip user@*.service or capsule@*.service, but require either of them to be there.
1237
 */
1238
static const char *skip_user_manager(const char *p) {
15,273✔
1239
        size_t n;
15,273✔
1240

1241
        if (isempty(p))
15,273✔
1242
                return NULL;
15,273✔
1243

1244
        p += strspn(p, "/");
15,269✔
1245

1246
        n = strcspn(p, "/");
15,269✔
1247
        if (n < CONST_MIN(STRLEN("user@x.service"), STRLEN("capsule@x.service")))
15,269✔
1248
                return NULL;
1249

1250
        /* Any possible errors from functions called below are converted to NULL return, so our callers won't
1251
         * resolve user/capsule name. */
1252
        _cleanup_free_ char *unit_name = strndup(p, n);
15,101✔
1253
        if (!unit_name)
15,101✔
1254
                return NULL;
1255

1256
        _cleanup_free_ char *i = NULL;
15,101✔
1257
        UnitNameFlags type = unit_name_to_instance(unit_name, &i);
15,101✔
1258

1259
        if (type != UNIT_NAME_INSTANCE)
15,101✔
1260
                return NULL;
1261

1262
        /* Note that user manager services never need unescaping, since they cannot conflict with the
1263
         * kernel's own names, hence we don't need to call cg_unescape() here.  Prudently check validity of
1264
         * instance names, they should be always valid as we validate them upon unit start. */
1265
        if (startswith(unit_name, "user@")) {
499✔
1266
                if (parse_uid(i, NULL) < 0)
408✔
1267
                        return NULL;
1268

1269
                p += n;
408✔
1270
                p += strspn(p, "/");
408✔
1271
                return p;
408✔
1272
        } else if (startswith(unit_name, "capsule@")) {
91✔
1273
                if (capsule_name_is_valid(i) <= 0)
5✔
1274
                        return NULL;
1275

1276
                p += n;
5✔
1277
                p += strspn(p, "/");
5✔
1278
                return p;
5✔
1279
        }
1280

1281
        return NULL;
1282
}
1283

1284
/* Skips everything in front of the per-user part of a cgroup path: first any slices, then either the
 * user manager service or the session scope. Returns NULL if neither of the latter two is found. */
static const char *skip_user_prefix(const char *path) {
        assert(path);

        /* Leading slices are optional */
        const char *after_slices = skip_slices(path);

        /* Is the user manager next in the path? */
        const char *after_manager = skip_user_manager(after_slices);
        if (after_manager)
                return after_manager;

        /* If not, then a user session must be */
        return skip_session(after_slices);
}
1300

1301
/* Determines the user unit a cgroup path belongs to. Returns -ENXIO if the path contains neither a user
 * manager nor a session component. */
int cg_path_get_user_unit(const char *path, char **ret) {
        assert(path);
        assert(ret);

        const char *rest = skip_user_prefix(path);
        if (!rest)
                return -ENXIO;

        /* The remainder looks just like the cgroup path of a system unit, hence parse it with the
         * very same parser. */
        return cg_path_get_unit(rest, ret);
}
1315

1316
int cg_pid_get_user_unit(pid_t pid, char **ret_unit) {
59✔
1317
        _cleanup_free_ char *cgroup = NULL;
59✔
1318
        int r;
59✔
1319

1320
        assert(ret_unit);
59✔
1321

1322
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
59✔
1323
        if (r < 0)
59✔
1324
                return r;
1325

1326
        return cg_path_get_user_unit(cgroup, ret_unit);
59✔
1327
}
1328

1329
int cg_path_get_machine_name(const char *path, char **ret_machine) {
44✔
1330
        _cleanup_free_ char *u = NULL;
44✔
1331
        const char *sl;
44✔
1332
        int r;
44✔
1333

1334
        r = cg_path_get_unit(path, &u);
44✔
1335
        if (r < 0)
44✔
1336
                return r;
1337

1338
        sl = strjoina("/run/systemd/machines/unit:", u);
220✔
1339
        return readlink_malloc(sl, ret_machine);
44✔
1340
}
1341

1342
int cg_pid_get_machine_name(pid_t pid, char **ret_machine) {
44✔
1343
        _cleanup_free_ char *cgroup = NULL;
44✔
1344
        int r;
44✔
1345

1346
        assert(ret_machine);
44✔
1347

1348
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
44✔
1349
        if (r < 0)
44✔
1350
                return r;
1351

1352
        return cg_path_get_machine_name(cgroup, ret_machine);
44✔
1353
}
1354

1355
int cg_path_get_session(const char *path, char **ret_session) {
8,413✔
1356
        _cleanup_free_ char *unit = NULL;
8,413✔
1357
        char *start, *end;
8,413✔
1358
        int r;
8,413✔
1359

1360
        assert(path);
8,413✔
1361

1362
        r = cg_path_get_unit(path, &unit);
8,413✔
1363
        if (r < 0)
8,413✔
1364
                return r;
1365

1366
        start = startswith(unit, "session-");
8,412✔
1367
        if (!start)
8,412✔
1368
                return -ENXIO;
1369
        end = endswith(start, ".scope");
312✔
1370
        if (!end)
312✔
1371
                return -ENXIO;
1372

1373
        *end = 0;
312✔
1374
        if (!session_id_valid(start))
312✔
1375
                return -ENXIO;
1376

1377
        if (!ret_session)
311✔
1378
                return 0;
1379

1380
        return strdup_to(ret_session, start);
311✔
1381
}
1382

1383
int cg_pid_get_session(pid_t pid, char **ret_session) {
744✔
1384
        _cleanup_free_ char *cgroup = NULL;
744✔
1385
        int r;
744✔
1386

1387
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
744✔
1388
        if (r < 0)
744✔
1389
                return r;
1390

1391
        return cg_path_get_session(cgroup, ret_session);
744✔
1392
}
1393

1394
int cg_pidref_get_session(const PidRef *pidref, char **ret) {
328✔
1395
        int r;
328✔
1396

1397
        if (!pidref_is_set(pidref))
328✔
1398
                return -ESRCH;
328✔
1399
        if (pidref_is_remote(pidref))
656✔
1400
                return -EREMOTE;
1401

1402
        _cleanup_free_ char *session = NULL;
328✔
1403
        r = cg_pid_get_session(pidref->pid, &session);
328✔
1404
        if (r < 0)
328✔
1405
                return r;
1406

1407
        r = pidref_verify(pidref);
271✔
1408
        if (r < 0)
271✔
1409
                return r;
1410

1411
        if (ret)
271✔
1412
                *ret = TAKE_PTR(session);
271✔
1413
        return 0;
1414
}
1415

1416
int cg_path_get_owner_uid(const char *path, uid_t *ret_uid) {
8,063✔
1417
        _cleanup_free_ char *slice = NULL;
8,063✔
1418
        char *start, *end;
8,063✔
1419
        int r;
8,063✔
1420

1421
        assert(path);
8,063✔
1422

1423
        r = cg_path_get_slice(path, &slice);
8,063✔
1424
        if (r < 0)
8,063✔
1425
                return r;
1426

1427
        start = startswith(slice, "user-");
8,063✔
1428
        if (!start)
8,063✔
1429
                return -ENXIO;
1430

1431
        end = endswith(start, ".slice");
470✔
1432
        if (!end)
470✔
1433
                return -ENXIO;
1434

1435
        *end = 0;
470✔
1436
        if (parse_uid(start, ret_uid) < 0)
470✔
1437
                return -ENXIO;
×
1438

1439
        return 0;
1440
}
1441

1442
int cg_pid_get_owner_uid(pid_t pid, uid_t *ret_uid) {
414✔
1443
        _cleanup_free_ char *cgroup = NULL;
414✔
1444
        int r;
414✔
1445

1446
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
414✔
1447
        if (r < 0)
414✔
1448
                return r;
1449

1450
        return cg_path_get_owner_uid(cgroup, ret_uid);
414✔
1451
}
1452

1453
int cg_pidref_get_owner_uid(const PidRef *pidref, uid_t *ret) {
57✔
1454
        int r;
57✔
1455

1456
        if (!pidref_is_set(pidref))
57✔
1457
                return -ESRCH;
57✔
1458
        if (pidref_is_remote(pidref))
57✔
1459
                return -EREMOTE;
1460

1461
        uid_t uid;
57✔
1462
        r = cg_pid_get_owner_uid(pidref->pid, &uid);
57✔
1463
        if (r < 0)
57✔
1464
                return r;
1465

1466
        r = pidref_verify(pidref);
14✔
1467
        if (r < 0)
14✔
1468
                return r;
1469

1470
        if (ret)
14✔
1471
                *ret = uid;
14✔
1472

1473
        return 0;
1474
}
1475

1476
int cg_path_get_slice(const char *p, char **ret_slice) {
15,936✔
1477
        const char *e = NULL;
15,936✔
1478

1479
        assert(p);
15,936✔
1480
        assert(ret_slice);
15,936✔
1481

1482
        /* Finds the right-most slice unit from the beginning, but stops before we come to
1483
         * the first non-slice unit. */
1484

1485
        for (;;) {
48,814✔
1486
                const char *s;
32,375✔
1487
                int n;
32,375✔
1488

1489
                n = path_find_first_component(&p, /* accept_dot_dot = */ false, &s);
32,375✔
1490
                if (n < 0)
32,375✔
1491
                        return n;
×
1492
                if (!valid_slice_name(s, n))
32,375✔
1493
                        break;
1494

1495
                e = s;
16,439✔
1496
        }
1497

1498
        if (e)
15,936✔
1499
                return cg_path_decode_unit(e, ret_slice);
15,698✔
1500

1501
        return strdup_to(ret_slice, SPECIAL_ROOT_SLICE);
238✔
1502
}
1503

1504
int cg_pid_get_slice(pid_t pid, char **ret_slice) {
63✔
1505
        _cleanup_free_ char *cgroup = NULL;
63✔
1506
        int r;
63✔
1507

1508
        assert(ret_slice);
63✔
1509

1510
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
63✔
1511
        if (r < 0)
63✔
1512
                return r;
1513

1514
        return cg_path_get_slice(cgroup, ret_slice);
63✔
1515
}
1516

1517
/* Determines the user slice a cgroup path is in. Returns -ENXIO if the path contains neither a user
 * manager nor a session component. */
int cg_path_get_user_slice(const char *p, char **ret_slice) {
        assert(p);
        assert(ret_slice);

        const char *rest = skip_user_prefix(p);
        if (!rest)
                return -ENXIO;

        /* The remainder looks just like the cgroup path of a system slice, hence parse it with the
         * very same parser. */
        return cg_path_get_slice(rest, ret_slice);
}
1530

1531
int cg_pid_get_user_slice(pid_t pid, char **ret_slice) {
×
1532
        _cleanup_free_ char *cgroup = NULL;
×
1533
        int r;
×
1534

1535
        assert(ret_slice);
×
1536

1537
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
×
1538
        if (r < 0)
×
1539
                return r;
1540

1541
        return cg_path_get_user_slice(cgroup, ret_slice);
×
1542
}
1543

1544
bool cg_needs_escape(const char *p) {
12,487✔
1545

1546
        /* Checks if the specified path is a valid cgroup name by our rules, or if it must be escaped. Note
1547
         * that we consider escaped cgroup names invalid here, as they need to be escaped a second time if
1548
         * they shall be used. Also note that various names cannot be made valid by escaping even if we
1549
         * return true here (because too long, or contain the forbidden character "/"). */
1550

1551
        if (!filename_is_valid(p))
12,487✔
1552
                return true;
1553

1554
        if (IN_SET(p[0], '_', '.'))
12,483✔
1555
                return true;
1556

1557
        if (STR_IN_SET(p, "notify_on_release", "release_agent", "tasks"))
12,477✔
1558
                return true;
2✔
1559

1560
        if (startswith(p, "cgroup."))
12,475✔
1561
                return true;
1562

1563
        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
174,622✔
1564
                const char *q;
162,149✔
1565

1566
                q = startswith(p, cgroup_controller_to_string(c));
162,149✔
1567
                if (!q)
162,149✔
1568
                        continue;
162,149✔
1569

1570
                if (q[0] == '.')
×
1571
                        return true;
1572
        }
1573

1574
        return false;
1575
}
1576

1577
int cg_escape(const char *p, char **ret) {
12,234✔
1578
        _cleanup_free_ char *n = NULL;
12,234✔
1579

1580
        /* This implements very minimal escaping for names to be used as file names in the cgroup tree: any
1581
         * name which might conflict with a kernel name or is prefixed with '_' is prefixed with a '_'. That
1582
         * way, when reading cgroup names it is sufficient to remove a single prefixing underscore if there
1583
         * is one. */
1584

1585
        /* The return value of this function (unlike cg_unescape()) needs free()! */
1586

1587
        if (cg_needs_escape(p)) {
12,234✔
1588
                n = strjoin("_", p);
7✔
1589
                if (!n)
7✔
1590
                        return -ENOMEM;
1591

1592
                if (!filename_is_valid(n)) /* became invalid due to the prefixing? Or contained things like a slash that cannot be fixed by prefixing? */
7✔
1593
                        return -EINVAL;
1594
        } else {
1595
                n = strdup(p);
12,227✔
1596
                if (!n)
12,227✔
1597
                        return -ENOMEM;
1598
        }
1599

1600
        *ret = TAKE_PTR(n);
12,234✔
1601
        return 0;
12,234✔
1602
}
1603

1604
char* cg_unescape(const char *p) {
        assert(p);

        /* Reverses cg_escape() by skipping over a single leading underscore. Unlike the return value
         * of cg_escape(), the result points into the passed string and must not be free()d! */

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1615

1616
#define CONTROLLER_VALID                        \
1617
        DIGITS LETTERS                          \
1618
        "_"
1619

1620
bool cg_controller_is_valid(const char *p) {
300,294✔
1621
        const char *t, *s;
300,294✔
1622

1623
        if (!p)
300,294✔
1624
                return false;
1625

1626
        if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
300,294✔
1627
                return true;
1628

1629
        s = startswith(p, "name=");
96,043✔
1630
        if (s)
96,043✔
1631
                p = s;
2✔
1632

1633
        if (IN_SET(*p, 0, '_'))
96,043✔
1634
                return false;
1635

1636
        for (t = p; *t; t++)
610,016✔
1637
                if (!strchr(CONTROLLER_VALID, *t))
513,984✔
1638
                        return false;
1639

1640
        if (t - p > NAME_MAX)
96,032✔
1641
                return false;
×
1642

1643
        return true;
1644
}
1645

1646
int cg_slice_to_path(const char *unit, char **ret) {
4,946✔
1647
        _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
4,946✔
1648
        const char *dash;
4,946✔
1649
        int r;
4,946✔
1650

1651
        assert(unit);
4,946✔
1652
        assert(ret);
4,946✔
1653

1654
        if (streq(unit, SPECIAL_ROOT_SLICE))
4,946✔
1655
                return strdup_to(ret, "");
7✔
1656

1657
        if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
4,939✔
1658
                return -EINVAL;
1659

1660
        if (!endswith(unit, ".slice"))
4,928✔
1661
                return -EINVAL;
1662

1663
        r = unit_name_to_prefix(unit, &p);
4,927✔
1664
        if (r < 0)
4,927✔
1665
                return r;
1666

1667
        dash = strchr(p, '-');
4,927✔
1668

1669
        /* Don't allow initial dashes */
1670
        if (dash == p)
4,927✔
1671
                return -EINVAL;
1672

1673
        while (dash) {
5,087✔
1674
                _cleanup_free_ char *escaped = NULL;
165✔
1675
                char n[dash - p + sizeof(".slice")];
165✔
1676

1677
#if HAS_FEATURE_MEMORY_SANITIZER
1678
                /* msan doesn't instrument stpncpy, so it thinks
1679
                 * n is later used uninitialized:
1680
                 * https://github.com/google/sanitizers/issues/926
1681
                 */
1682
                zero(n);
1683
#endif
1684

1685
                /* Don't allow trailing or double dashes */
1686
                if (IN_SET(dash[1], 0, '-'))
165✔
1687
                        return -EINVAL;
1688

1689
                strcpy(stpncpy(n, p, dash - p), ".slice");
163✔
1690
                if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
163✔
1691
                        return -EINVAL;
1692

1693
                r = cg_escape(n, &escaped);
163✔
1694
                if (r < 0)
163✔
1695
                        return r;
1696

1697
                if (!strextend(&s, escaped, "/"))
163✔
1698
                        return -ENOMEM;
1699

1700
                dash = strchr(dash+1, '-');
163✔
1701
        }
1702

1703
        r = cg_escape(unit, &e);
4,922✔
1704
        if (r < 0)
4,922✔
1705
                return r;
1706

1707
        if (!strextend(&s, e))
4,922✔
1708
                return -ENOMEM;
1709

1710
        *ret = TAKE_PTR(s);
4,922✔
1711
        return 0;
4,922✔
1712
}
1713

1714
int cg_is_threaded(const char *path) {
×
1715
        _cleanup_free_ char *fs = NULL, *contents = NULL;
×
1716
        _cleanup_strv_free_ char **v = NULL;
×
1717
        int r;
×
1718

1719
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "cgroup.type", &fs);
×
1720
        if (r < 0)
×
1721
                return r;
1722

1723
        r = read_full_virtual_file(fs, &contents, NULL);
×
1724
        if (r == -ENOENT)
×
1725
                return false; /* Assume no. */
1726
        if (r < 0)
×
1727
                return r;
1728

1729
        v = strv_split(contents, NULL);
×
1730
        if (!v)
×
1731
                return -ENOMEM;
1732

1733
        /* If the cgroup is in the threaded mode, it contains "threaded".
1734
         * If one of the parents or siblings is in the threaded mode, it may contain "invalid". */
1735
        return strv_contains(v, "threaded") || strv_contains(v, "invalid");
×
1736
}
1737

1738
int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
34,290✔
1739
        _cleanup_free_ char *p = NULL;
34,290✔
1740
        int r;
34,290✔
1741

1742
        r = cg_get_path(controller, path, attribute, &p);
34,290✔
1743
        if (r < 0)
34,290✔
1744
                return r;
1745

1746
        return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER);
34,290✔
1747
}
1748

1749
int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
27,785✔
1750
        _cleanup_free_ char *p = NULL;
27,785✔
1751
        int r;
27,785✔
1752

1753
        r = cg_get_path(controller, path, attribute, &p);
27,785✔
1754
        if (r < 0)
27,785✔
1755
                return r;
1756

1757
        return read_one_line_file(p, ret);
27,785✔
1758
}
1759

1760
int cg_get_attribute_as_uint64(const char *controller, const char *path, const char *attribute, uint64_t *ret) {
23,398✔
1761
        _cleanup_free_ char *value = NULL;
23,398✔
1762
        uint64_t v;
23,398✔
1763
        int r;
23,398✔
1764

1765
        assert(ret);
23,398✔
1766

1767
        r = cg_get_attribute(controller, path, attribute, &value);
23,398✔
1768
        if (r == -ENOENT)
23,398✔
1769
                return -ENODATA;
1770
        if (r < 0)
21,787✔
1771
                return r;
1772

1773
        if (streq(value, "max")) {
21,787✔
1774
                *ret = CGROUP_LIMIT_MAX;
5,246✔
1775
                return 0;
5,246✔
1776
        }
1777

1778
        r = safe_atou64(value, &v);
16,541✔
1779
        if (r < 0)
16,541✔
1780
                return r;
1781

1782
        *ret = v;
16,541✔
1783
        return 0;
16,541✔
1784
}
1785

1786
int cg_get_attribute_as_bool(const char *controller, const char *path, const char *attribute, bool *ret) {
61✔
1787
        _cleanup_free_ char *value = NULL;
61✔
1788
        int r;
61✔
1789

1790
        assert(ret);
61✔
1791

1792
        r = cg_get_attribute(controller, path, attribute, &value);
61✔
1793
        if (r == -ENOENT)
61✔
1794
                return -ENODATA;
1795
        if (r < 0)
61✔
1796
                return r;
1797

1798
        r = parse_boolean(value);
61✔
1799
        if (r < 0)
61✔
1800
                return r;
1801

1802
        *ret = r;
61✔
1803
        return 0;
61✔
1804
}
1805

1806
int cg_get_owner(const char *path, uid_t *ret_uid) {
35✔
1807
        _cleanup_free_ char *f = NULL;
35✔
1808
        struct stat stats;
35✔
1809
        int r;
35✔
1810

1811
        assert(ret_uid);
35✔
1812

1813
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &f);
35✔
1814
        if (r < 0)
35✔
1815
                return r;
1816

1817
        if (stat(f, &stats) < 0)
35✔
1818
                return -errno;
16✔
1819

1820
        r = stat_verify_directory(&stats);
19✔
1821
        if (r < 0)
19✔
1822
                return r;
1823

1824
        *ret_uid = stats.st_uid;
19✔
1825
        return 0;
19✔
1826
}
1827

1828
int cg_get_keyed_attribute_full(
26,386✔
1829
                const char *controller,
1830
                const char *path,
1831
                const char *attribute,
1832
                char **keys,
1833
                char **ret_values,
1834
                CGroupKeyMode mode) {
1835

1836
        _cleanup_free_ char *filename = NULL, *contents = NULL;
26,386✔
1837
        const char *p;
26,386✔
1838
        size_t n, i, n_done = 0;
26,386✔
1839
        char **v;
26,386✔
1840
        int r;
26,386✔
1841

1842
        /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with
1843
         * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of
1844
         * entries as 'keys'. On success each entry will be set to the value of the matching key.
1845
         *
1846
         * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. If mode
1847
         * is set to GG_KEY_MODE_GRACEFUL we ignore missing keys and return those that were parsed successfully. */
1848

1849
        r = cg_get_path(controller, path, attribute, &filename);
26,386✔
1850
        if (r < 0)
26,386✔
1851
                return r;
1852

1853
        r = read_full_file(filename, &contents, NULL);
26,386✔
1854
        if (r < 0)
26,386✔
1855
                return r;
1856

1857
        n = strv_length(keys);
20,132✔
1858
        if (n == 0) /* No keys to retrieve? That's easy, we are done then */
20,132✔
1859
                return 0;
1860

1861
        /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
1862
        v = newa0(char*, n);
20,132✔
1863

1864
        for (p = contents; *p;) {
68,597✔
1865
                const char *w = NULL;
1866

1867
                for (i = 0; i < n; i++)
117,062✔
1868
                        if (!v[i]) {
75,542✔
1869
                                w = first_word(p, keys[i]);
68,597✔
1870
                                if (w)
68,597✔
1871
                                        break;
1872
                        }
1873

1874
                if (w) {
68,597✔
1875
                        size_t l;
27,077✔
1876

1877
                        l = strcspn(w, NEWLINE);
27,077✔
1878
                        v[i] = strndup(w, l);
27,077✔
1879
                        if (!v[i]) {
27,077✔
1880
                                r = -ENOMEM;
×
1881
                                goto fail;
×
1882
                        }
1883

1884
                        n_done++;
27,077✔
1885
                        if (n_done >= n)
27,077✔
1886
                                goto done;
20,132✔
1887

1888
                        p = w + l;
6,945✔
1889
                } else
1890
                        p += strcspn(p, NEWLINE);
41,520✔
1891

1892
                p += strspn(p, NEWLINE);
48,465✔
1893
        }
1894

1895
        if (mode & CG_KEY_MODE_GRACEFUL)
×
1896
                goto done;
×
1897

1898
        r = -ENXIO;
1899

1900
fail:
×
1901
        free_many_charp(v, n);
×
1902
        return r;
×
1903

1904
done:
20,132✔
1905
        memcpy(ret_values, v, sizeof(char*) * n);
20,132✔
1906
        if (mode & CG_KEY_MODE_GRACEFUL)
20,132✔
1907
                return n_done;
6,945✔
1908

1909
        return 0;
1910
}
1911

1912
int cg_mask_to_string(CGroupMask mask, char **ret) {
        _cleanup_free_ char *buf = NULL;
        bool need_separator = false;
        size_t len = 0;

        /* Formats 'mask' as a space-separated list of controller names, in controller enumeration
         * order. For an empty mask *ret is set to NULL. Returns 0 on success, -ENOMEM if the buffer
         * cannot be grown. */

        assert(ret);

        if (mask == 0) {
                *ret = NULL;
                return 0;
        }

        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                const char *name;
                size_t name_len;

                if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c)))
                        continue;

                name = cgroup_controller_to_string(c);
                name_len = strlen(name);

                /* Room for an optional separator, the name, and the trailing NUL. */
                if (!GREEDY_REALLOC(buf, len + need_separator + name_len + 1))
                        return -ENOMEM;

                if (need_separator)
                        buf[len++] = ' ';

                memcpy(buf + len, name, name_len);
                len += name_len;

                need_separator = true;
        }

        assert(buf);

        buf[len] = 0;
        *ret = TAKE_PTR(buf);

        return 0;
}
1953

1954
int cg_mask_from_string(const char *value, CGroupMask *ret) {
        CGroupMask accumulated = 0;

        /* Parses a whitespace-separated list of controller names into a mask. Words that
         * cgroup_controller_from_string() does not recognize are silently skipped. Returns 0 on
         * success, or the negative error from extract_first_word(). */

        assert(ret);
        assert(value);

        for (;;) {
                _cleanup_free_ char *word = NULL;
                CGroupController controller;
                int r;

                r = extract_first_word(&value, &word, NULL, 0);
                if (r < 0)
                        return r;
                if (r == 0) /* End of input */
                        break;

                controller = cgroup_controller_from_string(word);
                if (controller >= 0)
                        accumulated |= CGROUP_CONTROLLER_TO_MASK(controller);
        }

        *ret = accumulated;
        return 0;
}
1981

1982
int cg_mask_supported_subtree(const char *root, CGroupMask *ret) {
        CGroupMask supported;
        int r;

        /* Determines the mask of supported cgroup controllers. Only includes controllers we can make
         * sense of and that are actually accessible. Only covers real controllers, i.e. not the
         * CGROUP_CONTROLLER_BPF_xyz pseudo-controllers. */

        r = cg_all_unified();
        if (r < 0)
                return r;

        if (r == 0) {
                /* Legacy hierarchy: probe each v1 controller individually for accessibility. */

                supported = 0;
                for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                        CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);

                        if (!FLAGS_SET(CGROUP_MASK_V1, bit))
                                continue;

                        if (controller_is_v1_accessible(root, cgroup_controller_to_string(c)) >= 0)
                                supported |= bit;
                }
        } else {
                _cleanup_free_ char *controllers = NULL, *controllers_path = NULL;

                /* Unified hierarchy: the supported and accessible controllers are listed in the
                 * "cgroup.controllers" attribute of the root cgroup. */

                r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &controllers_path);
                if (r < 0)
                        return r;

                r = read_one_line_file(controllers_path, &controllers);
                if (r < 0)
                        return r;

                r = cg_mask_from_string(controllers, &supported);
                if (r < 0)
                        return r;

                /* Drop controllers that are not supported in the unified hierarchy. */
                supported &= CGROUP_MASK_V2;
        }

        *ret = supported;
        return 0;
}
2036

2037
int cg_mask_supported(CGroupMask *ret) {
        _cleanup_free_ char *root_path = NULL;
        int r;

        /* Like cg_mask_supported_subtree(), but operates on the path returned by
         * cg_get_root_path(). */

        r = cg_get_root_path(&root_path);
        if (r < 0)
                return r;

        return cg_mask_supported_subtree(root_path, ret);
}
2047

2048
/* The hybrid mode was initially implemented in v232 and simply mounted cgroup2 on
 * /sys/fs/cgroup/systemd. This unfortunately broke other tools (such as docker) which expected the v1
 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd. From v233 on, the hybrid mode mounts v2 on
 * /sys/fs/cgroup/unified and maintains the "name=systemd" hierarchy on /sys/fs/cgroup/systemd for
 * compatibility with other tools.
 *
 * To keep live upgrades working, we detect and support the v232 layout. When the v232 layout is
 * detected, to keep cgroup v2 process management but disable the compat dual layout, we return true on
 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and false on cg_hybrid_unified().
 *
 * This per-thread flag is written by cg_unified_cached() whenever it detects CGROUP_UNIFIED_SYSTEMD
 * (true for the v232 variant, false otherwise) and is read by cg_hybrid_unified().
 */
2058
static thread_local bool unified_systemd_v232;
2059

2060
int cg_unified_cached(bool flush) {
321,629✔
2061
        static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
321,629✔
2062

2063
        struct statfs fs;
321,629✔
2064

2065
        /* Checks if we support the unified hierarchy. Returns an
2066
         * error when the cgroup hierarchies aren't mounted yet or we
2067
         * have any other trouble determining if the unified hierarchy
2068
         * is supported. */
2069

2070
        if (flush)
321,629✔
2071
                unified_cache = CGROUP_UNIFIED_UNKNOWN;
4✔
2072
        else if (unified_cache >= CGROUP_UNIFIED_NONE)
321,625✔
2073
                return unified_cache;
321,629✔
2074

2075
        if (statfs("/sys/fs/cgroup/", &fs) < 0)
13,299✔
2076
                return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
×
2077

2078
        if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
13,299✔
2079
                log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
13,299✔
2080
                unified_cache = CGROUP_UNIFIED_ALL;
13,299✔
2081
        } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
×
2082
                if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
×
2083
                    F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
×
2084
                        log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
×
2085
                        unified_cache = CGROUP_UNIFIED_SYSTEMD;
×
2086
                        unified_systemd_v232 = false;
×
2087
                } else {
2088
                        if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) {
×
2089
                                if (errno == ENOENT) {
×
2090
                                        /* Some other software may have set up /sys/fs/cgroup in a configuration we do not recognize. */
2091
                                        log_debug_errno(errno, "Unsupported cgroupsv1 setup detected: name=systemd hierarchy not found.");
×
2092
                                        return -ENOMEDIUM;
×
2093
                                }
2094
                                return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
×
2095
                        }
2096

2097
                        if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
×
2098
                                log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
×
2099
                                unified_cache = CGROUP_UNIFIED_SYSTEMD;
×
2100
                                unified_systemd_v232 = true;
×
2101
                        } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
×
2102
                                log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
×
2103
                                unified_cache = CGROUP_UNIFIED_NONE;
×
2104
                        } else {
2105
                                log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
×
2106
                                          (unsigned long long) fs.f_type);
2107
                                unified_cache = CGROUP_UNIFIED_NONE;
×
2108
                        }
2109
                }
2110
        } else if (F_TYPE_EQUAL(fs.f_type, SYSFS_MAGIC)) {
×
2111
                return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
×
2112
                                       "No filesystem is currently mounted on /sys/fs/cgroup.");
2113
        } else
2114
                return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
×
2115
                                       "Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2116
                                       (unsigned long long)fs.f_type);
2117

2118
        return unified_cache;
13,299✔
2119
}
2120

2121
int cg_unified_controller(const char *controller) {
50,707✔
2122
        int r;
50,707✔
2123

2124
        r = cg_unified_cached(false);
50,707✔
2125
        if (r < 0)
50,707✔
2126
                return r;
2127

2128
        if (r == CGROUP_UNIFIED_NONE)
50,707✔
2129
                return false;
2130

2131
        if (r >= CGROUP_UNIFIED_ALL)
50,707✔
2132
                return true;
2133

2134
        return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
×
2135
}
2136

2137
int cg_all_unified(void) {
270,916✔
2138
        int r;
270,916✔
2139

2140
        r = cg_unified_cached(false);
270,916✔
2141
        if (r < 0)
270,916✔
2142
                return r;
2143

2144
        return r >= CGROUP_UNIFIED_ALL;
270,916✔
2145
}
2146

2147
int cg_hybrid_unified(void) {
1✔
2148
        int r;
1✔
2149

2150
        r = cg_unified_cached(false);
1✔
2151
        if (r < 0)
1✔
2152
                return r;
2153

2154
        return r == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
1✔
2155
}
2156

2157
int cg_is_delegated(const char *path) {
        int r;

        /* Checks the delegation xattrs of the cgroup at 'path'. Returns the boolean value of the
         * first xattr that is present, false if neither is, or a negative errno-style value. */

        assert(path);

        r = cg_get_xattr_bool(path, "trusted.delegate");
        if (ERRNO_IS_NEG_XATTR_ABSENT(r)) {
                /* If the trusted xattr isn't set (preferred), then check the untrusted one. Under the
                 * assumption that whoever is trusted enough to own the cgroup is also trusted enough
                 * to decide whether it is delegated or not, this should be safe. */
                r = cg_get_xattr_bool(path, "user.delegate");
                if (ERRNO_IS_NEG_XATTR_ABSENT(r))
                        return false;
        }

        return r;
}
2172

2173
int cg_is_delegated_fd(int fd) {
        int r;

        /* Same check as cg_is_delegated(), but on an already opened file descriptor: first
         * "trusted.delegate", then "user.delegate" if the former is absent. Returns false if neither
         * xattr is present. */

        assert(fd >= 0);

        r = getxattr_at_bool(fd, /* path= */ NULL, "trusted.delegate", /* flags= */ 0);
        if (ERRNO_IS_NEG_XATTR_ABSENT(r)) {
                r = getxattr_at_bool(fd, /* path= */ NULL, "user.delegate", /* flags= */ 0);
                if (ERRNO_IS_NEG_XATTR_ABSENT(r))
                        return false;
        }

        return r;
}
2185

2186
int cg_has_coredump_receive(const char *path) {
        int r;

        /* Returns the boolean value of the "user.coredump_receive" xattr of the cgroup at 'path',
         * false if the xattr is absent, or a negative errno-style value on other failures. */

        assert(path);

        r = cg_get_xattr_bool(path, "user.coredump_receive");

        return ERRNO_IS_NEG_XATTR_ABSENT(r) ? false : r;
}
2197

2198
/* Default value of each IO limit type, indexed by CGroupIOLimitType: all of them default to
 * CGROUP_LIMIT_MAX. */
const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]  = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WBPS_MAX]  = CGROUP_LIMIT_MAX,
        [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
};
2204

2205
/* String name of each IO limit type, indexed by CGroupIOLimitType. */
static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]  = "IOReadBandwidthMax",
        [CGROUP_IO_WBPS_MAX]  = "IOWriteBandwidthMax",
        [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
        [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
};

DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
4,982✔
2213

2214
/* String name of each controller, indexed by CGroupController. This includes the
 * CGROUP_CONTROLLER_BPF_xyz entries. */
static const char *const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
        [CGROUP_CONTROLLER_CPU]                             = "cpu",
        [CGROUP_CONTROLLER_CPUACCT]                         = "cpuacct",
        [CGROUP_CONTROLLER_CPUSET]                          = "cpuset",
        [CGROUP_CONTROLLER_IO]                              = "io",
        [CGROUP_CONTROLLER_BLKIO]                           = "blkio",
        [CGROUP_CONTROLLER_MEMORY]                          = "memory",
        [CGROUP_CONTROLLER_DEVICES]                         = "devices",
        [CGROUP_CONTROLLER_PIDS]                            = "pids",
        [CGROUP_CONTROLLER_BPF_FIREWALL]                    = "bpf-firewall",
        [CGROUP_CONTROLLER_BPF_DEVICES]                     = "bpf-devices",
        [CGROUP_CONTROLLER_BPF_FOREIGN]                     = "bpf-foreign",
        [CGROUP_CONTROLLER_BPF_SOCKET_BIND]                 = "bpf-socket-bind",
        [CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES] = "bpf-restrict-network-interfaces",
};

DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
291,452✔
2231

2232
CGroupMask get_cpu_accounting_mask(void) {
1,350,698✔
2233
        static CGroupMask needed_mask = (CGroupMask) -1;
1,350,698✔
2234

2235
        /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is
2236
         * provided externally from the CPU controller, which means we don't
2237
         * need to enable the CPU controller just to get metrics. This is good,
2238
         * because enabling the CPU controller comes at a minor performance
2239
         * hit, especially when it's propagated deep into large hierarchies.
2240
         * There's also no separate CPU accounting controller available within
2241
         * a unified hierarchy.
2242
         *
2243
         * This combination of factors results in the desired cgroup mask to
2244
         * enable for CPU accounting varying as follows:
2245
         *
2246
         *                   ╔═════════════════════╤═════════════════════╗
2247
         *                   ║     Linux ≥4.15     │     Linux <4.15     ║
2248
         *   ╔═══════════════╬═════════════════════╪═════════════════════╣
2249
         *   ║ Unified       ║ nothing             │ CGROUP_MASK_CPU     ║
2250
         *   ╟───────────────╫─────────────────────┼─────────────────────╢
2251
         *   ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║
2252
         *   ╚═══════════════╩═════════════════════╧═════════════════════╝
2253
         *
2254
         * We check kernel version here instead of manually checking whether
2255
         * cpu.stat is present for every cgroup, as that check in itself would
2256
         * already be fairly expensive.
2257
         *
2258
         * Kernels where this patch has been backported will therefore have the
2259
         * CPU controller enabled unnecessarily. This is more expensive than
2260
         * necessary, but harmless. ☺️
2261
         */
2262

2263
        if (needed_mask == (CGroupMask) -1) {
1,350,698✔
2264
                if (cg_all_unified()) {
707✔
2265
                        struct utsname u;
707✔
2266
                        assert_se(uname(&u) >= 0);
707✔
2267

2268
                        if (strverscmp_improved(u.release, "4.15") < 0)
707✔
2269
                                needed_mask = CGROUP_MASK_CPU;
×
2270
                        else
2271
                                needed_mask = 0;
707✔
2272
                } else
2273
                        needed_mask = CGROUP_MASK_CPUACCT;
×
2274
        }
2275

2276
        return needed_mask;
1,350,698✔
2277
}
2278

2279
bool cpu_accounting_is_cheap(void) {
2,334✔
2280
        return get_cpu_accounting_mask() == 0;
2,334✔
2281
}
2282

2283
/* String name of each managed OOM mode, indexed by ManagedOOMMode. */
static const char* const managed_oom_mode_table[_MANAGED_OOM_MODE_MAX] = {
        [MANAGED_OOM_AUTO] = "auto",
        [MANAGED_OOM_KILL] = "kill",
};

DEFINE_STRING_TABLE_LOOKUP(managed_oom_mode, ManagedOOMMode);
31,135✔
2289

2290
/* String name of each managed OOM preference, indexed by ManagedOOMPreference. */
static const char* const managed_oom_preference_table[_MANAGED_OOM_PREFERENCE_MAX] = {
        [MANAGED_OOM_PREFERENCE_NONE]  = "none",
        [MANAGED_OOM_PREFERENCE_AVOID] = "avoid",
        [MANAGED_OOM_PREFERENCE_OMIT]  = "omit",
};

DEFINE_STRING_TABLE_LOOKUP(managed_oom_preference, ManagedOOMPreference);
15,299✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc