• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 19381060735

14 Nov 2025 10:54PM UTC coverage: 72.37% (-0.02%) from 72.393%
19381060735

push

github

web-flow
5 TPM tweaks (#39712)

Fixes: #38939
Fixes: #39150

60 of 78 new or added lines in 3 files covered. (76.92%)

2631 existing lines in 50 files now uncovered.

307287 of 424606 relevant lines covered (72.37%)

1234902.43 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.28
/src/basic/cgroup-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <linux/fs.h>
4
#include <linux/magic.h>
5
#include <signal.h>
6
#include <stdlib.h>
7
#include <sys/xattr.h>
8
#include <threads.h>
9
#include <unistd.h>
10

11
#include "alloc-util.h"
12
#include "capsule-util.h"
13
#include "cgroup-util.h"
14
#include "dirent-util.h"
15
#include "errno-util.h"
16
#include "extract-word.h"
17
#include "fd-util.h"
18
#include "fileio.h"
19
#include "format-util.h"
20
#include "fs-util.h"
21
#include "log.h"
22
#include "login-util.h"
23
#include "parse-util.h"
24
#include "path-util.h"
25
#include "pidref.h"
26
#include "process-util.h"
27
#include "set.h"
28
#include "special.h"
29
#include "stat-util.h"
30
#include "string-table.h"
31
#include "string-util.h"
32
#include "strv.h"
33
#include "unaligned.h"
34
#include "unit-name.h"
35
#include "user-util.h"
36
#include "xattr-util.h"
37

38
/* The structure to pass to name_to_handle_at() on cgroupfs2 */
39
typedef union {
40
        struct file_handle file_handle;
41
        uint8_t space[offsetof(struct file_handle, f_handle) + sizeof(uint64_t)];
42
} cg_file_handle;
43

44
#define CG_FILE_HANDLE_INIT                                     \
45
        (cg_file_handle) {                                      \
46
                .file_handle.handle_bytes = sizeof(uint64_t),   \
47
                .file_handle.handle_type = FILEID_KERNFS,       \
48
        }
49

50
/* The .f_handle field is not aligned to 64bit on some archs, hence read it via an unaligned accessor */
51
#define CG_FILE_HANDLE_CGROUPID(fh) unaligned_read_ne64(fh.file_handle.f_handle)
52

53
int cg_path_open(const char *controller, const char *path) {
833✔
54
        _cleanup_free_ char *fs = NULL;
833✔
55
        int r;
833✔
56

57
        r = cg_get_path(controller, path, /* suffix=*/ NULL, &fs);
833✔
58
        if (r < 0)
833✔
59
                return r;
60

61
        return RET_NERRNO(open(fs, O_DIRECTORY|O_CLOEXEC));
833✔
62
}
63

64
int cg_cgroupid_open(int cgroupfs_fd, uint64_t id) {
12✔
65
        _cleanup_close_ int fsfd = -EBADF;
12✔
66

67
        if (cgroupfs_fd < 0) {
12✔
68
                fsfd = open("/sys/fs/cgroup", O_CLOEXEC|O_DIRECTORY);
11✔
69
                if (fsfd < 0)
11✔
70
                        return -errno;
×
71

72
                cgroupfs_fd = fsfd;
73
        }
74

75
        cg_file_handle fh = CG_FILE_HANDLE_INIT;
12✔
76
        unaligned_write_ne64(fh.file_handle.f_handle, id);
12✔
77

78
        return RET_NERRNO(open_by_handle_at(cgroupfs_fd, &fh.file_handle, O_DIRECTORY|O_CLOEXEC));
19✔
79
}
80

81
int cg_path_from_cgroupid(int cgroupfs_fd, uint64_t id, char **ret) {
×
82
        _cleanup_close_ int cgfd = -EBADF;
×
83
        int r;
×
84

85
        cgfd = cg_cgroupid_open(cgroupfs_fd, id);
×
86
        if (cgfd < 0)
×
87
                return cgfd;
88

89
        _cleanup_free_ char *path = NULL;
×
90
        r = fd_get_path(cgfd, &path);
×
91
        if (r < 0)
×
92
                return r;
93

94
        if (!path_startswith(path, "/sys/fs/cgroup/"))
×
95
                return -EXDEV; /* recognizable error */
96

97
        if (ret)
×
98
                *ret = TAKE_PTR(path);
×
99
        return 0;
100
}
101

102
int cg_get_cgroupid_at(int dfd, const char *path, uint64_t *ret) {
5,477✔
103
        cg_file_handle fh = CG_FILE_HANDLE_INIT;
5,477✔
104
        int mnt_id;
5,477✔
105

106
        assert(dfd >= 0 || (dfd == AT_FDCWD && path_is_absolute(path)));
10,908✔
107
        assert(ret);
5,477✔
108

109
        /* This is cgroupfs so we know the size of the handle, thus no need to loop around like
110
         * name_to_handle_at_loop() does in mountpoint-util.c */
111
        if (name_to_handle_at(dfd, strempty(path), &fh.file_handle, &mnt_id, isempty(path) ? AT_EMPTY_PATH : 0) < 0) {
10,954✔
112
                assert(errno != EOVERFLOW);
×
113
                return -errno;
×
114
        }
115

116
        *ret = CG_FILE_HANDLE_CGROUPID(fh);
5,477✔
117
        return 0;
5,477✔
118
}
119

120
int cg_enumerate_processes(const char *controller, const char *path, FILE **ret) {
16,626✔
121
        _cleanup_free_ char *fs = NULL;
16,626✔
122
        FILE *f;
16,626✔
123
        int r;
16,626✔
124

125
        assert(ret);
16,626✔
126

127
        r = cg_get_path(controller, path, "cgroup.procs", &fs);
16,626✔
128
        if (r < 0)
16,626✔
129
                return r;
130

131
        f = fopen(fs, "re");
16,626✔
132
        if (!f)
16,626✔
133
                return -errno;
10,318✔
134

135
        *ret = f;
6,308✔
136
        return 0;
6,308✔
137
}
138

139
/* Reads the next PID from a stream opened on cgroup.procs.
 *
 * Returns 1 and stores the PID in *ret if one was read, 0 (with *ret set to 0) on end of file, and
 * a negative errno on failure. Values above PID_T_MAX are refused with -EIO.
 *
 * Note that cgroup.procs might contain duplicates! Also, the kernel returns ENODEV if we try to
 * read from cgroup.procs of a cgroup that has been removed already; callers should handle that. */
int cg_read_pid(FILE *f, pid_t *ret, CGroupFlags flags) {
        assert(f);
        assert(ret);

        for (;;) {
                unsigned long value;

                errno = 0;
                if (fscanf(f, "%lu", &value) != 1) {
                        if (!feof(f))
                                return errno_or_else(EIO);

                        *ret = 0;
                        return 0;
                }

                if (value > PID_T_MAX)
                        return -EIO;

                /* In some circumstances (e.g. WSL), cgroups might contain unmappable PIDs from
                 * other contexts, which show up as zeros. Unless the caller asked for them via
                 * CGROUP_DONT_SKIP_UNMAPPED we silently move on to the next entry. */
                if (value != 0 || FLAGS_SET(flags, CGROUP_DONT_SKIP_UNMAPPED)) {
                        *ret = (pid_t) value;
                        return 1;
                }
        }
}
175

176
/* Like cg_read_pid(), but returns a PidRef. Returns 1 if a reference was acquired, 0 (with *ret set
 * to PIDREF_NULL) at the end of the list, -EREMOTE if a zero (unmappable) PID was returned by
 * cg_read_pid(), or another negative errno on failure. */
int cg_read_pidref(FILE *f, PidRef *ret, CGroupFlags flags) {
        int r;

        assert(f);
        assert(ret);

        for (;;) {
                pid_t pid;

                r = cg_read_pid(f, &pid, flags);
                if (r < 0)
                        return log_debug_errno(r, "Failed to read pid from cgroup item: %m");
                if (r == 0) {
                        *ret = PIDREF_NULL;
                        return 0;
                }

                if (pid == 0)
                        return -EREMOTE;

                r = pidref_set_pid(ret, pid);
                if (r == -ESRCH)
                        continue; /* Gone by now? Just skip over it and read the next one. */
                if (r < 0)
                        return r;

                return 1;
        }
}
205

206
/* Reports whether the "cgroup.kill" attribute can be used. The answer is computed once per thread
 * and cached: it is false unless cg_all_unified() is positive and
 * /sys/fs/cgroup/init.scope/cgroup.kill exists. */
bool cg_kill_supported(void) {
        static thread_local int cached = -1;

        if (cached < 0) {
                if (cg_all_unified() <= 0)
                        cached = false;
                else if (access("/sys/fs/cgroup/init.scope/cgroup.kill", F_OK) >= 0)
                        cached = true;
                else {
                        if (errno != ENOENT)
                                log_debug_errno(errno, "Failed to check whether cgroup.kill is available, assuming not: %m");

                        cached = false;
                }
        }

        return cached;
}
221

222
int cg_enumerate_subgroups(const char *controller, const char *path, DIR **ret) {
16,192✔
223
        _cleanup_free_ char *fs = NULL;
16,192✔
224
        DIR *d;
16,192✔
225
        int r;
16,192✔
226

227
        assert(ret);
16,192✔
228

229
        /* This is not recursive! */
230

231
        r = cg_get_path(controller, path, NULL, &fs);
16,192✔
232
        if (r < 0)
16,192✔
233
                return r;
234

235
        d = opendir(fs);
16,192✔
236
        if (!d)
16,192✔
237
                return -errno;
10,318✔
238

239
        *ret = d;
5,874✔
240
        return 0;
5,874✔
241
}
242

243
int cg_read_subgroup(DIR *d, char **ret) {
7,255✔
244
        assert(d);
7,255✔
245
        assert(ret);
7,255✔
246

247
        FOREACH_DIRENT_ALL(de, d, return -errno) {
282,368✔
248
                if (de->d_type != DT_DIR)
276,279✔
249
                        continue;
262,935✔
250

251
                if (dot_or_dot_dot(de->d_name))
13,344✔
252
                        continue;
12,178✔
253

254
                return strdup_to_full(ret, de->d_name);
1,166✔
255
        }
256

257
        *ret = NULL;
6,089✔
258
        return 0;
6,089✔
259
}
260

261
int cg_kill(
16,126✔
262
                const char *path,
263
                int sig,
264
                CGroupFlags flags,
265
                Set *killed_pids,
266
                cg_kill_log_func_t log_kill,
267
                void *userdata) {
268

269
        _cleanup_set_free_ Set *allocated_set = NULL;
16,126✔
270
        int r, ret = 0;
16,126✔
271

272
        assert(path);
16,126✔
273
        assert(sig >= 0);
16,126✔
274

275
         /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence
276
          * don't send SIGCONT on SIGKILL. */
277
        if (IN_SET(sig, SIGCONT, SIGKILL))
16,126✔
278
                flags &= ~CGROUP_SIGCONT;
2,451✔
279

280
        /* This goes through the tasks list and kills them all. This is repeated until no further processes
281
         * are added to the tasks list, to properly handle forking processes.
282
         *
283
         * When sending SIGKILL, prefer cg_kill_kernel_sigkill(), which is fully atomic. */
284

285
        if (!killed_pids) {
16,126✔
286
                killed_pids = allocated_set = set_new(NULL);
672✔
287
                if (!killed_pids)
672✔
288
                        return -ENOMEM;
289
        }
290

291
        bool done;
16,207✔
292
        do {
16,207✔
293
                _cleanup_fclose_ FILE *f = NULL;
10,318✔
294
                int ret_log_kill;
16,207✔
295

296
                done = true;
16,207✔
297

298
                r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, path, &f);
16,207✔
299
                if (r == -ENOENT)
16,207✔
300
                        break;
301
                if (r < 0)
5,889✔
302
                        return RET_GATHER(ret, log_debug_errno(r, "Failed to enumerate cgroup items: %m"));
×
303

304
                for (;;) {
7,335✔
305
                        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
7,335✔
306

307
                        r = cg_read_pidref(f, &pidref, flags);
7,335✔
308
                        if (r == -ENODEV) {
7,335✔
309
                                /* reading from cgroup.pids will result in ENODEV if the cgroup is
310
                                 * concurrently removed. Just leave in that case, because a removed cgroup
311
                                 * contains no processes anymore. */
312
                                done = true;
313
                                break;
314
                        }
315
                        if (r < 0)
7,335✔
316
                                return RET_GATHER(ret, log_debug_errno(r, "Failed to read pidref from cgroup '%s': %m", path));
×
317
                        if (r == 0)
7,335✔
318
                                break;
319

320
                        if ((flags & CGROUP_IGNORE_SELF) && pidref_is_self(&pidref))
1,446✔
321
                                continue;
672✔
322

323
                        if (set_contains(killed_pids, PID_TO_PTR(pidref.pid)))
774✔
324
                                continue;
578✔
325

326
                        /* Ignore kernel threads to mimic the behavior of cgroup.kill. */
327
                        if (pidref_is_kernel_thread(&pidref) > 0) {
196✔
328
                                log_debug("Ignoring kernel thread with pid " PID_FMT " in cgroup '%s'", pidref.pid, path);
×
329
                                continue;
×
330
                        }
331

332
                        if (log_kill)
196✔
333
                                ret_log_kill = log_kill(&pidref, sig, userdata);
65✔
334

335
                        /* If we haven't killed this process yet, kill it */
336
                        r = pidref_kill(&pidref, sig);
196✔
337
                        if (r < 0 && r != -ESRCH)
196✔
338
                                RET_GATHER(ret, log_debug_errno(r, "Failed to kill process with pid " PID_FMT " from cgroup '%s': %m", pidref.pid, path));
×
339
                        if (r >= 0) {
196✔
340
                                if (flags & CGROUP_SIGCONT)
196✔
341
                                        (void) pidref_kill(&pidref, SIGCONT);
130✔
342

343
                                if (ret == 0) {
196✔
344
                                        if (log_kill)
123✔
345
                                                ret = ret_log_kill;
346
                                        else
347
                                                ret = 1;
58✔
348
                                }
349
                        }
350

351
                        done = false;
196✔
352

353
                        r = set_put(killed_pids, PID_TO_PTR(pidref.pid));
196✔
354
                        if (r < 0)
196✔
355
                                return RET_GATHER(ret, r);
×
356
                }
357

358
                /* To avoid racing against processes which fork quicker than we can kill them, we repeat this
359
                 * until no new pids need to be killed. */
360

361
        } while (!done);
5,889✔
362

363
        return ret;
364
}
365

366
int cg_kill_recursive(
15,452✔
367
                const char *path,
368
                int sig,
369
                CGroupFlags flags,
370
                Set *killed_pids,
371
                cg_kill_log_func_t log_kill,
372
                void *userdata) {
373

374
        _cleanup_set_free_ Set *allocated_set = NULL;
×
375
        _cleanup_closedir_ DIR *d = NULL;
15,452✔
376
        int r, ret;
15,452✔
377

378
        assert(path);
15,452✔
379
        assert(sig >= 0);
15,452✔
380

381
        if (!killed_pids) {
15,452✔
382
                killed_pids = allocated_set = set_new(NULL);
14,944✔
383
                if (!killed_pids)
14,944✔
384
                        return -ENOMEM;
385
        }
386

387
        ret = cg_kill(path, sig, flags, killed_pids, log_kill, userdata);
15,452✔
388

389
        r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
15,452✔
390
        if (r < 0) {
15,452✔
391
                if (r != -ENOENT)
10,318✔
392
                        RET_GATHER(ret, log_debug_errno(r, "Failed to enumerate cgroup '%s' subgroups: %m", path));
×
393

394
                return ret;
10,318✔
395
        }
396

397
        for (;;) {
5,394✔
398
                _cleanup_free_ char *fn = NULL, *p = NULL;
5,264✔
399

400
                r = cg_read_subgroup(d, &fn);
5,264✔
401
                if (r < 0) {
5,264✔
402
                        RET_GATHER(ret, log_debug_errno(r, "Failed to read subgroup from cgroup '%s': %m", path));
×
403
                        break;
404
                }
405
                if (r == 0)
5,264✔
406
                        break;
407

408
                p = path_join(empty_to_root(path), fn);
130✔
409
                if (!p)
130✔
410
                        return -ENOMEM;
×
411

412
                r = cg_kill_recursive(p, sig, flags, killed_pids, log_kill, userdata);
130✔
413
                if (r < 0)
130✔
414
                        log_debug_errno(r, "Failed to recursively kill processes in cgroup '%s': %m", p);
×
415
                if (r != 0 && ret >= 0)
130✔
416
                        ret = r;
17✔
417
        }
418

419
        return ret;
5,134✔
420
}
421

422
int cg_kill_kernel_sigkill(const char *path) {
×
423
        _cleanup_free_ char *killfile = NULL;
×
424
        int r;
×
425

426
        /* Kills the cgroup at `path` directly by writing to its cgroup.kill file.  This sends SIGKILL to all
427
         * processes in the cgroup and has the advantage of being completely atomic, unlike cg_kill_items(). */
428

429
        assert(path);
×
430

431
        if (!cg_kill_supported())
×
432
                return -EOPNOTSUPP;
433

434
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "cgroup.kill", &killfile);
×
435
        if (r < 0)
×
436
                return r;
437

438
        r = write_string_file(killfile, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
×
439
        if (r < 0)
×
440
                return log_debug_errno(r, "Failed to write to cgroup.kill for cgroup '%s': %m", path);
×
441

442
        return 0;
443
}
444

445
static const char *controller_to_dirname(const char *controller) {
×
446
        assert(controller);
×
447

448
        /* Converts a controller name to the directory name below /sys/fs/cgroup/ we want to mount it
449
         * to. Effectively, this just cuts off the name= prefixed used for named hierarchies, if it is
450
         * specified. */
451

452
        if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
×
453
                if (cg_hybrid_unified() > 0)
×
454
                        controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
455
                else
456
                        controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
×
457
        }
458

459
        return startswith(controller, "name=") ?: controller;
×
460
}
461

462
/* Builds "/sys/fs/cgroup/<controller dir>[/<path>][/<suffix>]" for the legacy hierarchy, leaving
 * out 'path' and 'suffix' when they are NULL or empty. Returns 0 and the newly allocated string in
 * *ret, or -ENOMEM. */
static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **ret) {
        assert(ret);
        assert(controller);

        const char *dirname = controller_to_dirname(controller);
        bool with_path = !isempty(path), with_suffix = !isempty(suffix);
        char *joined;

        if (with_path && with_suffix)
                joined = path_join("/sys/fs/cgroup", dirname, path, suffix);
        else if (with_path)
                joined = path_join("/sys/fs/cgroup", dirname, path);
        else if (with_suffix)
                joined = path_join("/sys/fs/cgroup", dirname, suffix);
        else
                joined = path_join("/sys/fs/cgroup", dirname);
        if (!joined)
                return -ENOMEM;

        *ret = joined;
        return 0;
}
485

486
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]" for the unified hierarchy, leaving out 'path' and
 * 'suffix' when they are NULL or empty. Returns 0 and the newly allocated string in *ret, or
 * -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **ret) {
        assert(ret);

        bool with_path = !isempty(path), with_suffix = !isempty(suffix);
        char *joined;

        if (with_path && with_suffix)
                joined = path_join("/sys/fs/cgroup", path, suffix);
        else if (with_path)
                joined = path_join("/sys/fs/cgroup", path);
        else if (with_suffix)
                joined = path_join("/sys/fs/cgroup", suffix);
        else
                joined = strdup("/sys/fs/cgroup");
        if (!joined)
                return -ENOMEM;

        *ret = joined;
        return 0;
}
505

506
/* Generates the path for a cgroup 'path' and attribute 'suffix', and returns it simplified in *ret.
 *
 * With a controller, the result is a file system path below /sys/fs/cgroup, assembled for the
 * unified or the legacy layout depending on cg_all_unified(). Without a controller, the path
 * *below* the controllers is returned, without any prefix; in that case at least one of 'path' and
 * 'suffix' must be non-empty, otherwise -EINVAL is returned. */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **ret) {
        int r;

        assert(ret);

        if (controller) {
                if (!cg_controller_is_valid(controller))
                        return -EINVAL;

                r = cg_all_unified();
                if (r < 0)
                        return r;

                r = r > 0 ?
                        join_path_unified(path, suffix, ret) :
                        join_path_legacy(controller, path, suffix, ret);
                if (r < 0)
                        return r;

                path_simplify(*ret);
                return 0;
        }

        char *joined;

        if (isempty(suffix)) {
                if (isempty(path))
                        return -EINVAL;

                joined = strdup(path);
        } else if (isempty(path))
                joined = strdup(suffix);
        else
                joined = path_join(path, suffix);
        if (!joined)
                return -ENOMEM;

        *ret = path_simplify(joined);
        return 0;
}
549

550
/* Checks via access_nofollow() whether a cgroup v1 controller hierarchy is available.
 *
 * Without 'root' we only test that /sys/fs/cgroup/<dir> exists. If root is specified, we check that:
 * - possible subcgroup is created at root,
 * - we can modify the hierarchy,
 * by testing that <root>/cgroup.procs below the controller directory is writable. */
static int controller_is_v1_accessible(const char *root, const char *controller) {
        assert(controller);

        const char *dirname = controller_to_dirname(controller);

        /* strjoina() stops at the first NULL argument, hence a NULL root yields just the
         * controller directory. */
        const char *check_path = strjoina("/sys/fs/cgroup/", dirname, root, root ? "/cgroup.procs" : NULL);

        return access_nofollow(check_path, root ? W_OK : F_OK);
}
564

565
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **ret) {
19,304✔
566
        int r;
19,304✔
567

568
        assert(controller);
19,304✔
569
        assert(ret);
19,304✔
570

571
        if (!cg_controller_is_valid(controller))
19,304✔
572
                return -EINVAL;
573

574
        r = cg_all_unified();
19,304✔
575
        if (r < 0)
19,304✔
576
                return r;
577
        if (r > 0) {
19,304✔
578
                /* In the unified hierarchy all controllers are considered accessible,
579
                 * except for the named hierarchies */
580
                if (startswith(controller, "name="))
19,304✔
581
                        return -EOPNOTSUPP;
582
        } else {
583
                /* Check if the specified controller is actually accessible */
584
                r = controller_is_v1_accessible(NULL, controller);
×
585
                if (r < 0)
×
586
                        return r;
587
        }
588

589
        return cg_get_path(controller, path, suffix, ret);
19,304✔
590
}
591

592
int cg_set_xattr(const char *path, const char *name, const void *value, size_t size, int flags) {
7,238✔
593
        _cleanup_free_ char *fs = NULL;
7,238✔
594
        int r;
7,238✔
595

596
        assert(path);
7,238✔
597
        assert(name);
7,238✔
598
        assert(value || size <= 0);
7,238✔
599

600
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
7,238✔
601
        if (r < 0)
7,238✔
602
                return r;
603

604
        return RET_NERRNO(setxattr(fs, name, value, size, flags));
7,238✔
605
}
606

607
int cg_get_xattr(const char *path, const char *name, char **ret, size_t *ret_size) {
17,134✔
608
        _cleanup_free_ char *fs = NULL;
17,134✔
609
        int r;
17,134✔
610

611
        assert(path);
17,134✔
612
        assert(name);
17,134✔
613

614
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
17,134✔
615
        if (r < 0)
17,134✔
616
                return r;
617

618
        return lgetxattr_malloc(fs, name, ret, ret_size);
17,134✔
619
}
620

621
int cg_get_xattr_bool(const char *path, const char *name) {
167✔
622
        _cleanup_free_ char *fs = NULL;
167✔
623
        int r;
167✔
624

625
        assert(path);
167✔
626
        assert(name);
167✔
627

628
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
167✔
629
        if (r < 0)
167✔
630
                return r;
631

632
        return getxattr_at_bool(AT_FDCWD, fs, name, /* at_flags= */ 0);
167✔
633
}
634

635
int cg_remove_xattr(const char *path, const char *name) {
35,541✔
636
        _cleanup_free_ char *fs = NULL;
35,541✔
637
        int r;
35,541✔
638

639
        assert(path);
35,541✔
640
        assert(name);
35,541✔
641

642
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
35,541✔
643
        if (r < 0)
35,541✔
644
                return r;
645

646
        return RET_NERRNO(removexattr(fs, name));
71,082✔
647
}
648

649
int cg_pid_get_path(const char *controller, pid_t pid, char **ret_path) {
53,025✔
650
        _cleanup_fclose_ FILE *f = NULL;
53,025✔
651
        const char *fs, *controller_str = NULL;  /* avoid false maybe-uninitialized warning */
53,025✔
652
        int unified, r;
53,025✔
653

654
        assert(pid >= 0);
53,025✔
655
        assert(ret_path);
53,025✔
656

657
        if (controller) {
53,025✔
658
                if (!cg_controller_is_valid(controller))
52,829✔
659
                        return -EINVAL;
660
        } else
661
                controller = SYSTEMD_CGROUP_CONTROLLER;
662

663
        unified = cg_unified_controller(controller);
53,025✔
664
        if (unified < 0)
53,025✔
665
                return unified;
666
        if (unified == 0) {
53,025✔
667
                if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
×
668
                        controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
669
                else
670
                        controller_str = controller;
×
671
        }
672

673
        fs = procfs_file_alloca(pid, "cgroup");
59,285✔
674
        r = fopen_unlocked(fs, "re", &f);
53,025✔
675
        if (r == -ENOENT)
53,025✔
676
                return -ESRCH;
677
        if (r < 0)
49,356✔
678
                return r;
679

680
        for (;;) {
49,356✔
681
                _cleanup_free_ char *line = NULL;
49,356✔
682
                char *e;
49,356✔
683

684
                r = read_line(f, LONG_LINE_MAX, &line);
49,356✔
685
                if (r < 0)
49,356✔
686
                        return r;
687
                if (r == 0)
49,350✔
688
                        return -ENODATA;
689

690
                if (unified) {
49,350✔
691
                        e = startswith(line, "0:");
49,350✔
692
                        if (!e)
49,350✔
693
                                continue;
×
694

695
                        e = strchr(e, ':');
49,350✔
696
                        if (!e)
49,350✔
697
                                continue;
×
698
                } else {
699
                        char *l;
×
700

701
                        l = strchr(line, ':');
×
702
                        if (!l)
×
703
                                continue;
×
704

705
                        l++;
×
706
                        e = strchr(l, ':');
×
707
                        if (!e)
×
708
                                continue;
×
709
                        *e = 0;
×
710

711
                        assert(controller_str);
×
712
                        r = string_contains_word(l, ",", controller_str);
×
713
                        if (r < 0)
×
714
                                return r;
715
                        if (r == 0)
×
716
                                continue;
×
717
                }
718

719
                _cleanup_free_ char *path = strdup(e + 1);
49,350✔
720
                if (!path)
49,350✔
721
                        return -ENOMEM;
722

723
                /* Refuse cgroup paths from outside our cgroup namespace */
724
                if (startswith(path, "/../"))
49,350✔
725
                        return -EUNATCH;
726

727
                /* Truncate suffix indicating the process is a zombie */
728
                e = endswith(path, " (deleted)");
49,350✔
729
                if (e)
49,350✔
730
                        *e = 0;
145✔
731

732
                *ret_path = TAKE_PTR(path);
49,350✔
733
                return 0;
49,350✔
734
        }
735
}
736

737
int cg_pidref_get_path(const char *controller, const PidRef *pidref, char **ret_path) {
12,146✔
738
        _cleanup_free_ char *path = NULL;
12,146✔
739
        int r;
12,146✔
740

741
        assert(ret_path);
12,146✔
742

743
        if (!pidref_is_set(pidref))
12,146✔
744
                return -ESRCH;
745
        if (pidref_is_remote(pidref))
24,292✔
746
                return -EREMOTE;
747

748
        // XXX: Ideally we'd use pidfd_get_cgroupid() + cg_path_from_cgroupid() here, to extract this
749
        // bit of information from pidfd directly. However, the latter requires privilege and it's
750
        // not entirely clear how to handle cgroups from outer namespace.
751

752
        r = cg_pid_get_path(controller, pidref->pid, &path);
12,146✔
753
        if (r < 0)
12,146✔
754
                return r;
755

756
        /* Before we return the path, make sure the procfs entry for this pid still matches the pidref */
757
        r = pidref_verify(pidref);
12,145✔
758
        if (r < 0)
12,145✔
759
                return r;
760

761
        *ret_path = TAKE_PTR(path);
12,145✔
762
        return 0;
12,145✔
763
}
764

765
int cg_is_empty(const char *controller, const char *path) {
2,792✔
766
        _cleanup_free_ char *t = NULL;
2,792✔
767
        int r;
2,792✔
768

769
        /* Check if the cgroup hierarchy under 'path' is empty. On cgroup v2 it's exposed via the "populated"
770
         * attribute of "cgroup.events". */
771

772
        assert(path);
2,792✔
773

774
        /* The root cgroup is always populated */
775
        if (empty_or_root(path))
2,792✔
776
                return false;
777

778
        r = cg_get_keyed_attribute(controller, path, "cgroup.events", STRV_MAKE("populated"), &t);
2,792✔
779
        if (r == -ENOENT)
2,792✔
780
                return true;
781
        if (r < 0)
281✔
782
                return r;
783

784
        return streq(t, "0");
281✔
785
}
786

787
int cg_split_spec(const char *spec, char **ret_controller, char **ret_path) {
23✔
788
        _cleanup_free_ char *controller = NULL, *path = NULL;
23✔
789
        int r;
23✔
790

791
        assert(spec);
23✔
792

793
        if (*spec == '/') {
23✔
794
                if (!path_is_normalized(spec))
15✔
795
                        return -EINVAL;
796

797
                if (ret_path) {
15✔
798
                        r = path_simplify_alloc(spec, &path);
15✔
799
                        if (r < 0)
15✔
800
                                return r;
801
                }
802

803
        } else {
804
                const char *e;
8✔
805

806
                e = strchr(spec, ':');
8✔
807
                if (e) {
8✔
808
                        controller = strndup(spec, e-spec);
6✔
809
                        if (!controller)
6✔
810
                                return -ENOMEM;
811
                        if (!cg_controller_is_valid(controller))
6✔
812
                                return -EINVAL;
813

814
                        if (!isempty(e + 1)) {
3✔
815
                                path = strdup(e+1);
2✔
816
                                if (!path)
2✔
817
                                        return -ENOMEM;
818

819
                                if (!path_is_normalized(path) ||
2✔
820
                                    !path_is_absolute(path))
2✔
821
                                        return -EINVAL;
822

823
                                path_simplify(path);
1✔
824
                        }
825

826
                } else {
827
                        if (!cg_controller_is_valid(spec))
2✔
828
                                return -EINVAL;
829

830
                        if (ret_controller) {
1✔
831
                                controller = strdup(spec);
1✔
832
                                if (!controller)
1✔
833
                                        return -ENOMEM;
834
                        }
835
                }
836
        }
837

838
        if (ret_controller)
18✔
839
                *ret_controller = TAKE_PTR(controller);
18✔
840
        if (ret_path)
18✔
841
                *ret_path = TAKE_PTR(path);
18✔
842
        return 0;
843
}
844

845
int cg_mangle_path(const char *path, char **ret) {
435✔
846
        _cleanup_free_ char *c = NULL, *p = NULL;
435✔
847
        int r;
435✔
848

849
        assert(path);
435✔
850
        assert(ret);
435✔
851

852
        /* First, check if it already is a filesystem path */
853
        if (path_startswith(path, "/sys/fs/cgroup"))
435✔
854
                return path_simplify_alloc(path, ret);
431✔
855

856
        /* Otherwise, treat it as cg spec */
857
        r = cg_split_spec(path, &c, &p);
4✔
858
        if (r < 0)
4✔
859
                return r;
860

861
        return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, ret);
8✔
862
}
863

864
int cg_get_root_path(char **ret_path) {
23,804✔
865
        char *p, *e;
23,804✔
866
        int r;
23,804✔
867

868
        assert(ret_path);
23,804✔
869

870
        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
23,804✔
871
        if (r < 0)
23,804✔
872
                return r;
23,804✔
873

874
        e = endswith(p, "/" SPECIAL_INIT_SCOPE);
23,804✔
875
        if (e)
23,804✔
876
                *e = 0;
23,771✔
877

878
        *ret_path = p;
23,804✔
879
        return 0;
23,804✔
880
}
881

882
int cg_shift_path(const char *cgroup, const char *root, const char **ret_shifted) {
12,011✔
883
        int r;
12,011✔
884

885
        assert(cgroup);
12,011✔
886
        assert(ret_shifted);
12,011✔
887

888
        _cleanup_free_ char *rt = NULL;
12,011✔
889
        if (!root) {
12,011✔
890
                /* If the root was specified let's use that, otherwise
891
                 * let's determine it from PID 1 */
892

893
                r = cg_get_root_path(&rt);
2,459✔
894
                if (r < 0)
2,459✔
895
                        return r;
896

897
                root = rt;
2,459✔
898
        }
899

900
        *ret_shifted = path_startswith_full(cgroup, root, PATH_STARTSWITH_RETURN_LEADING_SLASH|PATH_STARTSWITH_REFUSE_DOT_DOT) ?: cgroup;
12,011✔
901
        return 0;
12,011✔
902
}
903

904
int cg_pid_get_path_shifted(pid_t pid, const char *root, char **ret_cgroup) {
15,509✔
905
        _cleanup_free_ char *raw = NULL;
15,509✔
906
        const char *c;
15,509✔
907
        int r;
15,509✔
908

909
        assert(pid >= 0);
15,509✔
910
        assert(ret_cgroup);
15,509✔
911

912
        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
15,509✔
913
        if (r < 0)
15,509✔
914
                return r;
915

916
        r = cg_shift_path(raw, root, &c);
11,835✔
917
        if (r < 0)
11,835✔
918
                return r;
919

920
        if (c == raw) {
11,835✔
921
                *ret_cgroup = TAKE_PTR(raw);
11,835✔
922
                return 0;
11,835✔
923
        }
924

925
        return strdup_to(ret_cgroup, c);
×
926
}
927

928
int cg_path_decode_unit(const char *cgroup, char **ret_unit) {
32,968✔
929
        assert(cgroup);
32,968✔
930

931
        size_t n = strcspn(cgroup, "/");
32,968✔
932
        if (n < 3)
32,968✔
933
                return -ENXIO;
934

935
        char *c = strndupa_safe(cgroup, n);
32,957✔
936
        c = cg_unescape(c);
32,957✔
937

938
        if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
32,957✔
939
                return -ENXIO;
940

941
        if (ret_unit)
32,948✔
942
                return strdup_to(ret_unit, c);
32,948✔
943

944
        return 0;
945
}
946

947
static bool valid_slice_name(const char *p, size_t n) {
117,220✔
948
        assert(p || n == 0);
117,220✔
949

950
        if (n < STRLEN("x.slice"))
117,220✔
951
                return false;
952

953
        char *c = strndupa_safe(p, n);
117,179✔
954
        if (!endswith(c, ".slice"))
117,179✔
955
                return false;
956

957
        return unit_name_is_valid(cg_unescape(c), UNIT_NAME_PLAIN);
59,502✔
958
}
959

960
static const char* skip_slices(const char *p) {
        assert(p);

        /* Walks over all leading path components that are valid slice names. Returns a pointer to the
         * first component that is not (leading slashes already skipped), which may be the end of the
         * string. */

        for (;;) {
                const char *component = p + strspn(p, "/");
                size_t len = strcspn(component, "/");

                if (!valid_slice_name(component, len))
                        return component;

                p = component + len;
        }
}
977

978
int cg_path_get_unit_full(const char *path, char **ret_unit, char **ret_subgroup) {
17,425✔
979
        int r;
17,425✔
980

981
        assert(path);
17,425✔
982

983
        const char *e = skip_slices(path);
17,425✔
984

985
        _cleanup_free_ char *unit = NULL;
17,425✔
986
        r = cg_path_decode_unit(e, &unit);
17,425✔
987
        if (r < 0)
17,425✔
988
                return r;
989

990
        /* We skipped over the slices, don't accept any now */
991
        if (endswith(unit, ".slice"))
17,409✔
992
                return -ENXIO;
993

994
        if (ret_subgroup) {
17,409✔
995
                _cleanup_free_ char *subgroup = NULL;
×
996
                e += strcspn(e, "/");
701✔
997
                e += strspn(e, "/");
701✔
998

999
                if (isempty(e))
701✔
1000
                        subgroup = NULL;
1001
                else {
1002
                        subgroup = strdup(e);
236✔
1003
                        if (!subgroup)
236✔
1004
                                return -ENOMEM;
×
1005
                }
1006

1007
                path_simplify(subgroup);
701✔
1008

1009
                *ret_subgroup = TAKE_PTR(subgroup);
701✔
1010
        }
1011

1012
        if (ret_unit)
17,409✔
1013
                *ret_unit = TAKE_PTR(unit);
17,409✔
1014

1015
        return 0;
1016
}
1017

1018
int cg_path_get_unit_path(const char *path, char **ret) {
9,392✔
1019
        _cleanup_free_ char *path_copy = NULL;
9,392✔
1020
        char *unit_name;
9,392✔
1021

1022
        assert(path);
9,392✔
1023
        assert(ret);
9,392✔
1024

1025
        path_copy = strdup(path);
9,392✔
1026
        if (!path_copy)
9,392✔
1027
                return -ENOMEM;
1028

1029
        unit_name = (char*) skip_slices(path_copy);
9,392✔
1030
        unit_name[strcspn(unit_name, "/")] = 0;
9,392✔
1031

1032
        if (!unit_name_is_valid(cg_unescape(unit_name), UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
9,392✔
1033
                return -ENXIO;
1034

1035
        *ret = TAKE_PTR(path_copy);
9,389✔
1036

1037
        return 0;
9,389✔
1038
}
1039

1040
int cg_pid_get_unit_full(pid_t pid, char **ret_unit, char **ret_subgroup) {
810✔
1041
        int r;
810✔
1042

1043
        _cleanup_free_ char *cgroup = NULL;
810✔
1044
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
810✔
1045
        if (r < 0)
810✔
1046
                return r;
1047

1048
        return cg_path_get_unit_full(cgroup, ret_unit, ret_subgroup);
810✔
1049
}
1050

1051
int cg_pidref_get_unit_full(const PidRef *pidref, char **ret_unit, char **ret_subgroup) {
683✔
1052
        int r;
683✔
1053

1054
        if (!pidref_is_set(pidref))
683✔
1055
                return -ESRCH;
683✔
1056
        if (pidref_is_remote(pidref))
1,366✔
1057
                return -EREMOTE;
1058

1059
        _cleanup_free_ char *unit = NULL, *subgroup = NULL;
683✔
1060
        r = cg_pid_get_unit_full(pidref->pid, &unit, &subgroup);
683✔
1061
        if (r < 0)
683✔
1062
                return r;
1063

1064
        r = pidref_verify(pidref);
683✔
1065
        if (r < 0)
683✔
1066
                return r;
1067

1068
        if (ret_unit)
683✔
1069
                *ret_unit = TAKE_PTR(unit);
683✔
1070
        if (ret_subgroup)
683✔
1071
                *ret_subgroup = TAKE_PTR(subgroup);
52✔
1072
        return 0;
1073
}
1074

1075
static const char* skip_session(const char *p) {
        /* Skips over a leading session-*.scope component. Returns NULL if the path does not begin with
         * one, otherwise a pointer to what follows it. */

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        size_t n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        const char *id = startswith(p, "session-");
        if (!id)
                return NULL;

        /* Session scopes cannot clash with the kernel's own names and hence are never escaped, so there
         * is no need for cg_unescape() here. Copy everything after the prefix up to the end of the
         * component. */
        char *buf = strndupa_safe(id, p + n - id);

        char *suffix = endswith(buf, ".scope");
        if (!suffix)
                return NULL;
        *suffix = '\0'; /* what remains in 'buf' is the session ID proper */

        if (!session_id_valid(buf))
                return NULL;

        return skip_leading_slash(p + n);
}
1106

1107
static const char* skip_user_manager(const char *p) {
15,142✔
1108
        size_t n;
15,142✔
1109

1110
        /* Skip user@*.service or capsule@*.service, but require either of them to be there. */
1111

1112
        if (isempty(p))
15,142✔
1113
                return NULL;
15,142✔
1114

1115
        p += strspn(p, "/");
15,138✔
1116

1117
        n = strcspn(p, "/");
15,138✔
1118
        if (n < CONST_MIN(STRLEN("user@x.service"), STRLEN("capsule@x.service")))
15,138✔
1119
                return NULL;
1120

1121
        /* Any possible errors from functions called below are converted to NULL return, so our callers won't
1122
         * resolve user/capsule name. */
1123
        _cleanup_free_ char *unit_name = strndup(p, n);
14,971✔
1124
        if (!unit_name)
14,971✔
1125
                return NULL;
1126

1127
        _cleanup_free_ char *i = NULL;
14,971✔
1128
        UnitNameFlags type = unit_name_to_instance(unit_name, &i);
14,971✔
1129

1130
        if (type != UNIT_NAME_INSTANCE)
14,971✔
1131
                return NULL;
1132

1133
        /* Note that user manager services never need unescaping, since they cannot conflict with the
1134
         * kernel's own names, hence we don't need to call cg_unescape() here.  Prudently check validity of
1135
         * instance names, they should be always valid as we validate them upon unit start. */
1136
        if (!(startswith(unit_name, "user@") && parse_uid(i, NULL) >= 0) &&
520✔
1137
            !(startswith(unit_name, "capsule@") && capsule_name_is_valid(i) > 0))
92✔
1138
                return NULL;
82✔
1139

1140
        return skip_leading_slash(p + n);
351✔
1141
}
1142

1143
static const char* skip_user_prefix(const char *path) {
        assert(path);

        /* Skips the part of the path that leads to a user's own cgroup subtree: any slices, followed by
         * either the user manager service or a session scope. Returns NULL if neither is there. */

        const char *after_slices = skip_slices(path);

        /* Prefer the user manager; only if that's not what comes next, try a session scope */
        const char *after_manager = skip_user_manager(after_slices);

        return after_manager ?: skip_session(after_slices);
}
1159

1160
int cg_path_get_user_unit_full(const char *path, char **ret_unit, char **ret_subgroup) {
        const char *below;

        /* Like cg_path_get_unit_full(), but for units below a user manager or session. Returns -ENXIO
         * if the path contains neither. */

        assert(path);

        below = skip_user_prefix(path);
        if (!below)
                return -ENXIO;

        /* What follows the user prefix is laid out just like the system hierarchy, so the same parser
         * does the rest. */
        return cg_path_get_unit_full(below, ret_unit, ret_subgroup);
}
1173

1174
int cg_pid_get_user_unit_full(pid_t pid, char **ret_unit, char **ret_subgroup) {
13✔
1175
        int r;
13✔
1176

1177
        _cleanup_free_ char *cgroup = NULL;
13✔
1178
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
13✔
1179
        if (r < 0)
13✔
1180
                return r;
1181

1182
        return cg_path_get_user_unit_full(cgroup, ret_unit, ret_subgroup);
13✔
1183
}
1184

1185
int cg_pidref_get_user_unit_full(const PidRef *pidref, char **ret_unit, char **ret_subgroup) {
13✔
1186
        int r;
13✔
1187

1188
        if (!pidref_is_set(pidref))
13✔
1189
                return -ESRCH;
13✔
1190
        if (pidref_is_remote(pidref))
26✔
1191
                return -EREMOTE;
1192

1193
        _cleanup_free_ char *unit = NULL, *subgroup = NULL;
13✔
1194
        r = cg_pid_get_user_unit_full(pidref->pid, &unit, &subgroup);
13✔
1195
        if (r < 0)
13✔
1196
                return r;
1197

1198
        r = pidref_verify(pidref);
4✔
1199
        if (r < 0)
4✔
1200
                return r;
1201

1202
        if (ret_unit)
4✔
1203
                *ret_unit = TAKE_PTR(unit);
4✔
1204
        if (ret_subgroup)
4✔
1205
                *ret_subgroup = TAKE_PTR(subgroup);
×
1206
        return 0;
1207
}
1208

1209
int cg_path_get_machine_name(const char *path, char **ret_machine) {
40✔
1210
        _cleanup_free_ char *u = NULL;
40✔
1211
        const char *sl;
40✔
1212
        int r;
40✔
1213

1214
        r = cg_path_get_unit(path, &u);
40✔
1215
        if (r < 0)
40✔
1216
                return r;
1217

1218
        sl = strjoina("/run/systemd/machines/unit:", u);
200✔
1219
        return readlink_malloc(sl, ret_machine);
40✔
1220
}
1221

1222
int cg_pid_get_machine_name(pid_t pid, char **ret_machine) {
40✔
1223
        _cleanup_free_ char *cgroup = NULL;
40✔
1224
        int r;
40✔
1225

1226
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
40✔
1227
        if (r < 0)
40✔
1228
                return r;
1229

1230
        return cg_path_get_machine_name(cgroup, ret_machine);
40✔
1231
}
1232

1233
int cg_path_get_session(const char *path, char **ret_session) {
8,751✔
1234
        _cleanup_free_ char *unit = NULL;
8,751✔
1235
        char *start, *end;
8,751✔
1236
        int r;
8,751✔
1237

1238
        assert(path);
8,751✔
1239

1240
        r = cg_path_get_unit(path, &unit);
8,751✔
1241
        if (r < 0)
8,751✔
1242
                return r;
1243

1244
        start = startswith(unit, "session-");
8,750✔
1245
        if (!start)
8,750✔
1246
                return -ENXIO;
1247
        end = endswith(start, ".scope");
428✔
1248
        if (!end)
428✔
1249
                return -ENXIO;
1250

1251
        *end = 0;
428✔
1252
        if (!session_id_valid(start))
428✔
1253
                return -ENXIO;
1254

1255
        if (!ret_session)
427✔
1256
                return 0;
1257

1258
        return strdup_to(ret_session, start);
427✔
1259
}
1260

1261
int cg_pid_get_session(pid_t pid, char **ret_session) {
1,127✔
1262
        _cleanup_free_ char *cgroup = NULL;
1,127✔
1263
        int r;
1,127✔
1264

1265
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1,127✔
1266
        if (r < 0)
1,127✔
1267
                return r;
1268

1269
        return cg_path_get_session(cgroup, ret_session);
1,127✔
1270
}
1271

1272
int cg_pidref_get_session(const PidRef *pidref, char **ret) {
433✔
1273
        int r;
433✔
1274

1275
        if (!pidref_is_set(pidref))
433✔
1276
                return -ESRCH;
433✔
1277
        if (pidref_is_remote(pidref))
866✔
1278
                return -EREMOTE;
1279

1280
        _cleanup_free_ char *session = NULL;
433✔
1281
        r = cg_pid_get_session(pidref->pid, &session);
433✔
1282
        if (r < 0)
433✔
1283
                return r;
1284

1285
        r = pidref_verify(pidref);
381✔
1286
        if (r < 0)
381✔
1287
                return r;
1288

1289
        if (ret)
381✔
1290
                *ret = TAKE_PTR(session);
381✔
1291
        return 0;
1292
}
1293

1294
int cg_path_get_owner_uid(const char *path, uid_t *ret_uid) {
7,958✔
1295
        _cleanup_free_ char *slice = NULL;
7,958✔
1296
        char *start, *end;
7,958✔
1297
        int r;
7,958✔
1298

1299
        assert(path);
7,958✔
1300

1301
        r = cg_path_get_slice(path, &slice);
7,958✔
1302
        if (r < 0)
7,958✔
1303
                return r;
1304

1305
        start = startswith(slice, "user-");
7,958✔
1306
        if (!start)
7,958✔
1307
                return -ENXIO;
1308

1309
        end = endswith(start, ".slice");
417✔
1310
        if (!end)
417✔
1311
                return -ENXIO;
1312

1313
        *end = 0;
417✔
1314
        if (parse_uid(start, ret_uid) < 0)
417✔
1315
                return -ENXIO;
×
1316

1317
        return 0;
1318
}
1319

1320
int cg_pid_get_owner_uid(pid_t pid, uid_t *ret_uid) {
356✔
1321
        _cleanup_free_ char *cgroup = NULL;
356✔
1322
        int r;
356✔
1323

1324
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
356✔
1325
        if (r < 0)
356✔
1326
                return r;
1327

1328
        return cg_path_get_owner_uid(cgroup, ret_uid);
356✔
1329
}
1330

1331
int cg_pidref_get_owner_uid(const PidRef *pidref, uid_t *ret) {
52✔
1332
        int r;
52✔
1333

1334
        if (!pidref_is_set(pidref))
52✔
1335
                return -ESRCH;
52✔
1336
        if (pidref_is_remote(pidref))
52✔
1337
                return -EREMOTE;
1338

1339
        uid_t uid;
52✔
1340
        r = cg_pid_get_owner_uid(pidref->pid, &uid);
52✔
1341
        if (r < 0)
52✔
1342
                return r;
1343

1344
        r = pidref_verify(pidref);
12✔
1345
        if (r < 0)
12✔
1346
                return r;
1347

1348
        if (ret)
12✔
1349
                *ret = uid;
12✔
1350

1351
        return 0;
1352
}
1353

1354
int cg_path_get_slice(const char *p, char **ret_slice) {
15,759✔
1355
        const char *e = NULL;
15,759✔
1356

1357
        assert(p);
15,759✔
1358

1359
        /* Finds the right-most slice unit from the beginning, but stops before we come to
1360
         * the first non-slice unit. */
1361

1362
        for (;;) {
48,143✔
1363
                const char *s;
31,951✔
1364
                int n;
31,951✔
1365

1366
                n = path_find_first_component(&p, /* accept_dot_dot = */ false, &s);
31,951✔
1367
                if (n < 0)
31,951✔
1368
                        return n;
×
1369
                if (!valid_slice_name(s, n))
31,951✔
1370
                        break;
1371

1372
                e = s;
16,192✔
1373
        }
1374

1375
        if (e)
15,759✔
1376
                return cg_path_decode_unit(e, ret_slice);
15,534✔
1377

1378
        if (ret_slice)
225✔
1379
                return strdup_to(ret_slice, SPECIAL_ROOT_SLICE);
225✔
1380

1381
        return 0;
1382
}
1383

1384
int cg_pid_get_slice(pid_t pid, char **ret_slice) {
59✔
1385
        _cleanup_free_ char *cgroup = NULL;
59✔
1386
        int r;
59✔
1387

1388
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
59✔
1389
        if (r < 0)
59✔
1390
                return r;
1391

1392
        return cg_path_get_slice(cgroup, ret_slice);
59✔
1393
}
1394

1395
int cg_path_get_user_slice(const char *p, char **ret_slice) {
        const char *below;

        /* Like cg_path_get_slice(), but for slices below a user manager or session. Returns -ENXIO if
         * the path contains neither. */

        assert(p);

        below = skip_user_prefix(p);
        if (!below)
                return -ENXIO;

        /* What follows the user prefix is laid out just like the system hierarchy, so the same parser
         * does the rest. */
        return cg_path_get_slice(below, ret_slice);
}
1407

1408
int cg_pid_get_user_slice(pid_t pid, char **ret_slice) {
1✔
1409
        _cleanup_free_ char *cgroup = NULL;
1✔
1410
        int r;
1✔
1411

1412
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1✔
1413
        if (r < 0)
1✔
1414
                return r;
1415

1416
        return cg_path_get_user_slice(cgroup, ret_slice);
1✔
1417
}
1418

1419
bool cg_needs_escape(const char *p) {
25,299✔
1420

1421
        /* Checks if the specified path is a valid cgroup name by our rules, or if it must be escaped. Note
1422
         * that we consider escaped cgroup names invalid here, as they need to be escaped a second time if
1423
         * they shall be used. Also note that various names cannot be made valid by escaping even if we
1424
         * return true here (because too long, or contain the forbidden character "/"). */
1425

1426
        if (!filename_is_valid(p))
25,299✔
1427
                return true;
1428

1429
        if (IN_SET(p[0], '_', '.'))
25,295✔
1430
                return true;
1431

1432
        if (STR_IN_SET(p, "notify_on_release", "release_agent", "tasks"))
25,289✔
1433
                return true;
2✔
1434

1435
        if (startswith(p, "cgroup."))
25,287✔
1436
                return true;
1437

1438
        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
353,990✔
1439
                const char *q;
328,705✔
1440

1441
                q = startswith(p, cgroup_controller_to_string(c));
328,705✔
1442
                if (!q)
328,705✔
1443
                        continue;
328,705✔
1444

1445
                if (q[0] == '.')
×
1446
                        return true;
1447
        }
1448

1449
        return false;
1450
}
1451

1452
int cg_escape(const char *p, char **ret) {
25,042✔
1453
        _cleanup_free_ char *n = NULL;
25,042✔
1454

1455
        /* This implements very minimal escaping for names to be used as file names in the cgroup tree: any
1456
         * name which might conflict with a kernel name or is prefixed with '_' is prefixed with a '_'. That
1457
         * way, when reading cgroup names it is sufficient to remove a single prefixing underscore if there
1458
         * is one. */
1459

1460
        /* The return value of this function (unlike cg_unescape()) needs free()! */
1461

1462
        if (cg_needs_escape(p)) {
25,042✔
1463
                n = strjoin("_", p);
7✔
1464
                if (!n)
7✔
1465
                        return -ENOMEM;
1466

1467
                if (!filename_is_valid(n)) /* became invalid due to the prefixing? Or contained things like a slash that cannot be fixed by prefixing? */
7✔
1468
                        return -EINVAL;
1469
        } else {
1470
                n = strdup(p);
25,035✔
1471
                if (!n)
25,035✔
1472
                        return -ENOMEM;
1473
        }
1474

1475
        *ret = TAKE_PTR(n);
25,042✔
1476
        return 0;
25,042✔
1477
}
1478

1479
char* cg_unescape(const char *p) {
        assert(p);

        /* Reverses cg_escape() by skipping over one leading underscore, if there is one. The result
         * points into 'p' itself, hence (unlike the result of cg_escape()) it must not be freed. */

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1490

1491
#define CONTROLLER_VALID                        \
1492
        DIGITS LETTERS                          \
1493
        "_"
1494

1495
bool cg_controller_is_valid(const char *p) {
348,497✔
1496
        const char *t, *s;
348,497✔
1497

1498
        if (!p)
348,497✔
1499
                return false;
1500

1501
        if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
348,497✔
1502
                return true;
1503

1504
        s = startswith(p, "name=");
76,167✔
1505
        if (s)
76,167✔
1506
                p = s;
2✔
1507

1508
        if (IN_SET(*p, 0, '_'))
76,167✔
1509
                return false;
1510

1511
        for (t = p; *t; t++)
484,484✔
1512
                if (!strchr(CONTROLLER_VALID, *t))
408,328✔
1513
                        return false;
1514

1515
        if (t - p > NAME_MAX)
76,156✔
1516
                return false;
×
1517

1518
        return true;
1519
}
1520

1521
int cg_slice_to_path(const char *unit, char **ret) {
10,896✔
1522
        _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
10,896✔
1523
        const char *dash;
10,896✔
1524
        int r;
10,896✔
1525

1526
        assert(unit);
10,896✔
1527
        assert(ret);
10,896✔
1528

1529
        if (streq(unit, SPECIAL_ROOT_SLICE))
10,896✔
1530
                return strdup_to(ret, "");
7✔
1531

1532
        if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
10,889✔
1533
                return -EINVAL;
1534

1535
        if (!endswith(unit, ".slice"))
10,878✔
1536
                return -EINVAL;
1537

1538
        r = unit_name_to_prefix(unit, &p);
10,877✔
1539
        if (r < 0)
10,877✔
1540
                return r;
1541

1542
        dash = strchr(p, '-');
10,877✔
1543

1544
        /* Don't allow initial dashes */
1545
        if (dash == p)
10,877✔
1546
                return -EINVAL;
1547

1548
        while (dash) {
11,455✔
1549
                _cleanup_free_ char *escaped = NULL;
583✔
1550
                char n[dash - p + sizeof(".slice")];
583✔
1551

1552
#if HAS_FEATURE_MEMORY_SANITIZER
1553
                /* msan doesn't instrument stpncpy, so it thinks
1554
                 * n is later used uninitialized:
1555
                 * https://github.com/google/sanitizers/issues/926
1556
                 */
1557
                zero(n);
1558
#endif
1559

1560
                /* Don't allow trailing or double dashes */
1561
                if (IN_SET(dash[1], 0, '-'))
583✔
1562
                        return -EINVAL;
1563

1564
                strcpy(stpncpy(n, p, dash - p), ".slice");
581✔
1565
                if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
581✔
1566
                        return -EINVAL;
1567

1568
                r = cg_escape(n, &escaped);
581✔
1569
                if (r < 0)
581✔
1570
                        return r;
1571

1572
                if (!strextend(&s, escaped, "/"))
581✔
1573
                        return -ENOMEM;
1574

1575
                dash = strchr(dash+1, '-');
581✔
1576
        }
1577

1578
        r = cg_escape(unit, &e);
10,872✔
1579
        if (r < 0)
10,872✔
1580
                return r;
1581

1582
        if (!strextend(&s, e))
10,872✔
1583
                return -ENOMEM;
1584

1585
        *ret = TAKE_PTR(s);
10,872✔
1586
        return 0;
10,872✔
1587
}
1588

1589
int cg_is_threaded(const char *path) {
×
1590
        _cleanup_free_ char *fs = NULL, *contents = NULL;
×
1591
        _cleanup_strv_free_ char **v = NULL;
×
1592
        int r;
×
1593

1594
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "cgroup.type", &fs);
×
1595
        if (r < 0)
×
1596
                return r;
1597

1598
        r = read_full_virtual_file(fs, &contents, NULL);
×
1599
        if (r == -ENOENT)
×
1600
                return false; /* Assume no. */
1601
        if (r < 0)
×
1602
                return r;
1603

1604
        v = strv_split(contents, NULL);
×
1605
        if (!v)
×
1606
                return -ENOMEM;
1607

1608
        /* If the cgroup is in the threaded mode, it contains "threaded".
1609
         * If one of the parents or siblings is in the threaded mode, it may contain "invalid". */
1610
        return strv_contains(v, "threaded") || strv_contains(v, "invalid");
×
1611
}
1612

1613
int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
48,321✔
1614
        _cleanup_free_ char *p = NULL;
48,321✔
1615
        int r;
48,321✔
1616

1617
        assert(attribute);
48,321✔
1618

1619
        r = cg_get_path(controller, path, attribute, &p);
48,321✔
1620
        if (r < 0)
48,321✔
1621
                return r;
1622

1623
        /* https://lore.kernel.org/all/20250419183545.1982187-1-shakeel.butt@linux.dev/ adds O_NONBLOCK
1624
         * semantics to memory.max and memory.high to skip synchronous memory reclaim when O_NONBLOCK is
1625
         * enabled. Let's always open cgroupv2 attribute files in nonblocking mode to immediately take
1626
         * advantage of this and any other asynchronous resource reclaim that's added to the cgroupv2 API in
1627
         * the future. */
1628
        return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER|WRITE_STRING_FILE_OPEN_NONBLOCKING);
48,321✔
1629
}
1630

1631
int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
51,261✔
1632
        _cleanup_free_ char *p = NULL;
51,261✔
1633
        int r;
51,261✔
1634

1635
        assert(attribute);
51,261✔
1636

1637
        r = cg_get_path(controller, path, attribute, &p);
51,261✔
1638
        if (r < 0)
51,261✔
1639
                return r;
1640

1641
        return read_one_line_file(p, ret);
51,261✔
1642
}
1643

1644
int cg_get_attribute_as_uint64(const char *controller, const char *path, const char *attribute, uint64_t *ret) {
42,370✔
1645
        _cleanup_free_ char *value = NULL;
42,370✔
1646
        uint64_t v;
42,370✔
1647
        int r;
42,370✔
1648

1649
        assert(ret);
42,370✔
1650

1651
        r = cg_get_attribute(controller, path, attribute, &value);
42,370✔
1652
        if (r == -ENOENT)
42,370✔
1653
                return -ENODATA;
1654
        if (r < 0)
40,338✔
1655
                return r;
1656

1657
        if (streq(value, "max")) {
40,338✔
1658
                *ret = CGROUP_LIMIT_MAX;
10,361✔
1659
                return 0;
10,361✔
1660
        }
1661

1662
        r = safe_atou64(value, &v);
29,977✔
1663
        if (r < 0)
29,977✔
1664
                return r;
1665

1666
        *ret = v;
29,977✔
1667
        return 0;
29,977✔
1668
}
1669

1670
int cg_get_attribute_as_bool(const char *controller, const char *path, const char *attribute) {
57✔
1671
        _cleanup_free_ char *value = NULL;
57✔
1672
        int r;
57✔
1673

1674
        r = cg_get_attribute(controller, path, attribute, &value);
57✔
1675
        if (r == -ENOENT)
57✔
1676
                return -ENODATA;
1677
        if (r < 0)
57✔
1678
                return r;
1679

1680
        return parse_boolean(value);
57✔
1681
}
1682

1683
int cg_get_owner(const char *path, uid_t *ret_uid) {
35✔
1684
        _cleanup_free_ char *f = NULL;
35✔
1685
        struct stat stats;
35✔
1686
        int r;
35✔
1687

1688
        assert(ret_uid);
35✔
1689

1690
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &f);
35✔
1691
        if (r < 0)
35✔
1692
                return r;
1693

1694
        if (stat(f, &stats) < 0)
35✔
1695
                return -errno;
16✔
1696

1697
        r = stat_verify_directory(&stats);
19✔
1698
        if (r < 0)
19✔
1699
                return r;
1700

1701
        *ret_uid = stats.st_uid;
19✔
1702
        return 0;
19✔
1703
}
1704

1705
int cg_get_keyed_attribute(
25,897✔
1706
                const char *controller,
1707
                const char *path,
1708
                const char *attribute,
1709
                char * const *keys,
1710
                char **values) {
1711

1712
        _cleanup_free_ char *filename = NULL, *contents = NULL;
25,897✔
1713
        size_t n;
25,897✔
1714
        int r;
25,897✔
1715

1716
        assert(path);
25,897✔
1717
        assert(attribute);
25,897✔
1718

1719
        /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with
1720
         * all keys to retrieve. The 'values' parameter should be passed as string size with the same number of
1721
         * entries as 'keys'. On success each entry will be set to the value of the matching key.
1722
         *
1723
         * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. */
1724

1725
        r = cg_get_path(controller, path, attribute, &filename);
25,897✔
1726
        if (r < 0)
25,897✔
1727
                return r;
1728

1729
        r = read_full_file(filename, &contents, /* ret_size = */ NULL);
25,897✔
1730
        if (r < 0)
25,897✔
1731
                return r;
1732

1733
        n = strv_length(keys);
23,327✔
1734
        if (n == 0) /* No keys to retrieve? That's easy, we are done then */
23,327✔
1735
                return 0;
1736
        assert(strv_is_uniq(keys));
23,327✔
1737

1738
        /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
1739
        char **v = newa0(char*, n);
23,327✔
1740
        size_t n_done = 0;
23,327✔
1741

1742
        for (const char *p = contents; *p;) {
73,600✔
1743
                const char *w;
1744
                size_t i;
1745

1746
                for (i = 0; i < n; i++) {
123,881✔
1747
                        w = first_word(p, keys[i]);
81,364✔
1748
                        if (w)
81,364✔
1749
                                break;
1750
                }
1751

1752
                if (w) {
73,598✔
1753
                        if (v[i]) { /* duplicate entry? */
31,081✔
1754
                                r = -EBADMSG;
×
1755
                                goto fail;
×
1756
                        }
1757

1758
                        size_t l = strcspn(w, NEWLINE);
31,081✔
1759

1760
                        v[i] = strndup(w, l);
31,081✔
1761
                        if (!v[i]) {
31,081✔
1762
                                r = -ENOMEM;
×
1763
                                goto fail;
×
1764
                        }
1765

1766
                        n_done++;
31,081✔
1767
                        if (n_done >= n)
31,081✔
1768
                                break;
1769

1770
                        p = w + l;
7,756✔
1771
                } else
1772
                        p += strcspn(p, NEWLINE);
42,517✔
1773

1774
                p += strspn(p, NEWLINE);
50,273✔
1775
        }
1776

1777
        if (n_done < n) {
23,327✔
1778
                r = -ENXIO;
2✔
1779
                goto fail;
2✔
1780
        }
1781

1782
        memcpy(values, v, sizeof(char*) * n);
23,325✔
1783
        return 0;
23,325✔
1784

1785
fail:
2✔
1786
        free_many_charp(v, n);
25,899✔
1787
        return r;
1788
}
1789

1790
int cg_mask_to_string(CGroupMask mask, char **ret) {
        _cleanup_free_ char *buf = NULL;
        bool need_separator = false;
        size_t len = 0;

        /* Formats the controllers set in 'mask' as a single space-separated string of controller names,
         * in CGroupController enumeration order. On success 0 is returned and the newly allocated string
         * is stored in *ret; an empty mask is reported as NULL rather than as an empty string. Returns
         * -ENOMEM if the buffer cannot be grown. */

        assert(ret);

        if (mask == 0) {
                *ret = NULL;
                return 0;
        }

        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c)))
                        continue;

                const char *name = cgroup_controller_to_string(c);
                size_t name_len = strlen(name);

                /* Room for an optional separator, the name itself and the trailing NUL */
                if (!GREEDY_REALLOC(buf, len + need_separator + name_len + 1))
                        return -ENOMEM;

                if (need_separator)
                        buf[len++] = ' ';

                memcpy(buf + len, name, name_len);
                len += name_len;

                need_separator = true;
        }

        /* The mask was non-zero, hence we expect that at least one name was appended above */
        assert(buf);

        buf[len] = 0;
        *ret = TAKE_PTR(buf);

        return 0;
}
1831

1832
int cg_mask_from_string(const char *value, CGroupMask *ret) {
        CGroupMask result = 0;
        int r;

        /* Parses a list of controller names, one word at a time, into a controller mask. Words that do
         * not name a known controller are skipped silently. Returns 0 and stores the mask in *ret on
         * success, or the negative error reported by extract_first_word(). */

        assert(ret);
        assert(value);

        do {
                _cleanup_free_ char *word = NULL;

                r = extract_first_word(&value, &word, NULL, 0);
                if (r < 0)
                        return r;

                if (r > 0) {
                        CGroupController c = cgroup_controller_from_string(word);

                        if (c >= 0)
                                result |= CGROUP_CONTROLLER_TO_MASK(c);
                }
        } while (r > 0); /* r == 0 means the input is exhausted */

        *ret = result;
        return 0;
}
1859

1860
int cg_mask_supported_subtree(const char *root, CGroupMask *ret) {
        CGroupMask supported = 0;
        int r;

        /* Computes the mask of cgroup controllers that are supported below 'root'. Only controllers we
         * can make sense of and that are actually accessible are included, and only real controllers
         * are covered, i.e. not the CGROUP_CONTROLLER_BPF_xyz pseudo-controllers. On success 0 is
         * returned and the mask is stored in *ret; on failure *ret is left untouched. */

        r = cg_all_unified();
        if (r < 0)
                return r;

        if (r == 0) {
                /* Legacy hierarchy: probe each v1 controller individually for accessibility. */
                for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                        CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);

                        if (FLAGS_SET(CGROUP_MASK_V1, bit) &&
                            controller_is_v1_accessible(root, cgroup_controller_to_string(c)) >= 0)
                                supported |= bit;
                }

                *ret = supported;
                return 0;
        }

        /* Unified hierarchy: the supported and accessible controllers can be read directly from the
         * "cgroup.controllers" attribute of the cgroup. */
        _cleanup_free_ char *path = NULL, *controllers = NULL;

        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
        if (r < 0)
                return r;

        r = read_one_line_file(path, &controllers);
        if (r < 0)
                return r;

        r = cg_mask_from_string(controllers, &supported);
        if (r < 0)
                return r;

        /* Filter out controllers that are not supported in the unified hierarchy. */
        *ret = supported & CGROUP_MASK_V2;
        return 0;
}
1914

1915
int cg_mask_supported(CGroupMask *ret) {
        _cleanup_free_ char *root_path = NULL;

        /* Like cg_mask_supported_subtree(), but operates on the path returned by cg_get_root_path(). */

        int r = cg_get_root_path(&root_path);

        return r < 0 ? r : cg_mask_supported_subtree(root_path, ret);
}
1925

1926
/* Hybrid mode first appeared in v232, where cgroup2 was simply mounted on /sys/fs/cgroup/systemd. That
 * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
 * /sys/fs/cgroup/systemd. From v233 on, hybrid mode mounts v2 on /sys/fs/cgroup/unified instead and
 * maintains the "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
 *
 * To keep live upgrades working, the v232 layout is still detected and supported. When the v232 layout
 * is detected we keep cgroup v2 process management but disable the compat dual layout, i.e. we return
 * true from cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and false from cg_hybrid_unified().
 */
static thread_local bool unified_systemd_v232 = false;
1937

1938
int cg_unified_cached(bool flush) {
358,040✔
1939
        static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
358,040✔
1940

1941
        struct statfs fs;
358,040✔
1942

1943
        /* Checks if we support the unified hierarchy. Returns an
1944
         * error when the cgroup hierarchies aren't mounted yet or we
1945
         * have any other trouble determining if the unified hierarchy
1946
         * is supported. */
1947

1948
        if (flush)
358,040✔
1949
                unified_cache = CGROUP_UNIFIED_UNKNOWN;
4✔
1950
        else if (unified_cache >= CGROUP_UNIFIED_NONE)
358,036✔
1951
                return unified_cache;
358,040✔
1952

1953
        if (statfs("/sys/fs/cgroup/", &fs) < 0)
11,258✔
1954
                return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
×
1955

1956
        if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
11,258✔
1957
                log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
11,258✔
1958
                unified_cache = CGROUP_UNIFIED_ALL;
11,258✔
1959
        } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
×
1960
                if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
×
1961
                    F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
×
1962
                        log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
×
1963
                        unified_cache = CGROUP_UNIFIED_SYSTEMD;
×
1964
                        unified_systemd_v232 = false;
×
1965
                } else {
1966
                        if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) {
×
1967
                                if (errno == ENOENT) {
×
1968
                                        /* Some other software may have set up /sys/fs/cgroup in a configuration we do not recognize. */
1969
                                        log_debug_errno(errno, "Unsupported cgroupsv1 setup detected: name=systemd hierarchy not found.");
×
1970
                                        return -ENOMEDIUM;
×
1971
                                }
1972
                                return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
×
1973
                        }
1974

1975
                        if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
×
1976
                                log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
×
1977
                                unified_cache = CGROUP_UNIFIED_SYSTEMD;
×
1978
                                unified_systemd_v232 = true;
×
1979
                        } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
×
1980
                                log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
×
1981
                                unified_cache = CGROUP_UNIFIED_NONE;
×
1982
                        } else {
1983
                                log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
×
1984
                                          (unsigned long long) fs.f_type);
1985
                                unified_cache = CGROUP_UNIFIED_NONE;
×
1986
                        }
1987
                }
1988
        } else if (F_TYPE_EQUAL(fs.f_type, SYSFS_MAGIC)) {
×
1989
                return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
×
1990
                                       "No filesystem is currently mounted on /sys/fs/cgroup.");
1991
        } else
1992
                return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
×
1993
                                       "Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
1994
                                       (unsigned long long)fs.f_type);
1995

1996
        return unified_cache;
11,258✔
1997
}
1998

1999
int cg_unified_controller(const char *controller) {
53,026✔
2000
        int r;
53,026✔
2001

2002
        r = cg_unified_cached(false);
53,026✔
2003
        if (r < 0)
53,026✔
2004
                return r;
2005

2006
        if (r == CGROUP_UNIFIED_NONE)
53,026✔
2007
                return false;
2008

2009
        if (r >= CGROUP_UNIFIED_ALL)
53,026✔
2010
                return true;
2011

2012
        return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
×
2013
}
2014

2015
int cg_all_unified(void) {
305,008✔
2016
        int r;
305,008✔
2017

2018
        r = cg_unified_cached(false);
305,008✔
2019
        if (r < 0)
305,008✔
2020
                return r;
2021

2022
        return r >= CGROUP_UNIFIED_ALL;
305,008✔
2023
}
2024

2025
int cg_hybrid_unified(void) {
1✔
2026
        int r;
1✔
2027

2028
        r = cg_unified_cached(false);
1✔
2029
        if (r < 0)
1✔
2030
                return r;
2031

2032
        return r == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
1✔
2033
}
2034

2035
int cg_is_delegated(const char *path) {
        int r;

        /* Checks the "trusted.delegate" xattr of the cgroup at 'path', falling back to "user.delegate"
         * if the former is absent. Returns false if both are absent; any other result of the lookup
         * (including errors) is returned as-is. */

        assert(path);

        r = cg_get_xattr_bool(path, "trusted.delegate");
        if (ERRNO_IS_NEG_XATTR_ABSENT(r)) {
                /* If the trusted xattr isn't set (preferred), then check the untrusted one. Under the
                 * assumption that whoever is trusted enough to own the cgroup, is also trusted enough
                 * to decide if it is delegated or not this should be safe. */
                r = cg_get_xattr_bool(path, "user.delegate");
                if (ERRNO_IS_NEG_XATTR_ABSENT(r))
                        return false;
        }

        return r;
}
2050

2051
int cg_is_delegated_fd(int fd) {
        int r;

        /* Same check as cg_is_delegated(), but the xattrs are read from the file descriptor 'fd':
         * "trusted.delegate" first, then "user.delegate" if the former is absent. Returns false if both
         * are absent. */

        assert(fd >= 0);

        r = getxattr_at_bool(fd, /* path= */ NULL, "trusted.delegate", /* at_flags= */ 0);
        if (ERRNO_IS_NEG_XATTR_ABSENT(r)) {
                r = getxattr_at_bool(fd, /* path= */ NULL, "user.delegate", /* at_flags= */ 0);
                if (ERRNO_IS_NEG_XATTR_ABSENT(r))
                        return false;
        }

        return r;
}
2063

2064
int cg_has_coredump_receive(const char *path) {
        /* Returns the boolean value of the "user.coredump_receive" xattr of the cgroup at 'path'. An
         * absent xattr is reported as false; other errors are passed through. */

        assert(path);

        int r = cg_get_xattr_bool(path, "user.coredump_receive");

        return ERRNO_IS_NEG_XATTR_ABSENT(r) ? false : r;
}
2075

2076
/* Default value for each I/O limit type: every one of them defaults to CGROUP_LIMIT_MAX. */
const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]  = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WBPS_MAX]  = CGROUP_LIMIT_MAX,
        [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
};
2082

2083
/* Maps each CGroupIOLimitType value to its string name. */
static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]  = "IOReadBandwidthMax",
        [CGROUP_IO_WBPS_MAX]  = "IOWriteBandwidthMax",
        [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
        [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
};

DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
12,554✔
2091

2092
void cgroup_io_limits_list(void) {
20✔
2093
        DUMP_STRING_TABLE(cgroup_io_limit_type, CGroupIOLimitType, _CGROUP_IO_LIMIT_TYPE_MAX);
100✔
2094
}
20✔
2095

2096
/* Maps each CGroupController value to its string name. The "bpf-" entries are the names of the
 * CGROUP_CONTROLLER_BPF_xyz entries of the enumeration. */
static const char* const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
        [CGROUP_CONTROLLER_CPU]                             = "cpu",
        [CGROUP_CONTROLLER_CPUACCT]                         = "cpuacct",
        [CGROUP_CONTROLLER_CPUSET]                          = "cpuset",
        [CGROUP_CONTROLLER_IO]                              = "io",
        [CGROUP_CONTROLLER_BLKIO]                           = "blkio",
        [CGROUP_CONTROLLER_MEMORY]                          = "memory",
        [CGROUP_CONTROLLER_DEVICES]                         = "devices",
        [CGROUP_CONTROLLER_PIDS]                            = "pids",
        [CGROUP_CONTROLLER_BPF_FIREWALL]                    = "bpf-firewall",
        [CGROUP_CONTROLLER_BPF_DEVICES]                     = "bpf-devices",
        [CGROUP_CONTROLLER_BPF_FOREIGN]                     = "bpf-foreign",
        [CGROUP_CONTROLLER_BPF_SOCKET_BIND]                 = "bpf-socket-bind",
        [CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES] = "bpf-restrict-network-interfaces",
};

DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
403,532✔
2113

2114
/* Maps each ManagedOOMMode value to its string name. */
static const char* const managed_oom_mode_table[_MANAGED_OOM_MODE_MAX] = {
        [MANAGED_OOM_AUTO] = "auto",
        [MANAGED_OOM_KILL] = "kill",
};

DEFINE_STRING_TABLE_LOOKUP(managed_oom_mode, ManagedOOMMode);
31,999✔
2120

2121
/* Maps each ManagedOOMPreference value to its string name. */
static const char* const managed_oom_preference_table[_MANAGED_OOM_PREFERENCE_MAX] = {
        [MANAGED_OOM_PREFERENCE_NONE]  = "none",
        [MANAGED_OOM_PREFERENCE_AVOID] = "avoid",
        [MANAGED_OOM_PREFERENCE_OMIT]  = "omit",
};

DEFINE_STRING_TABLE_LOOKUP(managed_oom_preference, ManagedOOMPreference);
15,736✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc