• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 13935887515

18 Mar 2025 07:10PM UTC coverage: 71.913% (-0.03%) from 71.946%
13935887515

push

github

web-flow
Several fixes and cleanups around sd_listen_fds() (#36788)

15 of 24 new or added lines in 5 files covered. (62.5%)

993 existing lines in 54 files now uncovered.

296157 of 411825 relevant lines covered (71.91%)

710024.94 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.38
/src/basic/cgroup-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <errno.h>
4
#include <limits.h>
5
#include <signal.h>
6
#include <stddef.h>
7
#include <stdlib.h>
8
#include <sys/types.h>
9
#include <sys/utsname.h>
10
#include <sys/xattr.h>
11
#include <threads.h>
12
#include <unistd.h>
13

14
#include "alloc-util.h"
15
#include "capsule-util.h"
16
#include "cgroup-util.h"
17
#include "constants.h"
18
#include "dirent-util.h"
19
#include "extract-word.h"
20
#include "fd-util.h"
21
#include "fileio.h"
22
#include "format-util.h"
23
#include "fs-util.h"
24
#include "log.h"
25
#include "login-util.h"
26
#include "macro.h"
27
#include "missing_fs.h"
28
#include "missing_magic.h"
29
#include "mkdir.h"
30
#include "parse-util.h"
31
#include "path-util.h"
32
#include "process-util.h"
33
#include "set.h"
34
#include "special.h"
35
#include "stat-util.h"
36
#include "stdio-util.h"
37
#include "string-table.h"
38
#include "string-util.h"
39
#include "strv.h"
40
#include "unit-name.h"
41
#include "user-util.h"
42
#include "xattr-util.h"
43

44
int cg_path_open(const char *controller, const char *path) {
784✔
45
        _cleanup_free_ char *fs = NULL;
784✔
46
        int r;
784✔
47

48
        r = cg_get_path(controller, path, /* item=*/ NULL, &fs);
784✔
49
        if (r < 0)
784✔
50
                return r;
51

52
        return RET_NERRNO(open(fs, O_DIRECTORY|O_CLOEXEC));
784✔
53
}
54

55
int cg_cgroupid_open(int cgroupfs_fd, uint64_t id) {
8✔
56
        _cleanup_close_ int fsfd = -EBADF;
8✔
57

58
        if (cgroupfs_fd < 0) {
8✔
59
                fsfd = open("/sys/fs/cgroup", O_CLOEXEC|O_DIRECTORY);
7✔
60
                if (fsfd < 0)
7✔
61
                        return -errno;
×
62

63
                cgroupfs_fd = fsfd;
64
        }
65

66
        cg_file_handle fh = CG_FILE_HANDLE_INIT;
8✔
67
        CG_FILE_HANDLE_CGROUPID(fh) = id;
8✔
68

69
        return RET_NERRNO(open_by_handle_at(cgroupfs_fd, &fh.file_handle, O_DIRECTORY|O_CLOEXEC));
14✔
70
}
71

72
int cg_path_from_cgroupid(int cgroupfs_fd, uint64_t id, char **ret) {
×
73
        _cleanup_close_ int cgfd = -EBADF;
×
74
        int r;
×
75

76
        cgfd = cg_cgroupid_open(cgroupfs_fd, id);
×
77
        if (cgfd < 0)
×
78
                return cgfd;
79

80
        _cleanup_free_ char *path = NULL;
×
81
        r = fd_get_path(cgfd, &path);
×
82
        if (r < 0)
×
83
                return r;
84

85
        if (!path_startswith(path, "/sys/fs/cgroup/"))
×
86
                return -EXDEV; /* recognizable error */
87

88
        if (ret)
×
89
                *ret = TAKE_PTR(path);
×
90
        return 0;
91
}
92

93
int cg_get_cgroupid_at(int dfd, const char *path, uint64_t *ret) {
5,745✔
94
        cg_file_handle fh = CG_FILE_HANDLE_INIT;
5,745✔
95
        int mnt_id;
5,745✔
96

97
        assert(dfd >= 0 || (dfd == AT_FDCWD && path_is_absolute(path)));
11,461✔
98
        assert(ret);
5,745✔
99

100
        /* This is cgroupfs so we know the size of the handle, thus no need to loop around like
101
         * name_to_handle_at_loop() does in mountpoint-util.c */
102
        if (name_to_handle_at(dfd, strempty(path), &fh.file_handle, &mnt_id, isempty(path) ? AT_EMPTY_PATH : 0) < 0) {
11,490✔
103
                assert(errno != EOVERFLOW);
×
104
                return -errno;
×
105
        }
106

107
        *ret = CG_FILE_HANDLE_CGROUPID(fh);
5,745✔
108
        return 0;
5,745✔
109
}
110

111
static int cg_enumerate_items(const char *controller, const char *path, FILE **ret, const char *item) {
29,875✔
112
        _cleanup_free_ char *fs = NULL;
29,875✔
113
        FILE *f;
29,875✔
114
        int r;
29,875✔
115

116
        assert(ret);
29,875✔
117

118
        r = cg_get_path(controller, path, item, &fs);
29,875✔
119
        if (r < 0)
29,875✔
120
                return r;
121

122
        f = fopen(fs, "re");
29,875✔
123
        if (!f)
29,875✔
124
                return -errno;
18,797✔
125

126
        *ret = f;
11,078✔
127
        return 0;
11,078✔
128
}
129

130
/* Opens the "cgroup.procs" item of the given cgroup through cg_enumerate_items() and hands its result back
 * unchanged. */
int cg_enumerate_processes(const char *controller, const char *path, FILE **ret) {
        int r;

        r = cg_enumerate_items(controller, path, ret, "cgroup.procs");
        return r;
}
133

134
/* Reads the next PID from an open cgroup process list.
 *
 * Returns 1 and stores the PID in '*ret' if one was read, 0 (with '*ret' set to 0) at end of file, and a
 * negative error otherwise (-EIO if the number read exceeds PID_T_MAX).
 *
 * Note that cgroup.procs might contain duplicates! See cgroups.txt for details. */
int cg_read_pid(FILE *f, pid_t *ret, CGroupFlags flags) {
        bool keep_unmapped = FLAGS_SET(flags, CGROUP_DONT_SKIP_UNMAPPED);
        unsigned long value;

        assert(f);
        assert(ret);

        do {
                errno = 0;
                if (fscanf(f, "%lu", &value) != 1) {
                        if (!feof(f))
                                return errno_or_else(EIO);

                        *ret = 0;
                        return 0;
                }

                if (value > PID_T_MAX)
                        return -EIO;

                /* In some circumstances (e.g. WSL) cgroups might contain unmappable PIDs from other
                 * contexts. These show up as zeros, and depending on the caller they are either skipped
                 * over (loop again) or returned as-is. */
        } while (value == 0 && !keep_unmapped);

        *ret = (pid_t) value;
        return 1;
}
167

168
/* Like cg_read_pid(), but yields a PidRef.
 *
 * Returns 1 with '*ret' filled in, 0 with '*ret' set to PIDREF_NULL when cg_read_pid() reports the end of
 * the list, -EREMOTE if a zero PID was read, or another negative error. With CGROUP_NO_PIDFD set the PidRef
 * is built with PIDREF_MAKE_FROM_PID() instead of pidref_set_pid(). */
int cg_read_pidref(FILE *f, PidRef *ret, CGroupFlags flags) {
        int r;

        assert(f);
        assert(ret);

        do {
                pid_t pid;

                r = cg_read_pid(f, &pid, flags);
                if (r < 0)
                        return log_debug_errno(r, "Failed to read pid from cgroup item: %m");
                if (r == 0) {
                        *ret = PIDREF_NULL;
                        return 0;
                }

                if (pid == 0)
                        return -EREMOTE;

                if (FLAGS_SET(flags, CGROUP_NO_PIDFD)) {
                        *ret = PIDREF_MAKE_FROM_PID(pid);
                        return 1;
                }

                r = pidref_set_pid(ret, pid);
                if (r >= 0)
                        return 1;

                /* ESRCH → gone by now? Just skip over it and read the next one. */
        } while (r == -ESRCH);

        return r;
}
202

203
int cg_read_event(
12,529✔
204
                const char *controller,
205
                const char *path,
206
                const char *event,
207
                char **ret) {
208

209
        _cleanup_free_ char *events = NULL, *content = NULL;
12,529✔
210
        int r;
12,529✔
211

212
        r = cg_get_path(controller, path, "cgroup.events", &events);
12,529✔
213
        if (r < 0)
12,529✔
214
                return r;
215

216
        r = read_full_virtual_file(events, &content, NULL);
12,529✔
217
        if (r < 0)
12,529✔
218
                return r;
219

220
        for (const char *p = content;;) {
4,778✔
221
                _cleanup_free_ char *line = NULL, *key = NULL;
4,778✔
222
                const char *q;
4,778✔
223

224
                r = extract_first_word(&p, &line, "\n", 0);
4,778✔
225
                if (r < 0)
4,778✔
226
                        return r;
227
                if (r == 0)
4,778✔
228
                        return -ENOENT;
229

230
                q = line;
4,778✔
231
                r = extract_first_word(&q, &key, " ", 0);
4,778✔
232
                if (r < 0)
4,778✔
233
                        return r;
234
                if (r == 0)
4,778✔
235
                        return -EINVAL;
236

237
                if (!streq(key, event))
4,778✔
238
                        continue;
×
239

240
                return strdup_to(ret, q);
4,778✔
241
        }
242
}
243

244
/* Reports whether "/proc/self/ns/cgroup" is accessible. The answer is determined once per thread and
 * cached. Any access() failure counts as "not supported"; failures other than ENOENT are additionally
 * logged at debug level. */
bool cg_ns_supported(void) {
        static thread_local int cached = -1;

        if (cached < 0) {
                if (access("/proc/self/ns/cgroup", F_OK) >= 0)
                        cached = true;
                else {
                        if (errno != ENOENT)
                                log_debug_errno(errno, "Failed to check whether /proc/self/ns/cgroup is available, assuming not: %m");

                        cached = false;
                }
        }

        return cached;
}
256

257
/* Reports whether the cgroup freezer is available, cached per thread. The result is false unless
 * cg_all_unified() returns a positive value and "/sys/fs/cgroup/init.scope/cgroup.freeze" is accessible.
 * access() failures other than ENOENT are logged at debug level. */
bool cg_freezer_supported(void) {
        static thread_local int cached = -1;

        if (cached < 0) {
                if (cg_all_unified() <= 0)
                        cached = false;
                else if (access("/sys/fs/cgroup/init.scope/cgroup.freeze", F_OK) >= 0)
                        cached = true;
                else {
                        if (errno != ENOENT)
                                log_debug_errno(errno, "Failed to check whether cgroup freezer is available, assuming not: %m");

                        cached = false;
                }
        }

        return cached;
}
272

273
/* Reports whether the "cgroup.kill" attribute is available, cached per thread. The result is false unless
 * cg_all_unified() returns a positive value and "/sys/fs/cgroup/init.scope/cgroup.kill" is accessible.
 * access() failures other than ENOENT are logged at debug level. */
bool cg_kill_supported(void) {
        static thread_local int cached = -1;

        if (cached < 0) {
                if (cg_all_unified() <= 0)
                        cached = false;
                else if (access("/sys/fs/cgroup/init.scope/cgroup.kill", F_OK) >= 0)
                        cached = true;
                else {
                        if (errno != ENOENT)
                                log_debug_errno(errno, "Failed to check whether cgroup.kill is available, assuming not: %m");

                        cached = false;
                }
        }

        return cached;
}
288

289
int cg_enumerate_subgroups(const char *controller, const char *path, DIR **ret) {
27,597✔
290
        _cleanup_free_ char *fs = NULL;
27,597✔
291
        DIR *d;
27,597✔
292
        int r;
27,597✔
293

294
        assert(ret);
27,597✔
295

296
        /* This is not recursive! */
297

298
        r = cg_get_path(controller, path, NULL, &fs);
27,597✔
299
        if (r < 0)
27,597✔
300
                return r;
301

302
        d = opendir(fs);
27,597✔
303
        if (!d)
27,597✔
304
                return -errno;
16,152✔
305

306
        *ret = d;
11,445✔
307
        return 0;
11,445✔
308
}
309

310
int cg_read_subgroup(DIR *d, char **ret) {
16,961✔
311
        assert(d);
16,961✔
312
        assert(ret);
16,961✔
313

314
        FOREACH_DIRENT_ALL(de, d, return -errno) {
537,906✔
315
                if (de->d_type != DT_DIR)
526,231✔
316
                        continue;
497,595✔
317

318
                if (dot_or_dot_dot(de->d_name))
28,636✔
319
                        continue;
23,350✔
320

321
                return strdup_to_full(ret, de->d_name);
5,286✔
322
        }
323

324
        *ret = NULL;
11,675✔
325
        return 0;
11,675✔
326
}
327

328
static int cg_kill_items(
29,223✔
329
                const char *path,
330
                const char *item,
331
                int sig,
332
                CGroupFlags flags,
333
                Set *s,
334
                cg_kill_log_func_t log_kill,
335
                void *userdata) {
336

337
        _cleanup_set_free_ Set *allocated_set = NULL;
29,223✔
338
        int r, ret = 0;
29,223✔
339

340
        assert(path);
29,223✔
341
        assert(item);
29,223✔
342
        assert(sig >= 0);
29,223✔
343

344
         /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence
345
          * don't send SIGCONT on SIGKILL. */
346
        if (IN_SET(sig, SIGCONT, SIGKILL))
29,223✔
347
                flags &= ~CGROUP_SIGCONT;
10,992✔
348

349
        /* This goes through the tasks list and kills them all. This is repeated until no further processes
350
         * are added to the tasks list, to properly handle forking processes.
351
         *
352
         * When sending SIGKILL, prefer cg_kill_kernel_sigkill(), which is fully atomic. */
353

354
        if (!s) {
29,223✔
355
                s = allocated_set = set_new(NULL);
938✔
356
                if (!s)
938✔
357
                        return -ENOMEM;
358
        }
359

360
        bool done;
29,464✔
361
        do {
29,464✔
362
                _cleanup_fclose_ FILE *f = NULL;
18,797✔
363
                int ret_log_kill;
29,464✔
364

365
                done = true;
29,464✔
366

367
                r = cg_enumerate_items(SYSTEMD_CGROUP_CONTROLLER, path, &f, item);
29,464✔
368
                if (r == -ENOENT)
29,464✔
369
                        break;
370
                if (r < 0)
10,667✔
371
                        return RET_GATHER(ret, log_debug_errno(r, "Failed to enumerate cgroup items: %m"));
×
372

373
                for (;;) {
15,007✔
374
                        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
15,007✔
375

376
                        r = cg_read_pidref(f, &pidref, flags);
15,007✔
377
                        if (r < 0)
15,007✔
378
                                return RET_GATHER(ret, log_debug_errno(r, "Failed to read pidref from cgroup '%s': %m", path));
×
379
                        if (r == 0)
15,007✔
380
                                break;
381

382
                        if ((flags & CGROUP_IGNORE_SELF) && pidref_is_self(&pidref))
4,340✔
383
                                continue;
938✔
384

385
                        if (set_contains(s, PID_TO_PTR(pidref.pid)))
3,402✔
386
                                continue;
2,049✔
387

388
                        /* Ignore kernel threads to mimic the behavior of cgroup.kill. */
389
                        if (pidref_is_kernel_thread(&pidref) > 0) {
1,353✔
390
                                log_debug("Ignoring kernel thread with pid " PID_FMT " in cgroup '%s'", pidref.pid, path);
×
391
                                continue;
×
392
                        }
393

394
                        if (log_kill)
1,353✔
395
                                ret_log_kill = log_kill(&pidref, sig, userdata);
92✔
396

397
                        /* If we haven't killed this process yet, kill it */
398
                        r = pidref_kill(&pidref, sig);
1,353✔
399
                        if (r < 0 && r != -ESRCH)
1,353✔
400
                                RET_GATHER(ret, log_debug_errno(r, "Failed to kill process with pid " PID_FMT " from cgroup '%s': %m", pidref.pid, path));
×
401
                        if (r >= 0) {
1,353✔
402
                                if (flags & CGROUP_SIGCONT)
1,353✔
403
                                        (void) pidref_kill(&pidref, SIGCONT);
1,259✔
404

405
                                if (ret == 0) {
1,353✔
406
                                        if (log_kill)
312✔
407
                                                ret = ret_log_kill;
408
                                        else
409
                                                ret = 1;
220✔
410
                                }
411
                        }
412

413
                        done = false;
1,353✔
414

415
                        r = set_put(s, PID_TO_PTR(pidref.pid));
1,353✔
416
                        if (r < 0)
1,353✔
417
                                return RET_GATHER(ret, r);
×
418
                }
419

420
                /* To avoid racing against processes which fork quicker than we can kill them, we repeat this
421
                 * until no new pids need to be killed. */
422

423
        } while (!done);
10,667✔
424

425
        return ret;
426
}
427

428
int cg_kill(
23,727✔
429
                const char *path,
430
                int sig,
431
                CGroupFlags flags,
432
                Set *s,
433
                cg_kill_log_func_t log_kill,
434
                void *userdata) {
435

436
        int r, ret;
23,727✔
437

438
        assert(path);
23,727✔
439

440
        ret = cg_kill_items(path, "cgroup.procs", sig, flags, s, log_kill, userdata);
23,727✔
441
        if (ret < 0)
23,727✔
442
                return log_debug_errno(ret, "Failed to kill processes in cgroup '%s' item cgroup.procs: %m", path);
×
443
        if (sig != SIGKILL)
23,727✔
444
                return ret;
445

446
        /* Only in case of killing with SIGKILL and when using cgroupsv2, kill remaining threads manually as
447
           a workaround for kernel bug. It was fixed in 5.2-rc5 (c03cd7738a83), backported to 4.19.66
448
           (4340d175b898) and 4.14.138 (feb6b123b7dd). */
449
        r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
5,496✔
450
        if (r < 0)
5,496✔
451
                return r;
452
        if (r == 0)
5,496✔
453
                return ret;
454

455
        /* Opening pidfds for non thread group leaders only works from 6.9 onwards with PIDFD_THREAD. On
456
         * older kernels or without PIDFD_THREAD pidfd_open() fails with EINVAL. Since we might read non
457
         * thread group leader IDs from cgroup.threads, we set CGROUP_NO_PIDFD to avoid trying open pidfd's
458
         * for them and instead use the regular pid. */
459
        r = cg_kill_items(path, "cgroup.threads", sig, flags|CGROUP_NO_PIDFD, s, log_kill, userdata);
5,496✔
460
        if (r < 0)
5,496✔
461
                return log_debug_errno(r, "Failed to kill processes in cgroup '%s' item cgroup.threads: %m", path);
×
462

463
        return r > 0 || ret > 0;
5,496✔
464
}
465

466
int cg_kill_recursive(
23,256✔
467
                const char *path,
468
                int sig,
469
                CGroupFlags flags,
470
                Set *s,
471
                cg_kill_log_func_t log_kill,
472
                void *userdata) {
473

474
        _cleanup_set_free_ Set *allocated_set = NULL;
×
475
        _cleanup_closedir_ DIR *d = NULL;
23,256✔
476
        int r, ret;
23,256✔
477

478
        assert(path);
23,256✔
479
        assert(sig >= 0);
23,256✔
480

481
        if (!s) {
23,256✔
482
                s = allocated_set = set_new(NULL);
22,341✔
483
                if (!s)
22,341✔
484
                        return -ENOMEM;
485
        }
486

487
        ret = cg_kill(path, sig, flags, s, log_kill, userdata);
23,256✔
488

489
        r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
23,256✔
490
        if (r < 0) {
23,256✔
491
                if (r != -ENOENT)
16,152✔
492
                        RET_GATHER(ret, log_debug_errno(r, "Failed to enumerate cgroup '%s' subgroups: %m", path));
×
493

494
                return ret;
16,152✔
495
        }
496

497
        for (;;) {
7,468✔
498
                _cleanup_free_ char *fn = NULL, *p = NULL;
7,286✔
499

500
                r = cg_read_subgroup(d, &fn);
7,286✔
501
                if (r < 0) {
7,286✔
502
                        RET_GATHER(ret, log_debug_errno(r, "Failed to read subgroup from cgroup '%s': %m", path));
×
503
                        break;
504
                }
505
                if (r == 0)
7,286✔
506
                        break;
507

508
                p = path_join(empty_to_root(path), fn);
364✔
509
                if (!p)
182✔
510
                        return -ENOMEM;
×
511

512
                r = cg_kill_recursive(p, sig, flags, s, log_kill, userdata);
182✔
513
                if (r < 0)
182✔
514
                        log_debug_errno(r, "Failed to recursively kill processes in cgroup '%s': %m", p);
×
515
                if (r != 0 && ret >= 0)
182✔
516
                        ret = r;
15✔
517
        }
518

519
        return ret;
7,104✔
520
}
521

522
int cg_kill_kernel_sigkill(const char *path) {
×
523
        _cleanup_free_ char *killfile = NULL;
×
524
        int r;
×
525

526
        /* Kills the cgroup at `path` directly by writing to its cgroup.kill file.  This sends SIGKILL to all
527
         * processes in the cgroup and has the advantage of being completely atomic, unlike cg_kill_items(). */
528

529
        assert(path);
×
530

531
        if (!cg_kill_supported())
×
532
                return -EOPNOTSUPP;
533

534
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "cgroup.kill", &killfile);
×
535
        if (r < 0)
×
536
                return r;
537

538
        r = write_string_file(killfile, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
×
539
        if (r < 0)
×
540
                return log_debug_errno(r, "Failed to write to cgroup.kill for cgroup '%s': %m", path);
×
541

542
        return 0;
543
}
544

545
static const char *controller_to_dirname(const char *controller) {
×
546
        assert(controller);
×
547

548
        /* Converts a controller name to the directory name below /sys/fs/cgroup/ we want to mount it
549
         * to. Effectively, this just cuts off the name= prefixed used for named hierarchies, if it is
550
         * specified. */
551

552
        if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
×
553
                if (cg_hybrid_unified() > 0)
×
554
                        controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
555
                else
556
                        controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
×
557
        }
558

559
        return startswith(controller, "name=") ?: controller;
×
560
}
561

562
/* Builds "/sys/fs/cgroup/<dirname>[/<path>][/<suffix>]" for the given controller, where <dirname> comes
 * from controller_to_dirname(). Empty or NULL 'path' and 'suffix' components are left out.
 *
 * Returns 0 and stores the newly allocated string in '*ret', or -ENOMEM. */
static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **ret) {
        bool have_path, have_suffix;
        char *joined = NULL;
        const char *dn;

        assert(ret);
        assert(controller);

        dn = controller_to_dirname(controller);

        have_path = !isempty(path);
        have_suffix = !isempty(suffix);

        if (have_path && have_suffix)
                joined = path_join("/sys/fs/cgroup", dn, path, suffix);
        else if (have_path)
                joined = path_join("/sys/fs/cgroup", dn, path);
        else if (have_suffix)
                joined = path_join("/sys/fs/cgroup", dn, suffix);
        else
                joined = path_join("/sys/fs/cgroup", dn);

        if (!joined)
                return -ENOMEM;

        *ret = joined;
        return 0;
}
585

586
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]". Empty or NULL 'path' and 'suffix' components are left out;
 * if both are empty the bare mount point string is duplicated.
 *
 * Returns 0 and stores the newly allocated string in '*ret', or -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **ret) {
        bool have_path, have_suffix;
        char *joined;

        assert(ret);

        have_path = !isempty(path);
        have_suffix = !isempty(suffix);

        if (have_path && have_suffix)
                joined = path_join("/sys/fs/cgroup", path, suffix);
        else if (have_path)
                joined = path_join("/sys/fs/cgroup", path);
        else if (have_suffix)
                joined = path_join("/sys/fs/cgroup", suffix);
        else
                joined = strdup("/sys/fs/cgroup");

        if (!joined)
                return -ENOMEM;

        *ret = joined;
        return 0;
}
605

606
/* Builds the path for 'path'/'suffix' within the hierarchy of 'controller'.
 *
 * If 'controller' is NULL, the path *below* the controllers is returned, without any prefix: just 'path'
 * and 'suffix' combined, and -EINVAL if both are empty. Otherwise the controller name is validated
 * (-EINVAL if invalid) and the path is built by join_path_unified() or join_path_legacy(), depending on
 * the result of cg_all_unified().
 *
 * In all success cases the result is passed through path_simplify(), stored in '*ret' as a newly
 * allocated string, and 0 is returned. */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **ret) {
        int r;

        assert(ret);

        if (!controller) {
                bool no_path = isempty(path), no_suffix = isempty(suffix);
                char *joined;

                if (no_path && no_suffix)
                        return -EINVAL;

                if (no_suffix)
                        joined = strdup(path);
                else if (no_path)
                        joined = strdup(suffix);
                else
                        joined = path_join(path, suffix);
                if (!joined)
                        return -ENOMEM;

                *ret = path_simplify(joined);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        r = cg_all_unified();
        if (r < 0)
                return r;

        r = r > 0 ?
                join_path_unified(path, suffix, ret) :
                join_path_legacy(controller, path, suffix, ret);
        if (r < 0)
                return r;

        path_simplify(*ret);
        return 0;
}
649

650
/* Checks accessibility of a controller directory below /sys/fs/cgroup/ via access_nofollow().
 *
 * Without 'root' this only tests that "/sys/fs/cgroup/<dirname>" exists (F_OK). If 'root' is specified,
 * "<dirname><root>/cgroup.procs" is tested for write access (W_OK) instead, i.e. we check that:
 * - a possible subcgroup is created at root,
 * - we can modify the hierarchy. */
static int controller_is_v1_accessible(const char *root, const char *controller) {
        const char *dn, *probe;
        int mode;

        assert(controller);

        dn = controller_to_dirname(controller);

        probe = strjoina("/sys/fs/cgroup/", dn, root, root ? "/cgroup.procs" : NULL);
        mode = root ? W_OK : F_OK;

        return access_nofollow(probe, mode);
}
664

665
/* Like cg_get_path(), but first verifies that the controller is usable.
 *
 * Returns -EINVAL for an invalid controller name. When cg_all_unified() is positive, every controller is
 * considered accessible except named hierarchies ("name=" prefix), which yield -EOPNOTSUPP. Otherwise the
 * controller is checked with controller_is_v1_accessible(). */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **ret) {
        int r;

        assert(controller);
        assert(ret);

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        r = cg_all_unified();
        if (r < 0)
                return r;

        if (r == 0) {
                /* Check that the specified controller is actually accessible. */
                r = controller_is_v1_accessible(NULL, controller);
                if (r < 0)
                        return r;
        } else if (startswith(controller, "name="))
                /* In the unified hierarchy all controllers are considered accessible, except for
                 * the named hierarchies. */
                return -EOPNOTSUPP;

        return cg_get_path(controller, path, suffix, ret);
}
691

692
int cg_set_xattr(const char *path, const char *name, const void *value, size_t size, int flags) {
8,078✔
693
        _cleanup_free_ char *fs = NULL;
8,078✔
694
        int r;
8,078✔
695

696
        assert(path);
8,078✔
697
        assert(name);
8,078✔
698
        assert(value || size <= 0);
8,078✔
699

700
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
8,078✔
701
        if (r < 0)
8,078✔
702
                return r;
703

704
        return RET_NERRNO(setxattr(fs, name, value, size, flags));
8,078✔
705
}
706

707
int cg_get_xattr_malloc(const char *path, const char *name, char **ret, size_t *ret_size) {
20,081✔
708
        _cleanup_free_ char *fs = NULL;
20,081✔
709
        int r;
20,081✔
710

711
        assert(path);
20,081✔
712
        assert(name);
20,081✔
713

714
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
20,081✔
715
        if (r < 0)
20,081✔
716
                return r;
717

718
        return lgetxattr_malloc(fs, name, ret, ret_size);
20,081✔
719
}
720

721
int cg_get_xattr_bool(const char *path, const char *name) {
423✔
722
        _cleanup_free_ char *fs = NULL;
423✔
723
        int r;
423✔
724

725
        assert(path);
423✔
726
        assert(name);
423✔
727

728
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
423✔
729
        if (r < 0)
423✔
730
                return r;
731

732
        return getxattr_at_bool(AT_FDCWD, fs, name, /* flags= */ 0);
423✔
733
}
734

735
int cg_remove_xattr(const char *path, const char *name) {
39,898✔
736
        _cleanup_free_ char *fs = NULL;
39,898✔
737
        int r;
39,898✔
738

739
        assert(path);
39,898✔
740
        assert(name);
39,898✔
741

742
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
39,898✔
743
        if (r < 0)
39,898✔
744
                return r;
745

746
        return RET_NERRNO(removexattr(fs, name));
79,796✔
747
}
748

749
int cg_pid_get_path(const char *controller, pid_t pid, char **ret_path) {
51,343✔
750
        _cleanup_fclose_ FILE *f = NULL;
51,343✔
751
        const char *fs, *controller_str = NULL;  /* avoid false maybe-uninitialized warning */
51,343✔
752
        int unified, r;
51,343✔
753

754
        assert(pid >= 0);
51,343✔
755
        assert(ret_path);
51,343✔
756

757
        if (controller) {
51,343✔
758
                if (!cg_controller_is_valid(controller))
51,007✔
759
                        return -EINVAL;
760
        } else
761
                controller = SYSTEMD_CGROUP_CONTROLLER;
762

763
        unified = cg_unified_controller(controller);
51,343✔
764
        if (unified < 0)
51,343✔
765
                return unified;
766
        if (unified == 0) {
51,343✔
UNCOV
767
                if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
×
768
                        controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
769
                else
UNCOV
770
                        controller_str = controller;
×
771
        }
772

773
        fs = procfs_file_alloca(pid, "cgroup");
51,343✔
774
        r = fopen_unlocked(fs, "re", &f);
51,343✔
775
        if (r == -ENOENT)
51,343✔
776
                return -ESRCH;
777
        if (r < 0)
48,269✔
778
                return r;
779

780
        for (;;) {
49,570✔
781
                _cleanup_free_ char *line = NULL;
48,919✔
782
                char *e;
48,919✔
783

784
                r = read_line(f, LONG_LINE_MAX, &line);
48,919✔
785
                if (r < 0)
48,919✔
786
                        return r;
787
                if (r == 0)
48,914✔
788
                        return -ENODATA;
789

790
                if (unified) {
48,914✔
791
                        e = startswith(line, "0:");
48,914✔
792
                        if (!e)
48,914✔
793
                                continue;
651✔
794

795
                        e = strchr(e, ':');
48,263✔
796
                        if (!e)
48,263✔
UNCOV
797
                                continue;
×
798
                } else {
UNCOV
799
                        char *l;
×
800

UNCOV
801
                        l = strchr(line, ':');
×
UNCOV
802
                        if (!l)
×
UNCOV
803
                                continue;
×
804

UNCOV
805
                        l++;
×
UNCOV
806
                        e = strchr(l, ':');
×
UNCOV
807
                        if (!e)
×
UNCOV
808
                                continue;
×
UNCOV
809
                        *e = 0;
×
810

UNCOV
811
                        assert(controller_str);
×
UNCOV
812
                        r = string_contains_word(l, ",", controller_str);
×
UNCOV
813
                        if (r < 0)
×
814
                                return r;
UNCOV
815
                        if (r == 0)
×
816
                                continue;
×
817
                }
818

819
                _cleanup_free_ char *path = strdup(e + 1);
48,263✔
820
                if (!path)
48,263✔
821
                        return -ENOMEM;
822

823
                /* Refuse cgroup paths from outside our cgroup namespace */
824
                if (startswith(path, "/../"))
48,263✔
825
                        return -EUNATCH;
826

827
                /* Truncate suffix indicating the process is a zombie */
828
                e = endswith(path, " (deleted)");
48,263✔
829
                if (e)
48,263✔
830
                        *e = 0;
1,261✔
831

832
                *ret_path = TAKE_PTR(path);
48,263✔
833
                return 0;
48,263✔
834
        }
835
}
836

837
int cg_pidref_get_path(const char *controller, const PidRef *pidref, char **ret_path) {
21,128✔
838
        _cleanup_free_ char *path = NULL;
21,128✔
839
        int r;
21,128✔
840

841
        assert(ret_path);
21,128✔
842

843
        if (!pidref_is_set(pidref))
21,128✔
844
                return -ESRCH;
845
        if (pidref_is_remote(pidref))
42,256✔
846
                return -EREMOTE;
847

848
        // XXX: Ideally we'd use pidfd_get_cgroupid() + cg_path_from_cgroupid() here, to extract this
849
        // bit of information from pidfd directly. However, the latter requires privilege and it's
850
        // not entirely clear how to handle cgroups from outer namespace.
851

852
        r = cg_pid_get_path(controller, pidref->pid, &path);
21,128✔
853
        if (r < 0)
21,128✔
854
                return r;
855

856
        /* Before we return the path, make sure the procfs entry for this pid still matches the pidref */
857
        r = pidref_verify(pidref);
21,128✔
858
        if (r < 0)
21,128✔
859
                return r;
860

861
        *ret_path = TAKE_PTR(path);
21,128✔
862
        return 0;
21,128✔
863
}
864

865
int cg_is_empty(const char *controller, const char *path) {
4✔
866
        _cleanup_fclose_ FILE *f = NULL;
4✔
867
        pid_t pid;
4✔
868
        int r;
4✔
869

870
        assert(path);
4✔
871

872
        r = cg_enumerate_processes(controller, path, &f);
4✔
873
        if (r == -ENOENT)
4✔
874
                return true;
875
        if (r < 0)
4✔
876
                return r;
877

878
        r = cg_read_pid(f, &pid, CGROUP_DONT_SKIP_UNMAPPED);
4✔
879
        if (r < 0)
4✔
880
                return r;
881

882
        return r == 0;
4✔
883
}
884

885
int cg_is_empty_recursive(const char *controller, const char *path) {
12,529✔
886
        int r;
12,529✔
887

888
        assert(path);
12,529✔
889

890
        /* The root cgroup is always populated */
891
        if (controller && empty_or_root(path))
12,529✔
892
                return false;
893

894
        r = cg_unified_controller(controller);
12,529✔
895
        if (r < 0)
12,529✔
896
                return r;
897
        if (r > 0) {
12,529✔
898
                _cleanup_free_ char *t = NULL;
12,529✔
899

900
                /* On the unified hierarchy we can check empty state
901
                 * via the "populated" attribute of "cgroup.events". */
902

903
                r = cg_read_event(controller, path, "populated", &t);
12,529✔
904
                if (r == -ENOENT)
12,529✔
905
                        return true;
906
                if (r < 0)
4,778✔
907
                        return r;
908

909
                return streq(t, "0");
4,778✔
910
        } else {
UNCOV
911
                _cleanup_closedir_ DIR *d = NULL;
×
UNCOV
912
                char *fn;
×
913

UNCOV
914
                r = cg_is_empty(controller, path);
×
UNCOV
915
                if (r <= 0)
×
916
                        return r;
917

UNCOV
918
                r = cg_enumerate_subgroups(controller, path, &d);
×
UNCOV
919
                if (r == -ENOENT)
×
920
                        return true;
UNCOV
921
                if (r < 0)
×
922
                        return r;
923

UNCOV
924
                while ((r = cg_read_subgroup(d, &fn)) > 0) {
×
UNCOV
925
                        _cleanup_free_ char *p = NULL;
×
926

UNCOV
927
                        p = path_join(path, fn);
×
UNCOV
928
                        free(fn);
×
UNCOV
929
                        if (!p)
×
930
                                return -ENOMEM;
931

UNCOV
932
                        r = cg_is_empty_recursive(controller, p);
×
933
                        if (r <= 0)
×
934
                                return r;
935
                }
UNCOV
936
                if (r < 0)
×
937
                        return r;
938

UNCOV
939
                return true;
×
940
        }
941
}
942

943
int cg_split_spec(const char *spec, char **ret_controller, char **ret_path) {
23✔
944
        _cleanup_free_ char *controller = NULL, *path = NULL;
23✔
945
        int r;
23✔
946

947
        assert(spec);
23✔
948

949
        if (*spec == '/') {
23✔
950
                if (!path_is_normalized(spec))
15✔
951
                        return -EINVAL;
952

953
                if (ret_path) {
15✔
954
                        r = path_simplify_alloc(spec, &path);
15✔
955
                        if (r < 0)
15✔
956
                                return r;
957
                }
958

959
        } else {
960
                const char *e;
8✔
961

962
                e = strchr(spec, ':');
8✔
963
                if (e) {
8✔
964
                        controller = strndup(spec, e-spec);
6✔
965
                        if (!controller)
6✔
966
                                return -ENOMEM;
967
                        if (!cg_controller_is_valid(controller))
6✔
968
                                return -EINVAL;
969

970
                        if (!isempty(e + 1)) {
3✔
971
                                path = strdup(e+1);
2✔
972
                                if (!path)
2✔
973
                                        return -ENOMEM;
974

975
                                if (!path_is_normalized(path) ||
2✔
976
                                    !path_is_absolute(path))
2✔
977
                                        return -EINVAL;
978

979
                                path_simplify(path);
1✔
980
                        }
981

982
                } else {
983
                        if (!cg_controller_is_valid(spec))
2✔
984
                                return -EINVAL;
985

986
                        if (ret_controller) {
1✔
987
                                controller = strdup(spec);
1✔
988
                                if (!controller)
1✔
989
                                        return -ENOMEM;
990
                        }
991
                }
992
        }
993

994
        if (ret_controller)
18✔
995
                *ret_controller = TAKE_PTR(controller);
18✔
996
        if (ret_path)
18✔
997
                *ret_path = TAKE_PTR(path);
18✔
998
        return 0;
999
}
1000

1001
int cg_mangle_path(const char *path, char **ret) {
465✔
1002
        _cleanup_free_ char *c = NULL, *p = NULL;
465✔
1003
        int r;
465✔
1004

1005
        assert(path);
465✔
1006
        assert(ret);
465✔
1007

1008
        /* First, check if it already is a filesystem path */
1009
        if (path_startswith(path, "/sys/fs/cgroup"))
465✔
1010
                return path_simplify_alloc(path, ret);
461✔
1011

1012
        /* Otherwise, treat it as cg spec */
1013
        r = cg_split_spec(path, &c, &p);
4✔
1014
        if (r < 0)
4✔
1015
                return r;
1016

1017
        return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, ret);
8✔
1018
}
1019

1020
int cg_get_root_path(char **ret_path) {
14,203✔
1021
        char *p, *e;
14,203✔
1022
        int r;
14,203✔
1023

1024
        assert(ret_path);
14,203✔
1025

1026
        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
14,203✔
1027
        if (r < 0)
14,203✔
1028
                return r;
14,203✔
1029

1030
        e = endswith(p, "/" SPECIAL_INIT_SCOPE);
14,203✔
1031
        if (!e)
14,203✔
1032
                e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
79✔
1033
        if (!e)
79✔
1034
                e = endswith(p, "/system"); /* even more legacy */
79✔
1035
        if (e)
14,203✔
1036
                *e = 0;
14,124✔
1037

1038
        *ret_path = p;
14,203✔
1039
        return 0;
14,203✔
1040
}
1041

1042
int cg_shift_path(const char *cgroup, const char *root, const char **ret_shifted) {
11,298✔
1043
        _cleanup_free_ char *rt = NULL;
11,298✔
1044
        char *p;
11,298✔
1045
        int r;
11,298✔
1046

1047
        assert(cgroup);
11,298✔
1048
        assert(ret_shifted);
11,298✔
1049

1050
        if (!root) {
11,298✔
1051
                /* If the root was specified let's use that, otherwise
1052
                 * let's determine it from PID 1 */
1053

1054
                r = cg_get_root_path(&rt);
2,161✔
1055
                if (r < 0)
2,161✔
1056
                        return r;
1057

1058
                root = rt;
2,161✔
1059
        }
1060

1061
        p = path_startswith(cgroup, root);
11,298✔
1062
        if (p && p > cgroup)
11,298✔
1063
                *ret_shifted = p - 1;
2✔
1064
        else
1065
                *ret_shifted = cgroup;
11,296✔
1066

1067
        return 0;
1068
}
1069

1070
int cg_pid_get_path_shifted(pid_t pid, const char *root, char **ret_cgroup) {
14,202✔
1071
        _cleanup_free_ char *raw = NULL;
14,202✔
1072
        const char *c;
14,202✔
1073
        int r;
14,202✔
1074

1075
        assert(pid >= 0);
14,202✔
1076
        assert(ret_cgroup);
14,202✔
1077

1078
        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
14,202✔
1079
        if (r < 0)
14,202✔
1080
                return r;
1081

1082
        r = cg_shift_path(raw, root, &c);
11,122✔
1083
        if (r < 0)
11,122✔
1084
                return r;
1085

1086
        if (c == raw) {
11,122✔
1087
                *ret_cgroup = TAKE_PTR(raw);
11,122✔
1088
                return 0;
11,122✔
1089
        }
1090

UNCOV
1091
        return strdup_to(ret_cgroup, c);
×
1092
}
1093

1094
int cg_path_decode_unit(const char *cgroup, char **ret_unit) {
32,554✔
1095
        assert(cgroup);
32,554✔
1096
        assert(ret_unit);
32,554✔
1097

1098
        size_t n = strcspn(cgroup, "/");
32,554✔
1099
        if (n < 3)
32,554✔
1100
                return -ENXIO;
1101

1102
        char *c = strndupa_safe(cgroup, n);
32,547✔
1103
        c = cg_unescape(c);
32,547✔
1104

1105
        if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
32,547✔
1106
                return -ENXIO;
1107

1108
        return strdup_to(ret_unit, c);
32,539✔
1109
}
1110

1111
static bool valid_slice_name(const char *p, size_t n) {
115,477✔
1112

1113
        if (!p)
115,477✔
1114
                return false;
1115

1116
        if (n < STRLEN("x.slice"))
115,462✔
1117
                return false;
1118

1119
        if (memcmp(p + n - 6, ".slice", 6) == 0) {
115,444✔
1120
                char buf[n+1], *c;
58,510✔
1121

1122
                memcpy(buf, p, n);
58,510✔
1123
                buf[n] = 0;
58,510✔
1124

1125
                c = cg_unescape(buf);
58,510✔
1126

1127
                return unit_name_is_valid(c, UNIT_NAME_PLAIN);
58,510✔
1128
        }
1129

1130
        return false;
1131
}
1132

1133
/* Skips over all leading slice components of 'p' and returns a pointer to the first path component
 * that is not a valid slice name (with leading slashes already skipped). */
static const char *skip_slices(const char *p) {
        assert(p);

        p += strspn(p, "/");

        for (size_t len = strcspn(p, "/"); valid_slice_name(p, len); len = strcspn(p, "/")) {
                /* Step over the slice and any slashes that follow it */
                p += len;
                p += strspn(p, "/");
        }

        return p;
}
1150

1151
int cg_path_get_unit(const char *path, char **ret) {
16,853✔
1152
        _cleanup_free_ char *unit = NULL;
16,853✔
1153
        const char *e;
16,853✔
1154
        int r;
16,853✔
1155

1156
        assert(path);
16,853✔
1157
        assert(ret);
16,853✔
1158

1159
        e = skip_slices(path);
16,853✔
1160

1161
        r = cg_path_decode_unit(e, &unit);
16,853✔
1162
        if (r < 0)
16,853✔
1163
                return r;
1164

1165
        /* We skipped over the slices, don't accept any now */
1166
        if (endswith(unit, ".slice"))
16,842✔
1167
                return -ENXIO;
1168

1169
        *ret = TAKE_PTR(unit);
16,842✔
1170
        return 0;
16,842✔
1171
}
1172

1173
int cg_path_get_unit_path(const char *path, char **ret) {
8,977✔
1174
        _cleanup_free_ char *path_copy = NULL;
8,977✔
1175
        char *unit_name;
8,977✔
1176

1177
        assert(path);
8,977✔
1178
        assert(ret);
8,977✔
1179

1180
        path_copy = strdup(path);
8,977✔
1181
        if (!path_copy)
8,977✔
1182
                return -ENOMEM;
1183

1184
        unit_name = (char *)skip_slices(path_copy);
8,977✔
1185
        unit_name[strcspn(unit_name, "/")] = 0;
8,977✔
1186

1187
        if (!unit_name_is_valid(cg_unescape(unit_name), UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
8,977✔
1188
                return -ENXIO;
1189

1190
        *ret = TAKE_PTR(path_copy);
8,974✔
1191

1192
        return 0;
8,974✔
1193
}
1194

1195
int cg_pid_get_unit(pid_t pid, char **ret_unit) {
573✔
1196
        _cleanup_free_ char *cgroup = NULL;
573✔
1197
        int r;
573✔
1198

1199
        assert(ret_unit);
573✔
1200

1201
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
573✔
1202
        if (r < 0)
573✔
1203
                return r;
1204

1205
        return cg_path_get_unit(cgroup, ret_unit);
569✔
1206
}
1207

1208
int cg_pidref_get_unit(const PidRef *pidref, char **ret) {
431✔
1209
        _cleanup_free_ char *unit = NULL;
431✔
1210
        int r;
431✔
1211

1212
        assert(ret);
431✔
1213

1214
        if (!pidref_is_set(pidref))
431✔
1215
                return -ESRCH;
1216
        if (pidref_is_remote(pidref))
862✔
1217
                return -EREMOTE;
1218

1219
        r = cg_pid_get_unit(pidref->pid, &unit);
431✔
1220
        if (r < 0)
431✔
1221
                return r;
1222

1223
        r = pidref_verify(pidref);
427✔
1224
        if (r < 0)
427✔
1225
                return r;
1226

1227
        *ret = TAKE_PTR(unit);
427✔
1228
        return 0;
427✔
1229
}
1230

1231
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Returns a pointer past the session scope component (and any slashes following it), or NULL if
 * the first component of 'p' is not a session scope with a valid session ID.
 */
static const char *skip_session(const char *p) {
        const size_t prefix_len = STRLEN("session-"), suffix_len = STRLEN(".scope");

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        size_t n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", prefix_len) != 0)
                return NULL;
        if (memcmp(p + n - suffix_len, ".scope", suffix_len) != 0)
                return NULL;

        /* Copy out what is between prefix and suffix, i.e. the session ID */
        size_t id_len = n - prefix_len - suffix_len;
        char id[id_len + 1];

        memcpy(id, p + prefix_len, id_len);
        id[id_len] = 0;

        /* Note that session scopes never need unescaping,
         * since they cannot conflict with the kernel's own
         * names, hence we don't need to call cg_unescape()
         * here. */

        if (!session_id_valid(id))
                return NULL;

        p += n;
        return p + strspn(p, "/");
}
1267

1268
/**
1269
 * Skip user@*.service or capsule@*.service, but require either of them to be there.
1270
 */
1271
static const char *skip_user_manager(const char *p) {
15,228✔
1272
        size_t n;
15,228✔
1273

1274
        if (isempty(p))
15,228✔
1275
                return NULL;
15,228✔
1276

1277
        p += strspn(p, "/");
15,224✔
1278

1279
        n = strcspn(p, "/");
15,224✔
1280
        if (n < CONST_MIN(STRLEN("user@x.service"), STRLEN("capsule@x.service")))
15,224✔
1281
                return NULL;
1282

1283
        /* Any possible errors from functions called below are converted to NULL return, so our callers won't
1284
         * resolve user/capsule name. */
1285
        _cleanup_free_ char *unit_name = strndup(p, n);
15,072✔
1286
        if (!unit_name)
15,072✔
1287
                return NULL;
1288

1289
        _cleanup_free_ char *i = NULL;
15,072✔
1290
        UnitNameFlags type = unit_name_to_instance(unit_name, &i);
15,072✔
1291

1292
        if (type != UNIT_NAME_INSTANCE)
15,072✔
1293
                return NULL;
1294

1295
        /* Note that user manager services never need unescaping, since they cannot conflict with the
1296
         * kernel's own names, hence we don't need to call cg_unescape() here.  Prudently check validity of
1297
         * instance names, they should be always valid as we validate them upon unit start. */
1298
        if (startswith(unit_name, "user@")) {
477✔
1299
                if (parse_uid(i, NULL) < 0)
388✔
1300
                        return NULL;
1301

1302
                p += n;
388✔
1303
                p += strspn(p, "/");
388✔
1304
                return p;
388✔
1305
        } else if (startswith(unit_name, "capsule@")) {
89✔
1306
                if (capsule_name_is_valid(i) <= 0)
5✔
1307
                        return NULL;
1308

1309
                p += n;
5✔
1310
                p += strspn(p, "/");
5✔
1311
                return p;
5✔
1312
        }
1313

1314
        return NULL;
1315
}
1316

1317
/* Skips everything in 'path' up to and including the user manager service or, failing that, the
 * session scope. Returns NULL if neither is found after the leading slices. */
static const char *skip_user_prefix(const char *path) {
        assert(path);

        /* Skip slices, if there are any */
        const char *after_slices = skip_slices(path);

        /* Skip the user manager, if it's in the path now, and alternatively the user session if that
         * is in the path */
        return skip_user_manager(after_slices) ?: skip_session(after_slices);
}
1333

1334
/* Determines the user unit a cgroup path belongs to. Returns -ENXIO if the path carries no user
 * manager or session prefix, otherwise the result of cg_path_get_unit() on the remainder. */
int cg_path_get_user_unit(const char *path, char **ret) {
        assert(path);
        assert(ret);

        const char *rest = skip_user_prefix(path);

        /* From the prefix on it looks pretty much the same as for a system unit, hence let's use the
         * same parser. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1348

1349
int cg_pid_get_user_unit(pid_t pid, char **ret_unit) {
122✔
1350
        _cleanup_free_ char *cgroup = NULL;
122✔
1351
        int r;
122✔
1352

1353
        assert(ret_unit);
122✔
1354

1355
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
122✔
1356
        if (r < 0)
122✔
1357
                return r;
1358

1359
        return cg_path_get_user_unit(cgroup, ret_unit);
122✔
1360
}
1361

1362
int cg_path_get_machine_name(const char *path, char **ret_machine) {
106✔
1363
        _cleanup_free_ char *u = NULL;
106✔
1364
        const char *sl;
106✔
1365
        int r;
106✔
1366

1367
        r = cg_path_get_unit(path, &u);
106✔
1368
        if (r < 0)
106✔
1369
                return r;
1370

1371
        sl = strjoina("/run/systemd/machines/unit:", u);
530✔
1372
        return readlink_malloc(sl, ret_machine);
106✔
1373
}
1374

1375
int cg_pid_get_machine_name(pid_t pid, char **ret_machine) {
106✔
1376
        _cleanup_free_ char *cgroup = NULL;
106✔
1377
        int r;
106✔
1378

1379
        assert(ret_machine);
106✔
1380

1381
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
106✔
1382
        if (r < 0)
106✔
1383
                return r;
1384

1385
        return cg_path_get_machine_name(cgroup, ret_machine);
106✔
1386
}
1387

1388
int cg_path_get_session(const char *path, char **ret_session) {
8,360✔
1389
        _cleanup_free_ char *unit = NULL;
8,360✔
1390
        char *start, *end;
8,360✔
1391
        int r;
8,360✔
1392

1393
        assert(path);
8,360✔
1394

1395
        r = cg_path_get_unit(path, &unit);
8,360✔
1396
        if (r < 0)
8,360✔
1397
                return r;
1398

1399
        start = startswith(unit, "session-");
8,359✔
1400
        if (!start)
8,359✔
1401
                return -ENXIO;
1402
        end = endswith(start, ".scope");
250✔
1403
        if (!end)
250✔
1404
                return -ENXIO;
1405

1406
        *end = 0;
250✔
1407
        if (!session_id_valid(start))
250✔
1408
                return -ENXIO;
1409

1410
        if (!ret_session)
249✔
1411
                return 0;
1412

1413
        return strdup_to(ret_session, start);
249✔
1414
}
1415

1416
int cg_pid_get_session(pid_t pid, char **ret_session) {
747✔
1417
        _cleanup_free_ char *cgroup = NULL;
747✔
1418
        int r;
747✔
1419

1420
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
747✔
1421
        if (r < 0)
747✔
1422
                return r;
1423

1424
        return cg_path_get_session(cgroup, ret_session);
747✔
1425
}
1426

1427
int cg_pidref_get_session(const PidRef *pidref, char **ret) {
261✔
1428
        int r;
261✔
1429

1430
        if (!pidref_is_set(pidref))
261✔
1431
                return -ESRCH;
261✔
1432
        if (pidref_is_remote(pidref))
522✔
1433
                return -EREMOTE;
1434

1435
        _cleanup_free_ char *session = NULL;
261✔
1436
        r = cg_pid_get_session(pidref->pid, &session);
261✔
1437
        if (r < 0)
261✔
1438
                return r;
1439

1440
        r = pidref_verify(pidref);
209✔
1441
        if (r < 0)
209✔
1442
                return r;
1443

1444
        if (ret)
209✔
1445
                *ret = TAKE_PTR(session);
209✔
1446
        return 0;
1447
}
1448

1449
int cg_path_get_owner_uid(const char *path, uid_t *ret_uid) {
8,035✔
1450
        _cleanup_free_ char *slice = NULL;
8,035✔
1451
        char *start, *end;
8,035✔
1452
        int r;
8,035✔
1453

1454
        assert(path);
8,035✔
1455

1456
        r = cg_path_get_slice(path, &slice);
8,035✔
1457
        if (r < 0)
8,035✔
1458
                return r;
1459

1460
        start = startswith(slice, "user-");
8,035✔
1461
        if (!start)
8,035✔
1462
                return -ENXIO;
1463

1464
        end = endswith(start, ".slice");
419✔
1465
        if (!end)
419✔
1466
                return -ENXIO;
1467

1468
        *end = 0;
419✔
1469
        if (parse_uid(start, ret_uid) < 0)
419✔
UNCOV
1470
                return -ENXIO;
×
1471

1472
        return 0;
1473
}
1474

1475
int cg_pid_get_owner_uid(pid_t pid, uid_t *ret_uid) {
440✔
1476
        _cleanup_free_ char *cgroup = NULL;
440✔
1477
        int r;
440✔
1478

1479
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
440✔
1480
        if (r < 0)
440✔
1481
                return r;
1482

1483
        return cg_path_get_owner_uid(cgroup, ret_uid);
440✔
1484
}
1485

1486
int cg_pidref_get_owner_uid(const PidRef *pidref, uid_t *ret) {
50✔
1487
        int r;
50✔
1488

1489
        if (!pidref_is_set(pidref))
50✔
1490
                return -ESRCH;
50✔
1491
        if (pidref_is_remote(pidref))
50✔
1492
                return -EREMOTE;
1493

1494
        uid_t uid;
50✔
1495
        r = cg_pid_get_owner_uid(pidref->pid, &uid);
50✔
1496
        if (r < 0)
50✔
1497
                return r;
1498

1499
        r = pidref_verify(pidref);
8✔
1500
        if (r < 0)
8✔
1501
                return r;
1502

1503
        if (ret)
8✔
1504
                *ret = uid;
8✔
1505

1506
        return 0;
1507
}
1508

1509
int cg_path_get_slice(const char *p, char **ret_slice) {
15,909✔
1510
        const char *e = NULL;
15,909✔
1511

1512
        assert(p);
15,909✔
1513
        assert(ret_slice);
15,909✔
1514

1515
        /* Finds the right-most slice unit from the beginning, but stops before we come to
1516
         * the first non-slice unit. */
1517

1518
        for (;;) {
48,641✔
1519
                const char *s;
32,275✔
1520
                int n;
32,275✔
1521

1522
                n = path_find_first_component(&p, /* accept_dot_dot = */ false, &s);
32,275✔
1523
                if (n < 0)
32,275✔
UNCOV
1524
                        return n;
×
1525
                if (!valid_slice_name(s, n))
32,275✔
1526
                        break;
1527

1528
                e = s;
16,366✔
1529
        }
1530

1531
        if (e)
15,909✔
1532
                return cg_path_decode_unit(e, ret_slice);
15,692✔
1533

1534
        return strdup_to(ret_slice, SPECIAL_ROOT_SLICE);
217✔
1535
}
1536

1537
int cg_pid_get_slice(pid_t pid, char **ret_slice) {
126✔
1538
        _cleanup_free_ char *cgroup = NULL;
126✔
1539
        int r;
126✔
1540

1541
        assert(ret_slice);
126✔
1542

1543
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
126✔
1544
        if (r < 0)
126✔
1545
                return r;
1546

1547
        return cg_path_get_slice(cgroup, ret_slice);
126✔
1548
}
1549

1550
/* Determines the user slice a cgroup path belongs to. Returns -ENXIO if the path carries no user
 * manager or session prefix, otherwise the result of cg_path_get_slice() on the remainder. */
int cg_path_get_user_slice(const char *p, char **ret_slice) {
        assert(p);
        assert(ret_slice);

        const char *rest = skip_user_prefix(p);

        /* From the prefix on it looks pretty much the same as for a system slice, so let's just use
         * the same parser from here on. */
        return rest ? cg_path_get_slice(rest, ret_slice) : -ENXIO;
}
1563

UNCOV
1564
int cg_pid_get_user_slice(pid_t pid, char **ret_slice) {
×
UNCOV
1565
        _cleanup_free_ char *cgroup = NULL;
×
UNCOV
1566
        int r;
×
1567

UNCOV
1568
        assert(ret_slice);
×
1569

UNCOV
1570
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
×
UNCOV
1571
        if (r < 0)
×
1572
                return r;
1573

UNCOV
1574
        return cg_path_get_user_slice(cgroup, ret_slice);
×
1575
}
1576

1577
bool cg_needs_escape(const char *p) {
17,368✔
1578

1579
        /* Checks if the specified path is a valid cgroup name by our rules, or if it must be escaped. Note
1580
         * that we consider escaped cgroup names invalid here, as they need to be escaped a second time if
1581
         * they shall be used. Also note that various names cannot be made valid by escaping even if we
1582
         * return true here (because too long, or contain the forbidden character "/"). */
1583

1584
        if (!filename_is_valid(p))
17,368✔
1585
                return true;
1586

1587
        if (IN_SET(p[0], '_', '.'))
17,364✔
1588
                return true;
1589

1590
        if (STR_IN_SET(p, "notify_on_release", "release_agent", "tasks"))
17,358✔
1591
                return true;
2✔
1592

1593
        if (startswith(p, "cgroup."))
17,356✔
1594
                return true;
1595

1596
        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
242,956✔
1597
                const char *q;
225,602✔
1598

1599
                q = startswith(p, cgroup_controller_to_string(c));
225,602✔
1600
                if (!q)
225,602✔
1601
                        continue;
225,602✔
1602

UNCOV
1603
                if (q[0] == '.')
×
1604
                        return true;
1605
        }
1606

1607
        return false;
1608
}
1609

1610
int cg_escape(const char *p, char **ret) {
17,085✔
1611
        _cleanup_free_ char *n = NULL;
17,085✔
1612

1613
        /* This implements very minimal escaping for names to be used as file names in the cgroup tree: any
1614
         * name which might conflict with a kernel name or is prefixed with '_' is prefixed with a '_'. That
1615
         * way, when reading cgroup names it is sufficient to remove a single prefixing underscore if there
1616
         * is one. */
1617

1618
        /* The return value of this function (unlike cg_unescape()) needs free()! */
1619

1620
        if (cg_needs_escape(p)) {
17,085✔
1621
                n = strjoin("_", p);
7✔
1622
                if (!n)
7✔
1623
                        return -ENOMEM;
1624

1625
                if (!filename_is_valid(n)) /* became invalid due to the prefixing? Or contained things like a slash that cannot be fixed by prefixing? */
7✔
1626
                        return -EINVAL;
1627
        } else {
1628
                n = strdup(p);
17,078✔
1629
                if (!n)
17,078✔
1630
                        return -ENOMEM;
1631
        }
1632

1633
        *ret = TAKE_PTR(n);
17,085✔
1634
        return 0;
17,085✔
1635
}
1636

1637
char* cg_unescape(const char *p) {
        assert(p);

        /* Undoes the escaping by skipping a single leading underscore, if there is one. The result
         * points into the string passed in, hence (unlike for cg_escape()) it doesn't need free()! */

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1648

1649
#define CONTROLLER_VALID                        \
1650
        DIGITS LETTERS                          \
1651
        "_"
1652

1653
bool cg_controller_is_valid(const char *p) {
402,903✔
1654
        const char *t, *s;
402,903✔
1655

1656
        if (!p)
402,903✔
1657
                return false;
1658

1659
        if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
402,903✔
1660
                return true;
1661

1662
        s = startswith(p, "name=");
119,322✔
1663
        if (s)
119,322✔
1664
                p = s;
32✔
1665

1666
        if (IN_SET(*p, 0, '_'))
119,322✔
1667
                return false;
1668

1669
        for (t = p; *t; t++)
765,408✔
1670
                if (!strchr(CONTROLLER_VALID, *t))
646,097✔
1671
                        return false;
1672

1673
        if (t - p > NAME_MAX)
119,311✔
UNCOV
1674
                return false;
×
1675

1676
        return true;
1677
}
1678

1679
int cg_slice_to_path(const char *unit, char **ret) {
7,513✔
1680
        _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
7,513✔
1681
        const char *dash;
7,513✔
1682
        int r;
7,513✔
1683

1684
        assert(unit);
7,513✔
1685
        assert(ret);
7,513✔
1686

1687
        if (streq(unit, SPECIAL_ROOT_SLICE))
7,513✔
1688
                return strdup_to(ret, "");
7✔
1689

1690
        if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
7,506✔
1691
                return -EINVAL;
1692

1693
        if (!endswith(unit, ".slice"))
7,495✔
1694
                return -EINVAL;
1695

1696
        r = unit_name_to_prefix(unit, &p);
7,494✔
1697
        if (r < 0)
7,494✔
1698
                return r;
1699

1700
        dash = strchr(p, '-');
7,494✔
1701

1702
        /* Don't allow initial dashes */
1703
        if (dash == p)
7,494✔
1704
                return -EINVAL;
1705

1706
        while (dash) {
7,727✔
1707
                _cleanup_free_ char *escaped = NULL;
238✔
1708
                char n[dash - p + sizeof(".slice")];
238✔
1709

1710
#if HAS_FEATURE_MEMORY_SANITIZER
1711
                /* msan doesn't instrument stpncpy, so it thinks
1712
                 * n is later used uninitialized:
1713
                 * https://github.com/google/sanitizers/issues/926
1714
                 */
1715
                zero(n);
1716
#endif
1717

1718
                /* Don't allow trailing or double dashes */
1719
                if (IN_SET(dash[1], 0, '-'))
238✔
1720
                        return -EINVAL;
1721

1722
                strcpy(stpncpy(n, p, dash - p), ".slice");
236✔
1723
                if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
236✔
1724
                        return -EINVAL;
1725

1726
                r = cg_escape(n, &escaped);
236✔
1727
                if (r < 0)
236✔
1728
                        return r;
1729

1730
                if (!strextend(&s, escaped, "/"))
236✔
1731
                        return -ENOMEM;
1732

1733
                dash = strchr(dash+1, '-');
236✔
1734
        }
1735

1736
        r = cg_escape(unit, &e);
7,489✔
1737
        if (r < 0)
7,489✔
1738
                return r;
1739

1740
        if (!strextend(&s, e))
7,489✔
1741
                return -ENOMEM;
1742

1743
        *ret = TAKE_PTR(s);
7,489✔
1744
        return 0;
7,489✔
1745
}
1746

UNCOV
1747
int cg_is_threaded(const char *path) {
×
UNCOV
1748
        _cleanup_free_ char *fs = NULL, *contents = NULL;
×
UNCOV
1749
        _cleanup_strv_free_ char **v = NULL;
×
UNCOV
1750
        int r;
×
1751

UNCOV
1752
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "cgroup.type", &fs);
×
UNCOV
1753
        if (r < 0)
×
1754
                return r;
1755

UNCOV
1756
        r = read_full_virtual_file(fs, &contents, NULL);
×
UNCOV
1757
        if (r == -ENOENT)
×
1758
                return false; /* Assume no. */
UNCOV
1759
        if (r < 0)
×
1760
                return r;
1761

UNCOV
1762
        v = strv_split(contents, NULL);
×
UNCOV
1763
        if (!v)
×
1764
                return -ENOMEM;
1765

1766
        /* If the cgroup is in the threaded mode, it contains "threaded".
1767
         * If one of the parents or siblings is in the threaded mode, it may contain "invalid". */
1768
        return strv_contains(v, "threaded") || strv_contains(v, "invalid");
×
1769
}
1770

1771
int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
48,951✔
1772
        _cleanup_free_ char *p = NULL;
48,951✔
1773
        int r;
48,951✔
1774

1775
        r = cg_get_path(controller, path, attribute, &p);
48,951✔
1776
        if (r < 0)
48,951✔
1777
                return r;
1778

1779
        return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER);
48,951✔
1780
}
1781

1782
int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
29,242✔
1783
        _cleanup_free_ char *p = NULL;
29,242✔
1784
        int r;
29,242✔
1785

1786
        r = cg_get_path(controller, path, attribute, &p);
29,242✔
1787
        if (r < 0)
29,242✔
1788
                return r;
1789

1790
        return read_one_line_file(p, ret);
29,242✔
1791
}
1792

1793
int cg_get_attribute_as_uint64(const char *controller, const char *path, const char *attribute, uint64_t *ret) {
25,318✔
1794
        _cleanup_free_ char *value = NULL;
25,318✔
1795
        uint64_t v;
25,318✔
1796
        int r;
25,318✔
1797

1798
        assert(ret);
25,318✔
1799

1800
        r = cg_get_attribute(controller, path, attribute, &value);
25,318✔
1801
        if (r == -ENOENT)
25,318✔
1802
                return -ENODATA;
1803
        if (r < 0)
22,074✔
1804
                return r;
1805

1806
        if (streq(value, "max")) {
22,074✔
1807
                *ret = CGROUP_LIMIT_MAX;
4,621✔
1808
                return 0;
4,621✔
1809
        }
1810

1811
        r = safe_atou64(value, &v);
17,453✔
1812
        if (r < 0)
17,453✔
1813
                return r;
1814

1815
        *ret = v;
17,453✔
1816
        return 0;
17,453✔
1817
}
1818

1819
int cg_get_attribute_as_bool(const char *controller, const char *path, const char *attribute, bool *ret) {
63✔
1820
        _cleanup_free_ char *value = NULL;
63✔
1821
        int r;
63✔
1822

1823
        assert(ret);
63✔
1824

1825
        r = cg_get_attribute(controller, path, attribute, &value);
63✔
1826
        if (r == -ENOENT)
63✔
1827
                return -ENODATA;
1828
        if (r < 0)
63✔
1829
                return r;
1830

1831
        r = parse_boolean(value);
63✔
1832
        if (r < 0)
63✔
1833
                return r;
1834

1835
        *ret = r;
63✔
1836
        return 0;
63✔
1837
}
1838

1839
int cg_get_owner(const char *path, uid_t *ret_uid) {
35✔
1840
        _cleanup_free_ char *f = NULL;
35✔
1841
        struct stat stats;
35✔
1842
        int r;
35✔
1843

1844
        assert(ret_uid);
35✔
1845

1846
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &f);
35✔
1847
        if (r < 0)
35✔
1848
                return r;
1849

1850
        if (stat(f, &stats) < 0)
35✔
1851
                return -errno;
16✔
1852

1853
        r = stat_verify_directory(&stats);
19✔
1854
        if (r < 0)
19✔
1855
                return r;
1856

1857
        *ret_uid = stats.st_uid;
19✔
1858
        return 0;
19✔
1859
}
1860

1861
int cg_get_keyed_attribute_full(
34,536✔
1862
                const char *controller,
1863
                const char *path,
1864
                const char *attribute,
1865
                char **keys,
1866
                char **ret_values,
1867
                CGroupKeyMode mode) {
1868

1869
        _cleanup_free_ char *filename = NULL, *contents = NULL;
34,536✔
1870
        const char *p;
34,536✔
1871
        size_t n, i, n_done = 0;
34,536✔
1872
        char **v;
34,536✔
1873
        int r;
34,536✔
1874

1875
        /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with
1876
         * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of
1877
         * entries as 'keys'. On success each entry will be set to the value of the matching key.
1878
         *
1879
         * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. If mode
1880
         * is set to GG_KEY_MODE_GRACEFUL we ignore missing keys and return those that were parsed successfully. */
1881

1882
        r = cg_get_path(controller, path, attribute, &filename);
34,536✔
1883
        if (r < 0)
34,536✔
1884
                return r;
1885

1886
        r = read_full_file(filename, &contents, NULL);
34,536✔
1887
        if (r < 0)
34,536✔
1888
                return r;
1889

1890
        n = strv_length(keys);
28,037✔
1891
        if (n == 0) /* No keys to retrieve? That's easy, we are done then */
28,037✔
1892
                return 0;
1893

1894
        /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
1895
        v = newa0(char*, n);
28,037✔
1896

1897
        for (p = contents; *p;) {
95,956✔
1898
                const char *w = NULL;
1899

1900
                for (i = 0; i < n; i++)
163,875✔
1901
                        if (!v[i]) {
105,805✔
1902
                                w = first_word(p, keys[i]);
95,956✔
1903
                                if (w)
95,956✔
1904
                                        break;
1905
                        }
1906

1907
                if (w) {
95,956✔
1908
                        size_t l;
37,886✔
1909

1910
                        l = strcspn(w, NEWLINE);
37,886✔
1911
                        v[i] = strndup(w, l);
37,886✔
1912
                        if (!v[i]) {
37,886✔
UNCOV
1913
                                r = -ENOMEM;
×
UNCOV
1914
                                goto fail;
×
1915
                        }
1916

1917
                        n_done++;
37,886✔
1918
                        if (n_done >= n)
37,886✔
1919
                                goto done;
28,037✔
1920

1921
                        p = w + l;
9,849✔
1922
                } else
1923
                        p += strcspn(p, NEWLINE);
58,070✔
1924

1925
                p += strspn(p, NEWLINE);
67,919✔
1926
        }
1927

UNCOV
1928
        if (mode & CG_KEY_MODE_GRACEFUL)
×
UNCOV
1929
                goto done;
×
1930

1931
        r = -ENXIO;
1932

1933
fail:
×
1934
        free_many_charp(v, n);
34,536✔
1935
        return r;
1936

1937
done:
28,037✔
1938
        memcpy(ret_values, v, sizeof(char*) * n);
28,037✔
1939
        if (mode & CG_KEY_MODE_GRACEFUL)
28,037✔
1940
                return n_done;
9,849✔
1941

1942
        return 0;
1943
}
1944

1945
int cg_mask_to_string(CGroupMask mask, char **ret) {
        _cleanup_free_ char *buf = NULL;
        bool need_space = false;
        size_t len = 0;

        /* Formats a controller mask as a space-separated list of controller names, in the order of the
         * CGroupController enumeration. An empty mask yields a NULL string. Returns 0 on success and
         * -ENOMEM if memory runs out. */

        assert(ret);

        if (mask == 0) {
                *ret = NULL;
                return 0;
        }

        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                const char *name;
                size_t name_len;

                if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c)))
                        continue;

                name = cgroup_controller_to_string(c);
                name_len = strlen(name);

                /* Make room for the optional separator, the name and the trailing NUL byte. */
                if (!GREEDY_REALLOC(buf, len + need_space + name_len + 1))
                        return -ENOMEM;

                if (need_space)
                        buf[len++] = ' ';

                memcpy(buf + len, name, name_len);
                len += name_len;

                /* Every name after the first one is prefixed with a space. */
                need_space = true;
        }

        assert(buf);

        buf[len] = 0;
        *ret = TAKE_PTR(buf);

        return 0;
}
1986

1987
int cg_mask_from_string(const char *value, CGroupMask *ret) {
        CGroupMask mask = 0;

        /* Parses a list of controller names, one word at a time, into a controller mask. Words that are
         * not recognized as controller names are skipped. Returns 0 on success, or the error of the word
         * extraction; *ret is only written on success. */

        assert(ret);
        assert(value);

        for (;;) {
                _cleanup_free_ char *word = NULL;
                CGroupController c;
                int r;

                r = extract_first_word(&value, &word, NULL, 0);
                if (r < 0)
                        return r;
                if (r == 0) {
                        /* No more words */
                        *ret = mask;
                        return 0;
                }

                c = cgroup_controller_from_string(word);
                if (c >= 0)
                        mask |= CGROUP_CONTROLLER_TO_MASK(c);
        }
}
2014

2015
int cg_mask_supported_subtree(const char *root, CGroupMask *ret) {
        CGroupMask supported = 0;
        int r;

        /* Determines the mask of supported cgroup controllers below 'root'. Only includes controllers we
         * can make sense of and that are actually accessible. Only covers real controllers, i.e. not the
         * CGROUP_CONTROLLER_BPF_xyz pseudo-controllers. */

        r = cg_all_unified();
        if (r < 0)
                return r;

        if (r == 0) {
                /* In the legacy hierarchy, we check which hierarchies are accessible. */

                for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                        CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);

                        if (!FLAGS_SET(CGROUP_MASK_V1, bit))
                                continue;

                        if (controller_is_v1_accessible(root, cgroup_controller_to_string(c)) >= 0)
                                supported |= bit;
                }
        } else {
                _cleanup_free_ char *controllers = NULL, *path = NULL;

                /* In the unified hierarchy we can read the supported and accessible controllers from
                 * the "cgroup.controllers" attribute of the root cgroup. */

                r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
                if (r < 0)
                        return r;

                r = read_one_line_file(path, &controllers);
                if (r < 0)
                        return r;

                r = cg_mask_from_string(controllers, &supported);
                if (r < 0)
                        return r;

                /* Mask controllers that are not supported in unified hierarchy. */
                supported &= CGROUP_MASK_V2;
        }

        *ret = supported;
        return 0;
}
2069

2070
int cg_mask_supported(CGroupMask *ret) {
        _cleanup_free_ char *root = NULL;
        int r;

        /* Same as cg_mask_supported_subtree(), but operates on the path returned by
         * cg_get_root_path(). */

        r = cg_get_root_path(&root);
        if (r >= 0)
                r = cg_mask_supported_subtree(root, ret);

        return r;
}
2080

2081
int cg_kernel_controllers(Set **ret) {
        _cleanup_set_free_ Set *found = NULL;
        _cleanup_fclose_ FILE *f = NULL;
        int r;

        assert(ret);

        /* Determines the full list of kernel-known controllers. Might include controllers we don't actually support
         * and controllers that aren't currently accessible (because not mounted). This does not include "name="
         * pseudo-controllers.
         *
         * If /proc/cgroups does not exist, *ret is set to NULL and 0 is returned. */

        r = fopen_unlocked("/proc/cgroups", "re", &f);
        if (r == -ENOENT) {
                *ret = NULL;
                return 0;
        }
        if (r < 0)
                return r;

        /* Ignore the header line */
        (void) read_line(f, SIZE_MAX, NULL);

        for (;;) {
                _cleanup_free_ char *name = NULL;
                int enabled = 0, n_parsed;

                /* We only care about the first (name) and the fourth (enabled) column. */
                n_parsed = fscanf(f, "%ms %*i %*i %i", &name, &enabled);
                if (n_parsed != 2) {

                        if (ferror(f))
                                return -errno;

                        if (!feof(f))
                                return -EBADMSG; /* Neither an I/O error nor EOF: the line is malformed. */

                        break;
                }

                if (enabled == 0)
                        continue;

                if (!cg_controller_is_valid(name))
                        return -EBADMSG;

                /* The set takes over ownership of the string. */
                r = set_ensure_consume(&found, &string_hash_ops_free, TAKE_PTR(name));
                if (r < 0)
                        return r;
        }

        *ret = TAKE_PTR(found);

        return 0;
}
2133

2134
/* The hybrid mode was initially implemented in v232 and simply mounted cgroup2 on
2135
 * /sys/fs/cgroup/systemd. This unfortunately broke other tools (such as docker) which expected the v1
2136
 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mounts v2 on
2137
 * /sys/fs/cgroup/unified and maintains "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility
2138
 * with other tools.
2139
 *
2140
 * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep
2141
 * cgroup v2 process management but disable the compat dual layout, we return true on
2142
 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and false on cg_hybrid_unified().
2143
 */
2144
static thread_local bool unified_systemd_v232;
2145

2146
int cg_unified_cached(bool flush) {
664,266✔
2147
        static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
664,266✔
2148

2149
        struct statfs fs;
664,266✔
2150

2151
        /* Checks if we support the unified hierarchy. Returns an
2152
         * error when the cgroup hierarchies aren't mounted yet or we
2153
         * have any other trouble determining if the unified hierarchy
2154
         * is supported. */
2155

2156
        if (flush)
664,266✔
2157
                unified_cache = CGROUP_UNIFIED_UNKNOWN;
17,933✔
2158
        else if (unified_cache >= CGROUP_UNIFIED_NONE)
646,333✔
2159
                return unified_cache;
664,266✔
2160

2161
        if (statfs("/sys/fs/cgroup/", &fs) < 0)
32,871✔
UNCOV
2162
                return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
×
2163

2164
        if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
32,871✔
2165
                log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
32,871✔
2166
                unified_cache = CGROUP_UNIFIED_ALL;
32,871✔
UNCOV
2167
        } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
×
UNCOV
2168
                if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
×
UNCOV
2169
                    F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
×
UNCOV
2170
                        log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
×
UNCOV
2171
                        unified_cache = CGROUP_UNIFIED_SYSTEMD;
×
UNCOV
2172
                        unified_systemd_v232 = false;
×
2173
                } else {
UNCOV
2174
                        if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) {
×
UNCOV
2175
                                if (errno == ENOENT) {
×
2176
                                        /* Some other software may have set up /sys/fs/cgroup in a configuration we do not recognize. */
UNCOV
2177
                                        log_debug_errno(errno, "Unsupported cgroupsv1 setup detected: name=systemd hierarchy not found.");
×
UNCOV
2178
                                        return -ENOMEDIUM;
×
2179
                                }
UNCOV
2180
                                return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
×
2181
                        }
2182

UNCOV
2183
                        if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
×
UNCOV
2184
                                log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
×
UNCOV
2185
                                unified_cache = CGROUP_UNIFIED_SYSTEMD;
×
2186
                                unified_systemd_v232 = true;
×
2187
                        } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
×
2188
                                log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
×
2189
                                unified_cache = CGROUP_UNIFIED_NONE;
×
2190
                        } else {
2191
                                log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
×
2192
                                          (unsigned long long) fs.f_type);
2193
                                unified_cache = CGROUP_UNIFIED_NONE;
×
2194
                        }
2195
                }
2196
        } else if (F_TYPE_EQUAL(fs.f_type, SYSFS_MAGIC)) {
×
2197
                return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
×
2198
                                       "No filesystem is currently mounted on /sys/fs/cgroup.");
2199
        } else
UNCOV
2200
                return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
×
2201
                                       "Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2202
                                       (unsigned long long)fs.f_type);
2203

2204
        return unified_cache;
32,871✔
2205
}
2206

2207
int cg_unified_controller(const char *controller) {
103,851✔
2208
        int r;
103,851✔
2209

2210
        r = cg_unified_cached(false);
103,851✔
2211
        if (r < 0)
103,851✔
2212
                return r;
2213

2214
        if (r == CGROUP_UNIFIED_NONE)
103,851✔
2215
                return false;
2216

2217
        if (r >= CGROUP_UNIFIED_ALL)
103,851✔
2218
                return true;
2219

UNCOV
2220
        return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
×
2221
}
2222

2223
int cg_all_unified(void) {
516,089✔
2224
        int r;
516,089✔
2225

2226
        r = cg_unified_cached(false);
516,089✔
2227
        if (r < 0)
516,089✔
2228
                return r;
2229

2230
        return r >= CGROUP_UNIFIED_ALL;
516,089✔
2231
}
2232

2233
int cg_hybrid_unified(void) {
26,392✔
2234
        int r;
26,392✔
2235

2236
        r = cg_unified_cached(false);
26,392✔
2237
        if (r < 0)
26,392✔
2238
                return r;
2239

2240
        return r == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
26,392✔
2241
}
2242

2243
int cg_is_delegated(const char *path) {
        int r;

        /* Checks the delegation xattrs of the specified cgroup. The result of reading "trusted.delegate"
         * is returned as-is, unless the xattr is absent, in which case "user.delegate" is consulted. If
         * that one is absent too, false is returned. */

        assert(path);

        r = cg_get_xattr_bool(path, "trusted.delegate");
        if (ERRNO_IS_NEG_XATTR_ABSENT(r)) {
                /* If the trusted xattr isn't set (preferred), then check the untrusted one. Under the
                 * assumption that whoever is trusted enough to own the cgroup, is also trusted enough to
                 * decide if it is delegated or not this should be safe. */
                r = cg_get_xattr_bool(path, "user.delegate");
                if (ERRNO_IS_NEG_XATTR_ABSENT(r))
                        return false;
        }

        return r;
}
2258

2259
int cg_is_delegated_fd(int fd) {
        int r;

        /* Same logic as cg_is_delegated(), but reads the xattrs from an already opened file descriptor:
         * "trusted.delegate" is checked first, and "user.delegate" only if the former is absent. If
         * both are absent, false is returned. */

        assert(fd >= 0);

        r = getxattr_at_bool(fd, /* path= */ NULL, "trusted.delegate", /* flags= */ 0);
        if (ERRNO_IS_NEG_XATTR_ABSENT(r)) {
                r = getxattr_at_bool(fd, /* path= */ NULL, "user.delegate", /* flags= */ 0);
                if (ERRNO_IS_NEG_XATTR_ABSENT(r))
                        return false;
        }

        return r;
}
2271

2272
int cg_has_coredump_receive(const char *path) {
        int r;

        /* Reads the boolean "user.coredump_receive" xattr of the specified cgroup. An absent xattr is
         * reported as false, every other result is returned as-is. */

        assert(path);

        r = cg_get_xattr_bool(path, "user.coredump_receive");

        return ERRNO_IS_NEG_XATTR_ABSENT(r) ? false : r;
}
2283

2284
/* Default value of each IO limit type: all of them default to CGROUP_LIMIT_MAX. */
const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]  = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WBPS_MAX]  = CGROUP_LIMIT_MAX,
        [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
};
2290

2291
/* String names of the IO limit types, indexed by CGroupIOLimitType. */
static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]  = "IOReadBandwidthMax",
        [CGROUP_IO_WBPS_MAX]  = "IOWriteBandwidthMax",
        [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
        [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
};

DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
4,325✔
2299

2300
/* String names of the cgroup controllers (including the BPF pseudo-controllers), indexed by
 * CGroupController. */
static const char* const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
        [CGROUP_CONTROLLER_CPU]                             = "cpu",
        [CGROUP_CONTROLLER_CPUACCT]                         = "cpuacct",
        [CGROUP_CONTROLLER_CPUSET]                          = "cpuset",
        [CGROUP_CONTROLLER_IO]                              = "io",
        [CGROUP_CONTROLLER_BLKIO]                           = "blkio",
        [CGROUP_CONTROLLER_MEMORY]                          = "memory",
        [CGROUP_CONTROLLER_DEVICES]                         = "devices",
        [CGROUP_CONTROLLER_PIDS]                            = "pids",
        [CGROUP_CONTROLLER_BPF_FIREWALL]                    = "bpf-firewall",
        [CGROUP_CONTROLLER_BPF_DEVICES]                     = "bpf-devices",
        [CGROUP_CONTROLLER_BPF_FOREIGN]                     = "bpf-foreign",
        [CGROUP_CONTROLLER_BPF_SOCKET_BIND]                 = "bpf-socket-bind",
        [CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES] = "bpf-restrict-network-interfaces",
};

DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
386,372✔
2317

2318
CGroupMask get_cpu_accounting_mask(void) {
1,736,971✔
2319
        static CGroupMask needed_mask = (CGroupMask) -1;
1,736,971✔
2320

2321
        /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is
2322
         * provided externally from the CPU controller, which means we don't
2323
         * need to enable the CPU controller just to get metrics. This is good,
2324
         * because enabling the CPU controller comes at a minor performance
2325
         * hit, especially when it's propagated deep into large hierarchies.
2326
         * There's also no separate CPU accounting controller available within
2327
         * a unified hierarchy.
2328
         *
2329
         * This combination of factors results in the desired cgroup mask to
2330
         * enable for CPU accounting varying as follows:
2331
         *
2332
         *                   ╔═════════════════════╤═════════════════════╗
2333
         *                   ║     Linux ≥4.15     │     Linux <4.15     ║
2334
         *   ╔═══════════════╬═════════════════════╪═════════════════════╣
2335
         *   ║ Unified       ║ nothing             │ CGROUP_MASK_CPU     ║
2336
         *   ╟───────────────╫─────────────────────┼─────────────────────╢
2337
         *   ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║
2338
         *   ╚═══════════════╩═════════════════════╧═════════════════════╝
2339
         *
2340
         * We check kernel version here instead of manually checking whether
2341
         * cpu.stat is present for every cgroup, as that check in itself would
2342
         * already be fairly expensive.
2343
         *
2344
         * Kernels where this patch has been backported will therefore have the
2345
         * CPU controller enabled unnecessarily. This is more expensive than
2346
         * necessary, but harmless. ☺️
2347
         */
2348

2349
        if (needed_mask == (CGroupMask) -1) {
1,736,971✔
2350
                if (cg_all_unified()) {
718✔
2351
                        struct utsname u;
718✔
2352
                        assert_se(uname(&u) >= 0);
718✔
2353

2354
                        if (strverscmp_improved(u.release, "4.15") < 0)
718✔
UNCOV
2355
                                needed_mask = CGROUP_MASK_CPU;
×
2356
                        else
2357
                                needed_mask = 0;
718✔
2358
                } else
UNCOV
2359
                        needed_mask = CGROUP_MASK_CPUACCT;
×
2360
        }
2361

2362
        return needed_mask;
1,736,971✔
2363
}
2364

2365
bool cpu_accounting_is_cheap(void) {
2,321✔
2366
        return get_cpu_accounting_mask() == 0;
2,321✔
2367
}
2368

2369
/* String names of the managed OOM modes, indexed by ManagedOOMMode. */
static const char* const managed_oom_mode_table[_MANAGED_OOM_MODE_MAX] = {
        [MANAGED_OOM_AUTO] = "auto",
        [MANAGED_OOM_KILL] = "kill",
};

DEFINE_STRING_TABLE_LOOKUP(managed_oom_mode, ManagedOOMMode);
37,539✔
2375

2376
/* String names of the managed OOM preferences, indexed by ManagedOOMPreference. */
static const char* const managed_oom_preference_table[_MANAGED_OOM_PREFERENCE_MAX] = {
        [MANAGED_OOM_PREFERENCE_NONE]  = "none",
        [MANAGED_OOM_PREFERENCE_AVOID] = "avoid",
        [MANAGED_OOM_PREFERENCE_OMIT]  = "omit",
};

DEFINE_STRING_TABLE_LOOKUP(managed_oom_preference, ManagedOOMPreference);
18,546✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc