• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 13912360373

17 Mar 2025 10:34PM UTC coverage: 71.946% (+0.03%) from 71.915%
13912360373

push

github

web-flow
nsresourced,vmspawn: allow unpriv "tap" based networking in vmspawn (#36688)

This extends nsresourced to also allow delegation of a network tap
device (in addition to veth) to unpriv clients, with a strictly enforced
naming scheme.

Also tightens security on a couple of things:

* enforces polkit on all nsresourced ops too (though by default still
everything is allowed)
* puts a limit on delegated network devices
* forcibly cleans up delegated network devices when the userns goes away

145 of 375 new or added lines in 14 files covered. (38.67%)

2324 existing lines in 47 files now uncovered.

296268 of 411794 relevant lines covered (71.95%)

711485.52 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

79.56
/src/basic/cgroup-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <errno.h>
4
#include <limits.h>
5
#include <signal.h>
6
#include <stddef.h>
7
#include <stdlib.h>
8
#include <sys/types.h>
9
#include <sys/utsname.h>
10
#include <sys/xattr.h>
11
#include <threads.h>
12
#include <unistd.h>
13

14
#include "alloc-util.h"
15
#include "capsule-util.h"
16
#include "cgroup-util.h"
17
#include "constants.h"
18
#include "dirent-util.h"
19
#include "extract-word.h"
20
#include "fd-util.h"
21
#include "fileio.h"
22
#include "format-util.h"
23
#include "fs-util.h"
24
#include "log.h"
25
#include "login-util.h"
26
#include "macro.h"
27
#include "missing_fs.h"
28
#include "missing_magic.h"
29
#include "mkdir.h"
30
#include "parse-util.h"
31
#include "path-util.h"
32
#include "process-util.h"
33
#include "set.h"
34
#include "special.h"
35
#include "stat-util.h"
36
#include "stdio-util.h"
37
#include "string-table.h"
38
#include "string-util.h"
39
#include "strv.h"
40
#include "unit-name.h"
41
#include "user-util.h"
42
#include "xattr-util.h"
43

44
int cg_path_open(const char *controller, const char *path) {
784✔
45
        _cleanup_free_ char *fs = NULL;
784✔
46
        int r;
784✔
47

48
        r = cg_get_path(controller, path, /* item=*/ NULL, &fs);
784✔
49
        if (r < 0)
784✔
50
                return r;
51

52
        return RET_NERRNO(open(fs, O_DIRECTORY|O_CLOEXEC));
784✔
53
}
54

55
int cg_cgroupid_open(int cgroupfs_fd, uint64_t id) {
8✔
56
        _cleanup_close_ int fsfd = -EBADF;
8✔
57

58
        if (cgroupfs_fd < 0) {
8✔
59
                fsfd = open("/sys/fs/cgroup", O_CLOEXEC|O_DIRECTORY);
7✔
60
                if (fsfd < 0)
7✔
61
                        return -errno;
×
62

63
                cgroupfs_fd = fsfd;
64
        }
65

66
        cg_file_handle fh = CG_FILE_HANDLE_INIT;
8✔
67
        CG_FILE_HANDLE_CGROUPID(fh) = id;
8✔
68

69
        return RET_NERRNO(open_by_handle_at(cgroupfs_fd, &fh.file_handle, O_DIRECTORY|O_CLOEXEC));
15✔
70
}
71

72
int cg_path_from_cgroupid(int cgroupfs_fd, uint64_t id, char **ret) {
×
73
        _cleanup_close_ int cgfd = -EBADF;
×
74
        int r;
×
75

76
        cgfd = cg_cgroupid_open(cgroupfs_fd, id);
×
77
        if (cgfd < 0)
×
78
                return cgfd;
79

80
        _cleanup_free_ char *path = NULL;
×
81
        r = fd_get_path(cgfd, &path);
×
82
        if (r < 0)
×
83
                return r;
84

85
        if (!path_startswith(path, "/sys/fs/cgroup/"))
×
86
                return -EXDEV; /* recognizable error */
87

88
        if (ret)
×
89
                *ret = TAKE_PTR(path);
×
90
        return 0;
91
}
92

93
int cg_get_cgroupid_at(int dfd, const char *path, uint64_t *ret) {
5,755✔
94
        cg_file_handle fh = CG_FILE_HANDLE_INIT;
5,755✔
95
        int mnt_id;
5,755✔
96

97
        assert(dfd >= 0 || (dfd == AT_FDCWD && path_is_absolute(path)));
11,476✔
98
        assert(ret);
5,755✔
99

100
        /* This is cgroupfs so we know the size of the handle, thus no need to loop around like
101
         * name_to_handle_at_loop() does in mountpoint-util.c */
102
        if (name_to_handle_at(dfd, strempty(path), &fh.file_handle, &mnt_id, isempty(path) ? AT_EMPTY_PATH : 0) < 0) {
11,510✔
103
                assert(errno != EOVERFLOW);
×
104
                return -errno;
×
105
        }
106

107
        *ret = CG_FILE_HANDLE_CGROUPID(fh);
5,755✔
108
        return 0;
5,755✔
109
}
110

111
static int cg_enumerate_items(const char *controller, const char *path, FILE **ret, const char *item) {
29,928✔
112
        _cleanup_free_ char *fs = NULL;
29,928✔
113
        FILE *f;
29,928✔
114
        int r;
29,928✔
115

116
        assert(ret);
29,928✔
117

118
        r = cg_get_path(controller, path, item, &fs);
29,928✔
119
        if (r < 0)
29,928✔
120
                return r;
121

122
        f = fopen(fs, "re");
29,928✔
123
        if (!f)
29,928✔
124
                return -errno;
18,858✔
125

126
        *ret = f;
11,070✔
127
        return 0;
11,070✔
128
}
129

130
/* Opens the "cgroup.procs" file of the given cgroup for reading; thin wrapper around
 * cg_enumerate_items() whose return value is passed through unmodified. */
int cg_enumerate_processes(const char *controller, const char *path, FILE **ret) {
        int r;

        r = cg_enumerate_items(controller, path, ret, "cgroup.procs");
        return r;
}
133

134
/* Reads the next PID from a stream previously opened on a cgroup process list.
 *
 * Returns 1 and stores the PID in *ret when one was read, 0 (with *ret set to 0) on end of file,
 * -EIO if the value read exceeds PID_T_MAX, and the result of errno_or_else(EIO) if parsing fails
 * for any reason other than end of file.
 *
 * Entries reading as zero are silently skipped unless CGROUP_DONT_SKIP_UNMAPPED is set in flags, in
 * which case they are returned like any other entry. */
int cg_read_pid(FILE *f, pid_t *ret, CGroupFlags flags) {

        /* Note that the cgroup.procs might contain duplicates! See cgroups.txt for details. */

        assert(f);
        assert(ret);

        for (;;) {
                unsigned long value;

                errno = 0;
                if (fscanf(f, "%lu", &value) != 1) {
                        if (!feof(f))
                                return errno_or_else(EIO);

                        *ret = 0;
                        return 0;
                }

                if (value > PID_T_MAX)
                        return -EIO;

                /* Under some circumstances (WSL for example) a cgroup may list PIDs from foreign
                 * contexts that cannot be mapped; they appear as zeros. Whether those are reported
                 * as-is or just passed over is the caller's choice. */
                if (value != 0 || FLAGS_SET(flags, CGROUP_DONT_SKIP_UNMAPPED)) {
                        *ret = (pid_t) value;
                        return 1;
                }
        }
}
167

168
/* Like cg_read_pid(), but returns a PidRef.
 *
 * Returns 1 when a process reference was stored in *ret, 0 (with *ret = PIDREF_NULL) on end of file,
 * -EREMOTE if a zero PID was read, and other negative values on failure. If CGROUP_NO_PIDFD is set
 * the reference is created with PIDREF_MAKE_FROM_PID(), otherwise with pidref_set_pid(). Entries for
 * which pidref_set_pid() reports -ESRCH are skipped. */
int cg_read_pidref(FILE *f, PidRef *ret, CGroupFlags flags) {
        assert(f);
        assert(ret);

        for (;;) {
                pid_t pid;
                int r;

                r = cg_read_pid(f, &pid, flags);
                if (r < 0)
                        return log_debug_errno(r, "Failed to read pid from cgroup item: %m");
                if (r == 0) {
                        *ret = PIDREF_NULL;
                        return 0;
                }

                if (pid == 0)
                        return -EREMOTE;

                if (FLAGS_SET(flags, CGROUP_NO_PIDFD)) {
                        *ret = PIDREF_MAKE_FROM_PID(pid);
                        return 1;
                }

                r = pidref_set_pid(ret, pid);
                if (r == -ESRCH)
                        continue; /* ESRCH → vanished in the meantime? then move on to the next entry */

                return r < 0 ? r : 1;
        }
}
202

203
int cg_read_event(
12,558✔
204
                const char *controller,
205
                const char *path,
206
                const char *event,
207
                char **ret) {
208

209
        _cleanup_free_ char *events = NULL, *content = NULL;
12,558✔
210
        int r;
12,558✔
211

212
        r = cg_get_path(controller, path, "cgroup.events", &events);
12,558✔
213
        if (r < 0)
12,558✔
214
                return r;
215

216
        r = read_full_virtual_file(events, &content, NULL);
12,558✔
217
        if (r < 0)
12,558✔
218
                return r;
219

220
        for (const char *p = content;;) {
4,783✔
221
                _cleanup_free_ char *line = NULL, *key = NULL;
4,783✔
222
                const char *q;
4,783✔
223

224
                r = extract_first_word(&p, &line, "\n", 0);
4,783✔
225
                if (r < 0)
4,783✔
226
                        return r;
227
                if (r == 0)
4,783✔
228
                        return -ENOENT;
229

230
                q = line;
4,783✔
231
                r = extract_first_word(&q, &key, " ", 0);
4,783✔
232
                if (r < 0)
4,783✔
233
                        return r;
234
                if (r == 0)
4,783✔
235
                        return -EINVAL;
236

237
                if (!streq(key, event))
4,783✔
238
                        continue;
×
239

240
                return strdup_to(ret, q);
4,783✔
241
        }
242
}
243

244
/* Reports whether "/proc/self/ns/cgroup" exists. The answer is determined once per thread and cached
 * in a thread-local variable. Any access() failure is treated as "not supported"; failures other
 * than ENOENT are additionally logged at debug level. */
bool cg_ns_supported(void) {
        static thread_local int cached = -1;

        if (cached < 0) {
                if (access("/proc/self/ns/cgroup", F_OK) >= 0)
                        cached = true;
                else {
                        if (errno != ENOENT)
                                log_debug_errno(errno, "Failed to check whether /proc/self/ns/cgroup is available, assuming not: %m");

                        cached = false;
                }
        }

        return cached;
}
256

257
/* Reports whether the cgroup freezer is available: cg_all_unified() must return a positive value and
 * "/sys/fs/cgroup/init.scope/cgroup.freeze" must exist. The result is cached per thread. An
 * access() failure other than ENOENT is logged at debug level and treated as "not supported". */
bool cg_freezer_supported(void) {
        static thread_local int cached = -1;

        if (cached >= 0)
                return cached;

        if (cg_all_unified() <= 0)
                cached = false;
        else if (access("/sys/fs/cgroup/init.scope/cgroup.freeze", F_OK) >= 0)
                cached = true;
        else {
                if (errno != ENOENT)
                        log_debug_errno(errno, "Failed to check whether cgroup freezer is available, assuming not: %m");

                cached = false;
        }

        return cached;
}
272

273
/* Reports whether the "cgroup.kill" attribute is available: cg_all_unified() must return a positive
 * value and "/sys/fs/cgroup/init.scope/cgroup.kill" must exist. The result is cached per thread. An
 * access() failure other than ENOENT is logged at debug level and treated as "not supported". */
bool cg_kill_supported(void) {
        static thread_local int cached = -1;

        if (cached >= 0)
                return cached;

        if (cg_all_unified() <= 0)
                cached = false;
        else if (access("/sys/fs/cgroup/init.scope/cgroup.kill", F_OK) >= 0)
                cached = true;
        else {
                if (errno != ENOENT)
                        log_debug_errno(errno, "Failed to check whether cgroup.kill is available, assuming not: %m");

                cached = false;
        }

        return cached;
}
288

289
int cg_enumerate_subgroups(const char *controller, const char *path, DIR **ret) {
27,651✔
290
        _cleanup_free_ char *fs = NULL;
27,651✔
291
        DIR *d;
27,651✔
292
        int r;
27,651✔
293

294
        assert(ret);
27,651✔
295

296
        /* This is not recursive! */
297

298
        r = cg_get_path(controller, path, NULL, &fs);
27,651✔
299
        if (r < 0)
27,651✔
300
                return r;
301

302
        d = opendir(fs);
27,651✔
303
        if (!d)
27,651✔
304
                return -errno;
16,203✔
305

306
        *ret = d;
11,448✔
307
        return 0;
11,448✔
308
}
309

310
int cg_read_subgroup(DIR *d, char **ret) {
16,958✔
311
        assert(d);
16,958✔
312
        assert(ret);
16,958✔
313

314
        FOREACH_DIRENT_ALL(de, d, return -errno) {
538,016✔
315
                if (de->d_type != DT_DIR)
526,338✔
316
                        continue;
497,702✔
317

318
                if (dot_or_dot_dot(de->d_name))
28,636✔
319
                        continue;
23,356✔
320

321
                return strdup_to_full(ret, de->d_name);
5,280✔
322
        }
323

324
        *ret = NULL;
11,678✔
325
        return 0;
11,678✔
326
}
327

328
static int cg_kill_items(
29,278✔
329
                const char *path,
330
                const char *item,
331
                int sig,
332
                CGroupFlags flags,
333
                Set *s,
334
                cg_kill_log_func_t log_kill,
335
                void *userdata) {
336

337
        _cleanup_set_free_ Set *allocated_set = NULL;
29,278✔
338
        int r, ret = 0;
29,278✔
339

340
        assert(path);
29,278✔
341
        assert(item);
29,278✔
342
        assert(sig >= 0);
29,278✔
343

344
         /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence
345
          * don't send SIGCONT on SIGKILL. */
346
        if (IN_SET(sig, SIGCONT, SIGKILL))
29,278✔
347
                flags &= ~CGROUP_SIGCONT;
10,998✔
348

349
        /* This goes through the tasks list and kills them all. This is repeated until no further processes
350
         * are added to the tasks list, to properly handle forking processes.
351
         *
352
         * When sending SIGKILL, prefer cg_kill_kernel_sigkill(), which is fully atomic. */
353

354
        if (!s) {
29,278✔
355
                s = allocated_set = set_new(NULL);
920✔
356
                if (!s)
920✔
357
                        return -ENOMEM;
358
        }
359

360
        bool done;
29,517✔
361
        do {
29,517✔
362
                _cleanup_fclose_ FILE *f = NULL;
18,858✔
363
                int ret_log_kill;
29,517✔
364

365
                done = true;
29,517✔
366

367
                r = cg_enumerate_items(SYSTEMD_CGROUP_CONTROLLER, path, &f, item);
29,517✔
368
                if (r == -ENOENT)
29,517✔
369
                        break;
370
                if (r < 0)
10,659✔
371
                        return RET_GATHER(ret, log_debug_errno(r, "Failed to enumerate cgroup items: %m"));
×
372

373
                for (;;) {
14,803✔
374
                        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
14,803✔
375

376
                        r = cg_read_pidref(f, &pidref, flags);
14,803✔
377
                        if (r < 0)
14,803✔
378
                                return RET_GATHER(ret, log_debug_errno(r, "Failed to read pidref from cgroup '%s': %m", path));
×
379
                        if (r == 0)
14,803✔
380
                                break;
381

382
                        if ((flags & CGROUP_IGNORE_SELF) && pidref_is_self(&pidref))
4,144✔
383
                                continue;
920✔
384

385
                        if (set_contains(s, PID_TO_PTR(pidref.pid)))
3,224✔
386
                                continue;
1,988✔
387

388
                        /* Ignore kernel threads to mimic the behavior of cgroup.kill. */
389
                        if (pidref_is_kernel_thread(&pidref) > 0) {
1,236✔
390
                                log_debug("Ignoring kernel thread with pid " PID_FMT " in cgroup '%s'", pidref.pid, path);
×
391
                                continue;
×
392
                        }
393

394
                        if (log_kill)
1,236✔
395
                                ret_log_kill = log_kill(&pidref, sig, userdata);
91✔
396

397
                        /* If we haven't killed this process yet, kill it */
398
                        r = pidref_kill(&pidref, sig);
1,236✔
399
                        if (r < 0 && r != -ESRCH)
1,236✔
400
                                RET_GATHER(ret, log_debug_errno(r, "Failed to kill process with pid " PID_FMT " from cgroup '%s': %m", pidref.pid, path));
×
401
                        if (r >= 0) {
1,236✔
402
                                if (flags & CGROUP_SIGCONT)
1,236✔
403
                                        (void) pidref_kill(&pidref, SIGCONT);
1,143✔
404

405
                                if (ret == 0) {
1,236✔
406
                                        if (log_kill)
309✔
407
                                                ret = ret_log_kill;
408
                                        else
409
                                                ret = 1;
218✔
410
                                }
411
                        }
412

413
                        done = false;
1,236✔
414

415
                        r = set_put(s, PID_TO_PTR(pidref.pid));
1,236✔
416
                        if (r < 0)
1,236✔
417
                                return RET_GATHER(ret, r);
×
418
                }
419

420
                /* To avoid racing against processes which fork quicker than we can kill them, we repeat this
421
                 * until no new pids need to be killed. */
422

423
        } while (!done);
10,659✔
424

425
        return ret;
426
}
427

428
int cg_kill(
23,779✔
429
                const char *path,
430
                int sig,
431
                CGroupFlags flags,
432
                Set *s,
433
                cg_kill_log_func_t log_kill,
434
                void *userdata) {
435

436
        int r, ret;
23,779✔
437

438
        assert(path);
23,779✔
439

440
        ret = cg_kill_items(path, "cgroup.procs", sig, flags, s, log_kill, userdata);
23,779✔
441
        if (ret < 0)
23,779✔
442
                return log_debug_errno(ret, "Failed to kill processes in cgroup '%s' item cgroup.procs: %m", path);
×
443
        if (sig != SIGKILL)
23,779✔
444
                return ret;
445

446
        /* Only in case of killing with SIGKILL and when using cgroupsv2, kill remaining threads manually as
447
           a workaround for kernel bug. It was fixed in 5.2-rc5 (c03cd7738a83), backported to 4.19.66
448
           (4340d175b898) and 4.14.138 (feb6b123b7dd). */
449
        r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
5,499✔
450
        if (r < 0)
5,499✔
451
                return r;
452
        if (r == 0)
5,499✔
453
                return ret;
454

455
        /* Opening pidfds for non thread group leaders only works from 6.9 onwards with PIDFD_THREAD. On
456
         * older kernels or without PIDFD_THREAD pidfd_open() fails with EINVAL. Since we might read non
457
         * thread group leader IDs from cgroup.threads, we set CGROUP_NO_PIDFD to avoid trying open pidfd's
458
         * for them and instead use the regular pid. */
459
        r = cg_kill_items(path, "cgroup.threads", sig, flags|CGROUP_NO_PIDFD, s, log_kill, userdata);
5,499✔
460
        if (r < 0)
5,499✔
461
                return log_debug_errno(r, "Failed to kill processes in cgroup '%s' item cgroup.threads: %m", path);
×
462

463
        return r > 0 || ret > 0;
5,499✔
464
}
465

466
int cg_kill_recursive(
23,317✔
467
                const char *path,
468
                int sig,
469
                CGroupFlags flags,
470
                Set *s,
471
                cg_kill_log_func_t log_kill,
472
                void *userdata) {
473

474
        _cleanup_set_free_ Set *allocated_set = NULL;
×
475
        _cleanup_closedir_ DIR *d = NULL;
23,317✔
476
        int r, ret;
23,317✔
477

478
        assert(path);
23,317✔
479
        assert(sig >= 0);
23,317✔
480

481
        if (!s) {
23,317✔
482
                s = allocated_set = set_new(NULL);
22,399✔
483
                if (!s)
22,399✔
484
                        return -ENOMEM;
485
        }
486

487
        ret = cg_kill(path, sig, flags, s, log_kill, userdata);
23,317✔
488

489
        r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
23,317✔
490
        if (r < 0) {
23,317✔
491
                if (r != -ENOENT)
16,203✔
492
                        RET_GATHER(ret, log_debug_errno(r, "Failed to enumerate cgroup '%s' subgroups: %m", path));
×
493

494
                return ret;
16,203✔
495
        }
496

497
        for (;;) {
7,478✔
498
                _cleanup_free_ char *fn = NULL, *p = NULL;
7,296✔
499

500
                r = cg_read_subgroup(d, &fn);
7,296✔
501
                if (r < 0) {
7,296✔
502
                        RET_GATHER(ret, log_debug_errno(r, "Failed to read subgroup from cgroup '%s': %m", path));
×
503
                        break;
504
                }
505
                if (r == 0)
7,296✔
506
                        break;
507

508
                p = path_join(empty_to_root(path), fn);
364✔
509
                if (!p)
182✔
510
                        return -ENOMEM;
×
511

512
                r = cg_kill_recursive(p, sig, flags, s, log_kill, userdata);
182✔
513
                if (r < 0)
182✔
514
                        log_debug_errno(r, "Failed to recursively kill processes in cgroup '%s': %m", p);
×
515
                if (r != 0 && ret >= 0)
182✔
516
                        ret = r;
15✔
517
        }
518

519
        return ret;
7,114✔
520
}
521

522
int cg_kill_kernel_sigkill(const char *path) {
×
523
        _cleanup_free_ char *killfile = NULL;
×
524
        int r;
×
525

526
        /* Kills the cgroup at `path` directly by writing to its cgroup.kill file.  This sends SIGKILL to all
527
         * processes in the cgroup and has the advantage of being completely atomic, unlike cg_kill_items(). */
528

529
        assert(path);
×
530

531
        if (!cg_kill_supported())
×
532
                return -EOPNOTSUPP;
533

534
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "cgroup.kill", &killfile);
×
535
        if (r < 0)
×
536
                return r;
537

538
        r = write_string_file(killfile, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
×
539
        if (r < 0)
×
540
                return log_debug_errno(r, "Failed to write to cgroup.kill for cgroup '%s': %m", path);
×
541

542
        return 0;
543
}
544

545
static const char *controller_to_dirname(const char *controller) {
×
546
        assert(controller);
×
547

548
        /* Converts a controller name to the directory name below /sys/fs/cgroup/ we want to mount it
549
         * to. Effectively, this just cuts off the name= prefixed used for named hierarchies, if it is
550
         * specified. */
551

552
        if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
×
553
                if (cg_hybrid_unified() > 0)
×
554
                        controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
555
                else
556
                        controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
×
557
        }
558

559
        return startswith(controller, "name=") ?: controller;
×
560
}
561

562
/* Builds "/sys/fs/cgroup/<controller dir>[/<path>][/<suffix>]", where the controller directory name
 * comes from controller_to_dirname() and empty or NULL path/suffix components are left out.
 *
 * Returns 0 and stores the newly allocated string in *ret, or -ENOMEM. */
static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **ret) {
        const char *dn;
        char *joined = NULL;

        assert(ret);
        assert(controller);

        dn = controller_to_dirname(controller);

        if (!isempty(path)) {
                if (isempty(suffix))
                        joined = path_join("/sys/fs/cgroup", dn, path);
                else
                        joined = path_join("/sys/fs/cgroup", dn, path, suffix);
        } else {
                if (isempty(suffix))
                        joined = path_join("/sys/fs/cgroup", dn);
                else
                        joined = path_join("/sys/fs/cgroup", dn, suffix);
        }
        if (!joined)
                return -ENOMEM;

        *ret = joined;
        return 0;
}
585

586
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]", leaving out empty or NULL components.
 *
 * Returns 0 and stores the newly allocated string in *ret, or -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **ret) {
        char *joined;

        assert(ret);

        if (!isempty(path)) {
                if (isempty(suffix))
                        joined = path_join("/sys/fs/cgroup", path);
                else
                        joined = path_join("/sys/fs/cgroup", path, suffix);
        } else {
                if (isempty(suffix))
                        joined = strdup("/sys/fs/cgroup");
                else
                        joined = path_join("/sys/fs/cgroup", suffix);
        }
        if (!joined)
                return -ENOMEM;

        *ret = joined;
        return 0;
}
605

606
/* Composes a path for a cgroup, or for one of its attribute files, from controller, cgroup path and
 * suffix.
 *
 * With controller == NULL only path and suffix are joined, with no prefix, i.e. the result is the
 * path *below* the controllers; at least one of the two must be non-empty, otherwise -EINVAL is
 * returned. Otherwise the controller name is validated (-EINVAL if invalid) and the path is built
 * by join_path_unified() or join_path_legacy(), depending on what cg_all_unified() reports.
 *
 * In all cases the result is run through path_simplify(). Returns 0 and stores a newly allocated
 * string in *ret on success, a negative error otherwise. */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **ret) {
        int r;

        assert(ret);

        if (!controller) {
                bool no_path = isempty(path), no_suffix = isempty(suffix);
                char *joined;

                if (no_path && no_suffix)
                        return -EINVAL;

                if (no_suffix)
                        joined = strdup(path);
                else if (no_path)
                        joined = strdup(suffix);
                else
                        joined = path_join(path, suffix);
                if (!joined)
                        return -ENOMEM;

                *ret = path_simplify(joined);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        r = cg_all_unified();
        if (r < 0)
                return r;

        r = r > 0 ?
                join_path_unified(path, suffix, ret) :
                join_path_legacy(controller, path, suffix, ret);
        if (r < 0)
                return r;

        path_simplify(*ret);
        return 0;
}
649

650
/* Probes the hierarchy directory of a controller below /sys/fs/cgroup/ with access_nofollow(),
 * whose result is returned.
 *
 * If root is NULL the hierarchy directory itself is checked for existence (F_OK). If root is
 * specified, "<root>/cgroup.procs" inside the hierarchy is checked for write access (W_OK), which
 * verifies that:
 * - a possible subcgroup has been created at root,
 * - we are able to modify the hierarchy. */
static int controller_is_v1_accessible(const char *root, const char *controller) {
        const char *dn, *probe;
        int mode;

        assert(controller);

        dn = controller_to_dirname(controller);

        probe = strjoina("/sys/fs/cgroup/", dn, root, root ? "/cgroup.procs" : NULL);
        mode = root ? W_OK : F_OK;

        return access_nofollow(probe, mode);
}
664

665
/* Same as cg_get_path(), but with a mandatory controller that is additionally checked for
 * accessibility first.
 *
 * Returns -EINVAL for an invalid controller name and -EOPNOTSUPP for a "name=" hierarchy when
 * cg_all_unified() reports a positive value. If it reports 0, a negative result of
 * controller_is_v1_accessible() is returned. Otherwise the result of cg_get_path() is returned. */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **ret) {
        int r;

        assert(controller);
        assert(ret);

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        r = cg_all_unified();
        if (r < 0)
                return r;

        if (r == 0) {
                /* Verify that the controller in question can really be accessed */
                r = controller_is_v1_accessible(NULL, controller);
                if (r < 0)
                        return r;
        } else if (startswith(controller, "name="))
                /* On the unified hierarchy every controller counts as accessible, the named
                 * hierarchies being the one exception */
                return -EOPNOTSUPP;

        return cg_get_path(controller, path, suffix, ret);
}
691

692
int cg_set_xattr(const char *path, const char *name, const void *value, size_t size, int flags) {
8,072✔
693
        _cleanup_free_ char *fs = NULL;
8,072✔
694
        int r;
8,072✔
695

696
        assert(path);
8,072✔
697
        assert(name);
8,072✔
698
        assert(value || size <= 0);
8,072✔
699

700
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
8,072✔
701
        if (r < 0)
8,072✔
702
                return r;
703

704
        return RET_NERRNO(setxattr(fs, name, value, size, flags));
8,072✔
705
}
706

707
int cg_get_xattr(const char *path, const char *name, void *value, size_t size) {
×
708
        _cleanup_free_ char *fs = NULL;
×
709
        ssize_t n;
×
710
        int r;
×
711

712
        assert(path);
×
713
        assert(name);
×
714

715
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
×
716
        if (r < 0)
×
717
                return r;
718

719
        n = getxattr(fs, name, value, size);
×
720
        if (n < 0)
×
721
                return -errno;
×
722

723
        return (int) n;
×
724
}
725

726
int cg_get_xattr_malloc(const char *path, const char *name, char **ret) {
19,983✔
727
        _cleanup_free_ char *fs = NULL;
19,983✔
728
        int r;
19,983✔
729

730
        assert(path);
19,983✔
731
        assert(name);
19,983✔
732

733
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
19,983✔
734
        if (r < 0)
19,983✔
735
                return r;
736

737
        return lgetxattr_malloc(fs, name, ret);
19,983✔
738
}
739

740
int cg_get_xattr_bool(const char *path, const char *name) {
423✔
741
        _cleanup_free_ char *fs = NULL;
423✔
742
        int r;
423✔
743

744
        assert(path);
423✔
745
        assert(name);
423✔
746

747
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
423✔
748
        if (r < 0)
423✔
749
                return r;
750

751
        return getxattr_at_bool(AT_FDCWD, fs, name, /* flags= */ 0);
423✔
752
}
753

754
int cg_remove_xattr(const char *path, const char *name) {
39,909✔
755
        _cleanup_free_ char *fs = NULL;
39,909✔
756
        int r;
39,909✔
757

758
        assert(path);
39,909✔
759
        assert(name);
39,909✔
760

761
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
39,909✔
762
        if (r < 0)
39,909✔
763
                return r;
764

765
        return RET_NERRNO(removexattr(fs, name));
79,818✔
766
}
767

768
int cg_pid_get_path(const char *controller, pid_t pid, char **ret_path) {
50,768✔
769
        _cleanup_fclose_ FILE *f = NULL;
50,768✔
770
        const char *fs, *controller_str = NULL;  /* avoid false maybe-uninitialized warning */
50,768✔
771
        int unified, r;
50,768✔
772

773
        assert(pid >= 0);
50,768✔
774
        assert(ret_path);
50,768✔
775

776
        if (controller) {
50,768✔
777
                if (!cg_controller_is_valid(controller))
50,432✔
778
                        return -EINVAL;
779
        } else
780
                controller = SYSTEMD_CGROUP_CONTROLLER;
781

782
        unified = cg_unified_controller(controller);
50,768✔
783
        if (unified < 0)
50,768✔
784
                return unified;
785
        if (unified == 0) {
50,768✔
786
                if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
×
787
                        controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
788
                else
789
                        controller_str = controller;
×
790
        }
791

792
        fs = procfs_file_alloca(pid, "cgroup");
50,768✔
793
        r = fopen_unlocked(fs, "re", &f);
50,768✔
794
        if (r == -ENOENT)
50,768✔
795
                return -ESRCH;
796
        if (r < 0)
47,949✔
797
                return r;
798

799
        for (;;) {
49,251✔
800
                _cleanup_free_ char *line = NULL;
48,600✔
801
                char *e;
48,600✔
802

803
                r = read_line(f, LONG_LINE_MAX, &line);
48,600✔
804
                if (r < 0)
48,600✔
805
                        return r;
806
                if (r == 0)
48,591✔
807
                        return -ENODATA;
808

809
                if (unified) {
48,591✔
810
                        e = startswith(line, "0:");
48,591✔
811
                        if (!e)
48,591✔
812
                                continue;
651✔
813

814
                        e = strchr(e, ':');
47,940✔
815
                        if (!e)
47,940✔
816
                                continue;
×
817
                } else {
818
                        char *l;
×
819

820
                        l = strchr(line, ':');
×
821
                        if (!l)
×
822
                                continue;
×
823

824
                        l++;
×
825
                        e = strchr(l, ':');
×
826
                        if (!e)
×
827
                                continue;
×
828
                        *e = 0;
×
829

830
                        assert(controller_str);
×
831
                        r = string_contains_word(l, ",", controller_str);
×
832
                        if (r < 0)
×
833
                                return r;
834
                        if (r == 0)
×
835
                                continue;
×
836
                }
837

838
                _cleanup_free_ char *path = strdup(e + 1);
47,940✔
839
                if (!path)
47,940✔
840
                        return -ENOMEM;
841

842
                /* Refuse cgroup paths from outside our cgroup namespace */
843
                if (startswith(path, "/../"))
47,940✔
844
                        return -EUNATCH;
845

846
                /* Truncate suffix indicating the process is a zombie */
847
                e = endswith(path, " (deleted)");
47,940✔
848
                if (e)
47,940✔
849
                        *e = 0;
1,159✔
850

851
                *ret_path = TAKE_PTR(path);
47,940✔
852
                return 0;
47,940✔
853
        }
854
}
855

856
int cg_pidref_get_path(const char *controller, const PidRef *pidref, char **ret_path) {
21,042✔
857
        _cleanup_free_ char *path = NULL;
21,042✔
858
        int r;
21,042✔
859

860
        assert(ret_path);
21,042✔
861

862
        if (!pidref_is_set(pidref))
21,042✔
863
                return -ESRCH;
864
        if (pidref_is_remote(pidref))
42,084✔
865
                return -EREMOTE;
866

867
        // XXX: Ideally we'd use pidfd_get_cgroupid() + cg_path_from_cgroupid() here, to extract this
868
        // bit of information from pidfd directly. However, the latter requires privilege and it's
869
        // not entirely clear how to handle cgroups from outer namespace.
870

871
        r = cg_pid_get_path(controller, pidref->pid, &path);
21,042✔
872
        if (r < 0)
21,042✔
873
                return r;
874

875
        /* Before we return the path, make sure the procfs entry for this pid still matches the pidref */
876
        r = pidref_verify(pidref);
21,042✔
877
        if (r < 0)
21,042✔
878
                return r;
879

880
        *ret_path = TAKE_PTR(path);
21,042✔
881
        return 0;
21,042✔
882
}
883

884
int cg_is_empty(const char *controller, const char *path) {
4✔
885
        _cleanup_fclose_ FILE *f = NULL;
4✔
886
        pid_t pid;
4✔
887
        int r;
4✔
888

889
        assert(path);
4✔
890

891
        r = cg_enumerate_processes(controller, path, &f);
4✔
892
        if (r == -ENOENT)
4✔
893
                return true;
894
        if (r < 0)
4✔
895
                return r;
896

897
        r = cg_read_pid(f, &pid, CGROUP_DONT_SKIP_UNMAPPED);
4✔
898
        if (r < 0)
4✔
899
                return r;
900

901
        return r == 0;
4✔
902
}
903

904
int cg_is_empty_recursive(const char *controller, const char *path) {
12,558✔
905
        int r;
12,558✔
906

907
        assert(path);
12,558✔
908

909
        /* The root cgroup is always populated */
910
        if (controller && empty_or_root(path))
12,558✔
911
                return false;
912

913
        r = cg_unified_controller(controller);
12,558✔
914
        if (r < 0)
12,558✔
915
                return r;
916
        if (r > 0) {
12,558✔
917
                _cleanup_free_ char *t = NULL;
12,558✔
918

919
                /* On the unified hierarchy we can check empty state
920
                 * via the "populated" attribute of "cgroup.events". */
921

922
                r = cg_read_event(controller, path, "populated", &t);
12,558✔
923
                if (r == -ENOENT)
12,558✔
924
                        return true;
925
                if (r < 0)
4,783✔
926
                        return r;
927

928
                return streq(t, "0");
4,783✔
929
        } else {
930
                _cleanup_closedir_ DIR *d = NULL;
×
931
                char *fn;
×
932

933
                r = cg_is_empty(controller, path);
×
934
                if (r <= 0)
×
935
                        return r;
936

937
                r = cg_enumerate_subgroups(controller, path, &d);
×
938
                if (r == -ENOENT)
×
939
                        return true;
940
                if (r < 0)
×
941
                        return r;
942

943
                while ((r = cg_read_subgroup(d, &fn)) > 0) {
×
944
                        _cleanup_free_ char *p = NULL;
×
945

946
                        p = path_join(path, fn);
×
947
                        free(fn);
×
948
                        if (!p)
×
949
                                return -ENOMEM;
950

951
                        r = cg_is_empty_recursive(controller, p);
×
952
                        if (r <= 0)
×
953
                                return r;
954
                }
955
                if (r < 0)
×
956
                        return r;
957

958
                return true;
×
959
        }
960
}
961

962
int cg_split_spec(const char *spec, char **ret_controller, char **ret_path) {
23✔
963
        _cleanup_free_ char *controller = NULL, *path = NULL;
23✔
964
        int r;
23✔
965

966
        assert(spec);
23✔
967

968
        if (*spec == '/') {
23✔
969
                if (!path_is_normalized(spec))
15✔
970
                        return -EINVAL;
971

972
                if (ret_path) {
15✔
973
                        r = path_simplify_alloc(spec, &path);
15✔
974
                        if (r < 0)
15✔
975
                                return r;
976
                }
977

978
        } else {
979
                const char *e;
8✔
980

981
                e = strchr(spec, ':');
8✔
982
                if (e) {
8✔
983
                        controller = strndup(spec, e-spec);
6✔
984
                        if (!controller)
6✔
985
                                return -ENOMEM;
986
                        if (!cg_controller_is_valid(controller))
6✔
987
                                return -EINVAL;
988

989
                        if (!isempty(e + 1)) {
3✔
990
                                path = strdup(e+1);
2✔
991
                                if (!path)
2✔
992
                                        return -ENOMEM;
993

994
                                if (!path_is_normalized(path) ||
2✔
995
                                    !path_is_absolute(path))
2✔
996
                                        return -EINVAL;
997

998
                                path_simplify(path);
1✔
999
                        }
1000

1001
                } else {
1002
                        if (!cg_controller_is_valid(spec))
2✔
1003
                                return -EINVAL;
1004

1005
                        if (ret_controller) {
1✔
1006
                                controller = strdup(spec);
1✔
1007
                                if (!controller)
1✔
1008
                                        return -ENOMEM;
1009
                        }
1010
                }
1011
        }
1012

1013
        if (ret_controller)
18✔
1014
                *ret_controller = TAKE_PTR(controller);
18✔
1015
        if (ret_path)
18✔
1016
                *ret_path = TAKE_PTR(path);
18✔
1017
        return 0;
1018
}
1019

1020
int cg_mangle_path(const char *path, char **ret) {
465✔
1021
        _cleanup_free_ char *c = NULL, *p = NULL;
465✔
1022
        int r;
465✔
1023

1024
        assert(path);
465✔
1025
        assert(ret);
465✔
1026

1027
        /* First, check if it already is a filesystem path */
1028
        if (path_startswith(path, "/sys/fs/cgroup"))
465✔
1029
                return path_simplify_alloc(path, ret);
461✔
1030

1031
        /* Otherwise, treat it as cg spec */
1032
        r = cg_split_spec(path, &c, &p);
4✔
1033
        if (r < 0)
4✔
1034
                return r;
1035

1036
        return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, ret);
8✔
1037
}
1038

1039
int cg_get_root_path(char **ret_path) {
14,019✔
1040
        char *p, *e;
14,019✔
1041
        int r;
14,019✔
1042

1043
        assert(ret_path);
14,019✔
1044

1045
        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
14,019✔
1046
        if (r < 0)
14,019✔
1047
                return r;
14,019✔
1048

1049
        e = endswith(p, "/" SPECIAL_INIT_SCOPE);
14,019✔
1050
        if (!e)
14,019✔
1051
                e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
79✔
1052
        if (!e)
79✔
1053
                e = endswith(p, "/system"); /* even more legacy */
79✔
1054
        if (e)
14,019✔
1055
                *e = 0;
13,940✔
1056

1057
        *ret_path = p;
14,019✔
1058
        return 0;
14,019✔
1059
}
1060

1061
int cg_shift_path(const char *cgroup, const char *root, const char **ret_shifted) {
11,242✔
1062
        _cleanup_free_ char *rt = NULL;
11,242✔
1063
        char *p;
11,242✔
1064
        int r;
11,242✔
1065

1066
        assert(cgroup);
11,242✔
1067
        assert(ret_shifted);
11,242✔
1068

1069
        if (!root) {
11,242✔
1070
                /* If the root was specified let's use that, otherwise
1071
                 * let's determine it from PID 1 */
1072

1073
                r = cg_get_root_path(&rt);
2,125✔
1074
                if (r < 0)
2,125✔
1075
                        return r;
1076

1077
                root = rt;
2,125✔
1078
        }
1079

1080
        p = path_startswith(cgroup, root);
11,242✔
1081
        if (p && p > cgroup)
11,242✔
1082
                *ret_shifted = p - 1;
2✔
1083
        else
1084
                *ret_shifted = cgroup;
11,240✔
1085

1086
        return 0;
1087
}
1088

1089
int cg_pid_get_path_shifted(pid_t pid, const char *root, char **ret_cgroup) {
13,894✔
1090
        _cleanup_free_ char *raw = NULL;
13,894✔
1091
        const char *c;
13,894✔
1092
        int r;
13,894✔
1093

1094
        assert(pid >= 0);
13,894✔
1095
        assert(ret_cgroup);
13,894✔
1096

1097
        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
13,894✔
1098
        if (r < 0)
13,894✔
1099
                return r;
1100

1101
        r = cg_shift_path(raw, root, &c);
11,066✔
1102
        if (r < 0)
11,066✔
1103
                return r;
1104

1105
        if (c == raw) {
11,066✔
1106
                *ret_cgroup = TAKE_PTR(raw);
11,066✔
1107
                return 0;
11,066✔
1108
        }
1109

1110
        return strdup_to(ret_cgroup, c);
×
1111
}
1112

1113
int cg_path_decode_unit(const char *cgroup, char **ret_unit) {
32,524✔
1114
        assert(cgroup);
32,524✔
1115
        assert(ret_unit);
32,524✔
1116

1117
        size_t n = strcspn(cgroup, "/");
32,524✔
1118
        if (n < 3)
32,524✔
1119
                return -ENXIO;
1120

1121
        char *c = strndupa_safe(cgroup, n);
32,517✔
1122
        c = cg_unescape(c);
32,517✔
1123

1124
        if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
32,517✔
1125
                return -ENXIO;
1126

1127
        return strdup_to(ret_unit, c);
32,509✔
1128
}
1129

1130
static bool valid_slice_name(const char *p, size_t n) {
115,332✔
1131

1132
        if (!p)
115,332✔
1133
                return false;
1134

1135
        if (n < STRLEN("x.slice"))
115,317✔
1136
                return false;
1137

1138
        if (memcmp(p + n - 6, ".slice", 6) == 0) {
115,299✔
1139
                char buf[n+1], *c;
58,406✔
1140

1141
                memcpy(buf, p, n);
58,406✔
1142
                buf[n] = 0;
58,406✔
1143

1144
                c = cg_unescape(buf);
58,406✔
1145

1146
                return unit_name_is_valid(c, UNIT_NAME_PLAIN);
58,406✔
1147
        }
1148

1149
        return false;
1150
}
1151

1152
static const char *skip_slices(const char *p) {
        size_t len;

        assert(p);

        /* Advances past all leading path components that are valid slice names, and returns a pointer
         * to the first component that is not one (leading slashes of it already skipped). */

        for (;; p += len) {
                p += strspn(p, "/");

                len = strcspn(p, "/");
                if (!valid_slice_name(p, len))
                        return p;
        }
}
1169

1170
int cg_path_get_unit(const char *path, char **ret) {
16,840✔
1171
        _cleanup_free_ char *unit = NULL;
16,840✔
1172
        const char *e;
16,840✔
1173
        int r;
16,840✔
1174

1175
        assert(path);
16,840✔
1176
        assert(ret);
16,840✔
1177

1178
        e = skip_slices(path);
16,840✔
1179

1180
        r = cg_path_decode_unit(e, &unit);
16,840✔
1181
        if (r < 0)
16,840✔
1182
                return r;
1183

1184
        /* We skipped over the slices, don't accept any now */
1185
        if (endswith(unit, ".slice"))
16,829✔
1186
                return -ENXIO;
1187

1188
        *ret = TAKE_PTR(unit);
16,829✔
1189
        return 0;
16,829✔
1190
}
1191

1192
int cg_path_get_unit_path(const char *path, char **ret) {
8,957✔
1193
        _cleanup_free_ char *path_copy = NULL;
8,957✔
1194
        char *unit_name;
8,957✔
1195

1196
        assert(path);
8,957✔
1197
        assert(ret);
8,957✔
1198

1199
        path_copy = strdup(path);
8,957✔
1200
        if (!path_copy)
8,957✔
1201
                return -ENOMEM;
1202

1203
        unit_name = (char *)skip_slices(path_copy);
8,957✔
1204
        unit_name[strcspn(unit_name, "/")] = 0;
8,957✔
1205

1206
        if (!unit_name_is_valid(cg_unescape(unit_name), UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
8,957✔
1207
                return -ENXIO;
1208

1209
        *ret = TAKE_PTR(path_copy);
8,954✔
1210

1211
        return 0;
8,954✔
1212
}
1213

1214
int cg_pid_get_unit(pid_t pid, char **ret_unit) {
569✔
1215
        _cleanup_free_ char *cgroup = NULL;
569✔
1216
        int r;
569✔
1217

1218
        assert(ret_unit);
569✔
1219

1220
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
569✔
1221
        if (r < 0)
569✔
1222
                return r;
1223

1224
        return cg_path_get_unit(cgroup, ret_unit);
565✔
1225
}
1226

1227
int cg_pidref_get_unit(const PidRef *pidref, char **ret) {
427✔
1228
        _cleanup_free_ char *unit = NULL;
427✔
1229
        int r;
427✔
1230

1231
        assert(ret);
427✔
1232

1233
        if (!pidref_is_set(pidref))
427✔
1234
                return -ESRCH;
1235
        if (pidref_is_remote(pidref))
854✔
1236
                return -EREMOTE;
1237

1238
        r = cg_pid_get_unit(pidref->pid, &unit);
427✔
1239
        if (r < 0)
427✔
1240
                return r;
1241

1242
        r = pidref_verify(pidref);
423✔
1243
        if (r < 0)
423✔
1244
                return r;
1245

1246
        *ret = TAKE_PTR(unit);
423✔
1247
        return 0;
423✔
1248
}
1249

1250
/**
 * Skips a leading "session-*.scope" component. Returns a pointer to what follows it, or NULL if the
 * path does not begin with such a component or the session ID in it is invalid.
 */
static const char *skip_session(const char *p) {
        size_t n, id_len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) != 0 || memcmp(p + n - 6, ".scope", 6) != 0)
                return NULL;

        /* Extract what is between the prefix and the suffix: the session ID */
        id_len = n - 8 - 6;
        char id[id_len + 1];

        memcpy(id, p + 8, id_len);
        id[id_len] = 0;

        /* Session scopes never need unescaping: their names cannot conflict with the kernel's own
         * names, hence no cg_unescape() here. */

        if (!session_id_valid(id))
                return NULL;

        p += n;
        return p + strspn(p, "/");
}
1286

1287
/**
1288
 * Skip user@*.service or capsule@*.service, but require either of them to be there.
1289
 */
1290
static const char *skip_user_manager(const char *p) {
15,229✔
1291
        size_t n;
15,229✔
1292

1293
        if (isempty(p))
15,229✔
1294
                return NULL;
15,229✔
1295

1296
        p += strspn(p, "/");
15,225✔
1297

1298
        n = strcspn(p, "/");
15,225✔
1299
        if (n < CONST_MIN(STRLEN("user@x.service"), STRLEN("capsule@x.service")))
15,225✔
1300
                return NULL;
1301

1302
        /* Any possible errors from functions called below are converted to NULL return, so our callers won't
1303
         * resolve user/capsule name. */
1304
        _cleanup_free_ char *unit_name = strndup(p, n);
15,059✔
1305
        if (!unit_name)
15,059✔
1306
                return NULL;
1307

1308
        _cleanup_free_ char *i = NULL;
15,059✔
1309
        UnitNameFlags type = unit_name_to_instance(unit_name, &i);
15,059✔
1310

1311
        if (type != UNIT_NAME_INSTANCE)
15,059✔
1312
                return NULL;
1313

1314
        /* Note that user manager services never need unescaping, since they cannot conflict with the
1315
         * kernel's own names, hence we don't need to call cg_unescape() here.  Prudently check validity of
1316
         * instance names, they should be always valid as we validate them upon unit start. */
1317
        if (startswith(unit_name, "user@")) {
471✔
1318
                if (parse_uid(i, NULL) < 0)
380✔
1319
                        return NULL;
1320

1321
                p += n;
380✔
1322
                p += strspn(p, "/");
380✔
1323
                return p;
380✔
1324
        } else if (startswith(unit_name, "capsule@")) {
91✔
1325
                if (capsule_name_is_valid(i) <= 0)
5✔
1326
                        return NULL;
1327

1328
                p += n;
5✔
1329
                p += strspn(p, "/");
5✔
1330
                return p;
5✔
1331
        }
1332

1333
        return NULL;
1334
}
1335

1336
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        /* Skips everything in front of a user unit: first any slices, then either the user manager
         * service or, failing that, the session scope. Returns NULL if neither of the two is found. */

        after_slices = skip_slices(path);

        after_manager = skip_user_manager(after_slices);
        if (!after_manager)
                return skip_session(after_slices);

        return after_manager;
}
1352

1353
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        /* Extracts the user unit from a cgroup path. Returns -ENXIO if the path has no user prefix. */

        rest = skip_user_prefix(path);
        if (!rest)
                return -ENXIO;

        /* What remains is laid out just like a system unit's path, so reuse that parser */
        return cg_path_get_unit(rest, ret);
}
1367

1368
int cg_pid_get_user_unit(pid_t pid, char **ret_unit) {
117✔
1369
        _cleanup_free_ char *cgroup = NULL;
117✔
1370
        int r;
117✔
1371

1372
        assert(ret_unit);
117✔
1373

1374
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
117✔
1375
        if (r < 0)
117✔
1376
                return r;
1377

1378
        return cg_path_get_user_unit(cgroup, ret_unit);
117✔
1379
}
1380

1381
int cg_path_get_machine_name(const char *path, char **ret_machine) {
101✔
1382
        _cleanup_free_ char *u = NULL;
101✔
1383
        const char *sl;
101✔
1384
        int r;
101✔
1385

1386
        r = cg_path_get_unit(path, &u);
101✔
1387
        if (r < 0)
101✔
1388
                return r;
1389

1390
        sl = strjoina("/run/systemd/machines/unit:", u);
505✔
1391
        return readlink_malloc(sl, ret_machine);
101✔
1392
}
1393

1394
int cg_pid_get_machine_name(pid_t pid, char **ret_machine) {
101✔
1395
        _cleanup_free_ char *cgroup = NULL;
101✔
1396
        int r;
101✔
1397

1398
        assert(ret_machine);
101✔
1399

1400
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
101✔
1401
        if (r < 0)
101✔
1402
                return r;
1403

1404
        return cg_path_get_machine_name(cgroup, ret_machine);
101✔
1405
}
1406

1407
int cg_path_get_session(const char *path, char **ret_session) {
8,356✔
1408
        _cleanup_free_ char *unit = NULL;
8,356✔
1409
        char *start, *end;
8,356✔
1410
        int r;
8,356✔
1411

1412
        assert(path);
8,356✔
1413

1414
        r = cg_path_get_unit(path, &unit);
8,356✔
1415
        if (r < 0)
8,356✔
1416
                return r;
1417

1418
        start = startswith(unit, "session-");
8,355✔
1419
        if (!start)
8,355✔
1420
                return -ENXIO;
1421
        end = endswith(start, ".scope");
251✔
1422
        if (!end)
251✔
1423
                return -ENXIO;
1424

1425
        *end = 0;
251✔
1426
        if (!session_id_valid(start))
251✔
1427
                return -ENXIO;
1428

1429
        if (!ret_session)
250✔
1430
                return 0;
1431

1432
        return strdup_to(ret_session, start);
250✔
1433
}
1434

1435
int cg_pid_get_session(pid_t pid, char **ret_session) {
740✔
1436
        _cleanup_free_ char *cgroup = NULL;
740✔
1437
        int r;
740✔
1438

1439
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
740✔
1440
        if (r < 0)
740✔
1441
                return r;
1442

1443
        return cg_path_get_session(cgroup, ret_session);
740✔
1444
}
1445

1446
int cg_pidref_get_session(const PidRef *pidref, char **ret) {
257✔
1447
        int r;
257✔
1448

1449
        if (!pidref_is_set(pidref))
257✔
1450
                return -ESRCH;
257✔
1451
        if (pidref_is_remote(pidref))
514✔
1452
                return -EREMOTE;
1453

1454
        _cleanup_free_ char *session = NULL;
257✔
1455
        r = cg_pid_get_session(pidref->pid, &session);
257✔
1456
        if (r < 0)
257✔
1457
                return r;
1458

1459
        r = pidref_verify(pidref);
210✔
1460
        if (r < 0)
210✔
1461
                return r;
1462

1463
        if (ret)
210✔
1464
                *ret = TAKE_PTR(session);
210✔
1465
        return 0;
1466
}
1467

1468
int cg_path_get_owner_uid(const char *path, uid_t *ret_uid) {
8,032✔
1469
        _cleanup_free_ char *slice = NULL;
8,032✔
1470
        char *start, *end;
8,032✔
1471
        int r;
8,032✔
1472

1473
        assert(path);
8,032✔
1474

1475
        r = cg_path_get_slice(path, &slice);
8,032✔
1476
        if (r < 0)
8,032✔
1477
                return r;
1478

1479
        start = startswith(slice, "user-");
8,032✔
1480
        if (!start)
8,032✔
1481
                return -ENXIO;
1482

1483
        end = endswith(start, ".slice");
417✔
1484
        if (!end)
417✔
1485
                return -ENXIO;
1486

1487
        *end = 0;
417✔
1488
        if (parse_uid(start, ret_uid) < 0)
417✔
1489
                return -ENXIO;
×
1490

1491
        return 0;
1492
}
1493

1494
int cg_pid_get_owner_uid(pid_t pid, uid_t *ret_uid) {
435✔
1495
        _cleanup_free_ char *cgroup = NULL;
435✔
1496
        int r;
435✔
1497

1498
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
435✔
1499
        if (r < 0)
435✔
1500
                return r;
1501

1502
        return cg_path_get_owner_uid(cgroup, ret_uid);
435✔
1503
}
1504

1505
int cg_pidref_get_owner_uid(const PidRef *pidref, uid_t *ret) {
45✔
1506
        int r;
45✔
1507

1508
        if (!pidref_is_set(pidref))
45✔
1509
                return -ESRCH;
45✔
1510
        if (pidref_is_remote(pidref))
45✔
1511
                return -EREMOTE;
1512

1513
        uid_t uid;
45✔
1514
        r = cg_pid_get_owner_uid(pidref->pid, &uid);
45✔
1515
        if (r < 0)
45✔
1516
                return r;
1517

1518
        r = pidref_verify(pidref);
8✔
1519
        if (r < 0)
8✔
1520
                return r;
1521

1522
        if (ret)
8✔
1523
                *ret = uid;
8✔
1524

1525
        return 0;
1526
}
1527

1528
int cg_path_get_slice(const char *p, char **ret_slice) {
15,900✔
1529
        const char *e = NULL;
15,900✔
1530

1531
        assert(p);
15,900✔
1532
        assert(ret_slice);
15,900✔
1533

1534
        /* Finds the right-most slice unit from the beginning, but stops before we come to
1535
         * the first non-slice unit. */
1536

1537
        for (;;) {
48,588✔
1538
                const char *s;
32,244✔
1539
                int n;
32,244✔
1540

1541
                n = path_find_first_component(&p, /* accept_dot_dot = */ false, &s);
32,244✔
1542
                if (n < 0)
32,244✔
1543
                        return n;
×
1544
                if (!valid_slice_name(s, n))
32,244✔
1545
                        break;
1546

1547
                e = s;
16,344✔
1548
        }
1549

1550
        if (e)
15,900✔
1551
                return cg_path_decode_unit(e, ret_slice);
15,675✔
1552

1553
        return strdup_to(ret_slice, SPECIAL_ROOT_SLICE);
225✔
1554
}
1555

1556
int cg_pid_get_slice(pid_t pid, char **ret_slice) {
121✔
1557
        _cleanup_free_ char *cgroup = NULL;
121✔
1558
        int r;
121✔
1559

1560
        assert(ret_slice);
121✔
1561

1562
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
121✔
1563
        if (r < 0)
121✔
1564
                return r;
1565

1566
        return cg_path_get_slice(cgroup, ret_slice);
121✔
1567
}
1568

1569
int cg_path_get_user_slice(const char *p, char **ret_slice) {
        const char *rest;

        assert(p);
        assert(ret_slice);

        /* Extracts the user slice from a cgroup path. Returns -ENXIO if the path has no user prefix. */

        rest = skip_user_prefix(p);
        if (!rest)
                return -ENXIO;

        /* What remains is laid out just like a system slice's path, so reuse that parser */
        return cg_path_get_slice(rest, ret_slice);
}
1582

1583
int cg_pid_get_user_slice(pid_t pid, char **ret_slice) {
×
1584
        _cleanup_free_ char *cgroup = NULL;
×
1585
        int r;
×
1586

1587
        assert(ret_slice);
×
1588

1589
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
×
1590
        if (r < 0)
×
1591
                return r;
1592

1593
        return cg_path_get_user_slice(cgroup, ret_slice);
×
1594
}
1595

1596
bool cg_needs_escape(const char *p) {
17,405✔
1597

1598
        /* Checks if the specified path is a valid cgroup name by our rules, or if it must be escaped. Note
1599
         * that we consider escaped cgroup names invalid here, as they need to be escaped a second time if
1600
         * they shall be used. Also note that various names cannot be made valid by escaping even if we
1601
         * return true here (because too long, or contain the forbidden character "/"). */
1602

1603
        if (!filename_is_valid(p))
17,405✔
1604
                return true;
1605

1606
        if (IN_SET(p[0], '_', '.'))
17,401✔
1607
                return true;
1608

1609
        if (STR_IN_SET(p, "notify_on_release", "release_agent", "tasks"))
17,395✔
1610
                return true;
2✔
1611

1612
        if (startswith(p, "cgroup."))
17,393✔
1613
                return true;
1614

1615
        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
243,474✔
1616
                const char *q;
226,083✔
1617

1618
                q = startswith(p, cgroup_controller_to_string(c));
226,083✔
1619
                if (!q)
226,083✔
1620
                        continue;
226,083✔
1621

1622
                if (q[0] == '.')
×
1623
                        return true;
1624
        }
1625

1626
        return false;
1627
}
1628

1629
int cg_escape(const char *p, char **ret) {
17,122✔
1630
        _cleanup_free_ char *n = NULL;
17,122✔
1631

1632
        /* This implements very minimal escaping for names to be used as file names in the cgroup tree: any
1633
         * name which might conflict with a kernel name or is prefixed with '_' is prefixed with a '_'. That
1634
         * way, when reading cgroup names it is sufficient to remove a single prefixing underscore if there
1635
         * is one. */
1636

1637
        /* The return value of this function (unlike cg_unescape()) needs free()! */
1638

1639
        if (cg_needs_escape(p)) {
17,122✔
1640
                n = strjoin("_", p);
7✔
1641
                if (!n)
7✔
1642
                        return -ENOMEM;
1643

1644
                if (!filename_is_valid(n)) /* became invalid due to the prefixing? Or contained things like a slash that cannot be fixed by prefixing? */
7✔
1645
                        return -EINVAL;
1646
        } else {
1647
                n = strdup(p);
17,115✔
1648
                if (!n)
17,115✔
1649
                        return -ENOMEM;
1650
        }
1651

1652
        *ret = TAKE_PTR(n);
17,122✔
1653
        return 0;
17,122✔
1654
}
1655

1656
char* cg_unescape(const char *p) {
100,103✔
1657
        assert(p);
100,103✔
1658

1659
        /* The return value of this function (unlike cg_escape())
1660
         * doesn't need free()! */
1661

1662
        if (p[0] == '_')
100,103✔
1663
                return (char*) p+1;
14✔
1664

1665
        return (char*) p;
1666
}
1667

1668
#define CONTROLLER_VALID                        \
1669
        DIGITS LETTERS                          \
1670
        "_"
1671

1672
bool cg_controller_is_valid(const char *p) {
402,000✔
1673
        const char *t, *s;
402,000✔
1674

1675
        if (!p)
402,000✔
1676
                return false;
1677

1678
        if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
402,000✔
1679
                return true;
1680

1681
        s = startswith(p, "name=");
118,887✔
1682
        if (s)
118,887✔
1683
                p = s;
32✔
1684

1685
        if (IN_SET(*p, 0, '_'))
118,887✔
1686
                return false;
1687

1688
        for (t = p; *t; t++)
762,555✔
1689
                if (!strchr(CONTROLLER_VALID, *t))
643,679✔
1690
                        return false;
1691

1692
        if (t - p > NAME_MAX)
118,876✔
1693
                return false;
×
1694

1695
        return true;
1696
}
1697

1698
int cg_slice_to_path(const char *unit, char **ret) {
7,526✔
1699
        _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
7,526✔
1700
        const char *dash;
7,526✔
1701
        int r;
7,526✔
1702

1703
        assert(unit);
7,526✔
1704
        assert(ret);
7,526✔
1705

1706
        if (streq(unit, SPECIAL_ROOT_SLICE))
7,526✔
1707
                return strdup_to(ret, "");
7✔
1708

1709
        if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
7,519✔
1710
                return -EINVAL;
1711

1712
        if (!endswith(unit, ".slice"))
7,508✔
1713
                return -EINVAL;
1714

1715
        r = unit_name_to_prefix(unit, &p);
7,507✔
1716
        if (r < 0)
7,507✔
1717
                return r;
1718

1719
        dash = strchr(p, '-');
7,507✔
1720

1721
        /* Don't allow initial dashes */
1722
        if (dash == p)
7,507✔
1723
                return -EINVAL;
1724

1725
        while (dash) {
7,740✔
1726
                _cleanup_free_ char *escaped = NULL;
238✔
1727
                char n[dash - p + sizeof(".slice")];
238✔
1728

1729
#if HAS_FEATURE_MEMORY_SANITIZER
1730
                /* msan doesn't instrument stpncpy, so it thinks
1731
                 * n is later used uninitialized:
1732
                 * https://github.com/google/sanitizers/issues/926
1733
                 */
1734
                zero(n);
1735
#endif
1736

1737
                /* Don't allow trailing or double dashes */
1738
                if (IN_SET(dash[1], 0, '-'))
238✔
1739
                        return -EINVAL;
1740

1741
                strcpy(stpncpy(n, p, dash - p), ".slice");
236✔
1742
                if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
236✔
1743
                        return -EINVAL;
1744

1745
                r = cg_escape(n, &escaped);
236✔
1746
                if (r < 0)
236✔
1747
                        return r;
1748

1749
                if (!strextend(&s, escaped, "/"))
236✔
1750
                        return -ENOMEM;
1751

1752
                dash = strchr(dash+1, '-');
236✔
1753
        }
1754

1755
        r = cg_escape(unit, &e);
7,502✔
1756
        if (r < 0)
7,502✔
1757
                return r;
1758

1759
        if (!strextend(&s, e))
7,502✔
1760
                return -ENOMEM;
1761

1762
        *ret = TAKE_PTR(s);
7,502✔
1763
        return 0;
7,502✔
1764
}
1765

1766
int cg_is_threaded(const char *path) {
×
1767
        _cleanup_free_ char *fs = NULL, *contents = NULL;
×
1768
        _cleanup_strv_free_ char **v = NULL;
×
1769
        int r;
×
1770

1771
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "cgroup.type", &fs);
×
1772
        if (r < 0)
×
1773
                return r;
1774

1775
        r = read_full_virtual_file(fs, &contents, NULL);
×
1776
        if (r == -ENOENT)
×
1777
                return false; /* Assume no. */
1778
        if (r < 0)
×
1779
                return r;
1780

1781
        v = strv_split(contents, NULL);
×
1782
        if (!v)
×
1783
                return -ENOMEM;
1784

1785
        /* If the cgroup is in the threaded mode, it contains "threaded".
1786
         * If one of the parents or siblings is in the threaded mode, it may contain "invalid". */
1787
        return strv_contains(v, "threaded") || strv_contains(v, "invalid");
×
1788
}
1789

1790
int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
48,981✔
1791
        _cleanup_free_ char *p = NULL;
48,981✔
1792
        int r;
48,981✔
1793

1794
        r = cg_get_path(controller, path, attribute, &p);
48,981✔
1795
        if (r < 0)
48,981✔
1796
                return r;
1797

1798
        return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER);
48,981✔
1799
}
1800

1801
int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
28,834✔
1802
        _cleanup_free_ char *p = NULL;
28,834✔
1803
        int r;
28,834✔
1804

1805
        r = cg_get_path(controller, path, attribute, &p);
28,834✔
1806
        if (r < 0)
28,834✔
1807
                return r;
1808

1809
        return read_one_line_file(p, ret);
28,834✔
1810
}
1811

1812
int cg_get_attribute_as_uint64(const char *controller, const char *path, const char *attribute, uint64_t *ret) {
24,992✔
1813
        _cleanup_free_ char *value = NULL;
24,992✔
1814
        uint64_t v;
24,992✔
1815
        int r;
24,992✔
1816

1817
        assert(ret);
24,992✔
1818

1819
        r = cg_get_attribute(controller, path, attribute, &value);
24,992✔
1820
        if (r == -ENOENT)
24,992✔
1821
                return -ENODATA;
1822
        if (r < 0)
21,868✔
1823
                return r;
1824

1825
        if (streq(value, "max")) {
21,868✔
1826
                *ret = CGROUP_LIMIT_MAX;
4,669✔
1827
                return 0;
4,669✔
1828
        }
1829

1830
        r = safe_atou64(value, &v);
17,199✔
1831
        if (r < 0)
17,199✔
1832
                return r;
1833

1834
        *ret = v;
17,199✔
1835
        return 0;
17,199✔
1836
}
1837

1838
int cg_get_attribute_as_bool(const char *controller, const char *path, const char *attribute, bool *ret) {
57✔
1839
        _cleanup_free_ char *value = NULL;
57✔
1840
        int r;
57✔
1841

1842
        assert(ret);
57✔
1843

1844
        r = cg_get_attribute(controller, path, attribute, &value);
57✔
1845
        if (r == -ENOENT)
57✔
1846
                return -ENODATA;
1847
        if (r < 0)
57✔
1848
                return r;
1849

1850
        r = parse_boolean(value);
57✔
1851
        if (r < 0)
57✔
1852
                return r;
1853

1854
        *ret = r;
57✔
1855
        return 0;
57✔
1856
}
1857

1858
int cg_get_owner(const char *path, uid_t *ret_uid) {
35✔
1859
        _cleanup_free_ char *f = NULL;
35✔
1860
        struct stat stats;
35✔
1861
        int r;
35✔
1862

1863
        assert(ret_uid);
35✔
1864

1865
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &f);
35✔
1866
        if (r < 0)
35✔
1867
                return r;
1868

1869
        if (stat(f, &stats) < 0)
35✔
1870
                return -errno;
16✔
1871

1872
        r = stat_verify_directory(&stats);
19✔
1873
        if (r < 0)
19✔
1874
                return r;
1875

1876
        *ret_uid = stats.st_uid;
19✔
1877
        return 0;
19✔
1878
}
1879

1880
int cg_get_keyed_attribute_full(
34,473✔
1881
                const char *controller,
1882
                const char *path,
1883
                const char *attribute,
1884
                char **keys,
1885
                char **ret_values,
1886
                CGroupKeyMode mode) {
1887

1888
        _cleanup_free_ char *filename = NULL, *contents = NULL;
34,473✔
1889
        const char *p;
34,473✔
1890
        size_t n, i, n_done = 0;
34,473✔
1891
        char **v;
34,473✔
1892
        int r;
34,473✔
1893

1894
        /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with
1895
         * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of
1896
         * entries as 'keys'. On success each entry will be set to the value of the matching key.
1897
         *
1898
         * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. If mode
1899
         * is set to GG_KEY_MODE_GRACEFUL we ignore missing keys and return those that were parsed successfully. */
1900

1901
        r = cg_get_path(controller, path, attribute, &filename);
34,473✔
1902
        if (r < 0)
34,473✔
1903
                return r;
1904

1905
        r = read_full_file(filename, &contents, NULL);
34,473✔
1906
        if (r < 0)
34,473✔
1907
                return r;
1908

1909
        n = strv_length(keys);
27,957✔
1910
        if (n == 0) /* No keys to retrieve? That's easy, we are done then */
27,957✔
1911
                return 0;
1912

1913
        /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
1914
        v = newa0(char*, n);
27,957✔
1915

1916
        for (p = contents; *p;) {
95,517✔
1917
                const char *w = NULL;
1918

1919
                for (i = 0; i < n; i++)
163,077✔
1920
                        if (!v[i]) {
105,373✔
1921
                                w = first_word(p, keys[i]);
95,517✔
1922
                                if (w)
95,517✔
1923
                                        break;
1924
                        }
1925

1926
                if (w) {
95,517✔
1927
                        size_t l;
37,813✔
1928

1929
                        l = strcspn(w, NEWLINE);
37,813✔
1930
                        v[i] = strndup(w, l);
37,813✔
1931
                        if (!v[i]) {
37,813✔
1932
                                r = -ENOMEM;
×
1933
                                goto fail;
×
1934
                        }
1935

1936
                        n_done++;
37,813✔
1937
                        if (n_done >= n)
37,813✔
1938
                                goto done;
27,957✔
1939

1940
                        p = w + l;
9,856✔
1941
                } else
1942
                        p += strcspn(p, NEWLINE);
57,704✔
1943

1944
                p += strspn(p, NEWLINE);
67,560✔
1945
        }
1946

1947
        if (mode & CG_KEY_MODE_GRACEFUL)
×
1948
                goto done;
×
1949

1950
        r = -ENXIO;
1951

1952
fail:
×
1953
        free_many_charp(v, n);
34,473✔
1954
        return r;
1955

1956
done:
27,957✔
1957
        memcpy(ret_values, v, sizeof(char*) * n);
27,957✔
1958
        if (mode & CG_KEY_MODE_GRACEFUL)
27,957✔
1959
                return n_done;
9,856✔
1960

1961
        return 0;
1962
}
1963

1964
int cg_mask_to_string(CGroupMask mask, char **ret) {
        _cleanup_free_ char *buf = NULL;
        bool need_sep = false;
        size_t len = 0;

        /* Formats 'mask' as a space-separated list of controller names, in controller enumeration
         * order. An empty mask yields *ret == NULL. Returns 0 on success, -ENOMEM on allocation
         * failure. */

        assert(ret);

        if (mask == 0) {
                *ret = NULL;
                return 0;
        }

        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                const char *name;
                size_t name_len;

                if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c)))
                        continue;

                name = cgroup_controller_to_string(c);
                name_len = strlen(name);

                /* Room for an optional separator, the name, and the trailing NUL. */
                if (!GREEDY_REALLOC(buf, len + need_sep + name_len + 1))
                        return -ENOMEM;

                if (need_sep)
                        buf[len++] = ' ';

                memcpy(buf + len, name, name_len);
                len += name_len;

                need_sep = true;
        }

        assert(buf);

        buf[len] = 0;
        *ret = TAKE_PTR(buf);

        return 0;
}
2005

2006
int cg_mask_from_string(const char *value, CGroupMask *ret) {
        CGroupMask mask = 0;

        /* Parses a whitespace-separated list of controller names into a mask. Words that are not
         * recognized by cgroup_controller_from_string() are silently skipped. Returns 0 on success, or
         * the negative error returned by extract_first_word(). */

        assert(ret);
        assert(value);

        for (;;) {
                _cleanup_free_ char *word = NULL;
                CGroupController c;
                int r;

                r = extract_first_word(&value, &word, NULL, 0);
                if (r < 0)
                        return r;
                if (r == 0) /* End of string reached. */
                        break;

                c = cgroup_controller_from_string(word);
                if (c >= 0)
                        mask |= CGROUP_CONTROLLER_TO_MASK(c);
        }

        *ret = mask;
        return 0;
}
2033

2034
int cg_mask_supported_subtree(const char *root, CGroupMask *ret) {
        _cleanup_free_ char *controllers = NULL, *path = NULL;
        CGroupMask mask = 0;
        int r;

        /* Determines the mask of supported cgroup controllers below 'root'. Only includes controllers
         * we can make sense of and that are actually accessible. Only covers real controllers, i.e.
         * not the CGROUP_CONTROLLER_BPF_xyz pseudo-controllers. Returns 0 and sets *ret on success, a
         * negative errno-style value otherwise. */

        r = cg_all_unified();
        if (r < 0)
                return r;

        if (r == 0) {
                /* Legacy hierarchy: probe each v1 controller individually for accessibility. */
                for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                        CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);

                        if (!FLAGS_SET(CGROUP_MASK_V1, bit))
                                continue;

                        if (controller_is_v1_accessible(root, cgroup_controller_to_string(c)) >= 0)
                                mask |= bit;
                }

                *ret = mask;
                return 0;
        }

        /* Unified hierarchy: the supported and accessible controllers are listed in the
         * "cgroup.controllers" attribute of the root cgroup. */
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
        if (r < 0)
                return r;

        r = read_one_line_file(path, &controllers);
        if (r < 0)
                return r;

        r = cg_mask_from_string(controllers, &mask);
        if (r < 0)
                return r;

        /* Drop controllers that are not supported in the unified hierarchy. */
        *ret = mask & CGROUP_MASK_V2;
        return 0;
}
2088

2089
int cg_mask_supported(CGroupMask *ret) {
        _cleanup_free_ char *root_path = NULL;
        int r;

        /* Like cg_mask_supported_subtree(), but operates on the path returned by
         * cg_get_root_path(). */

        r = cg_get_root_path(&root_path);
        if (r < 0)
                return r;

        r = cg_mask_supported_subtree(root_path, ret);
        return r;
}
2099

2100
int cg_kernel_controllers(Set **ret) {
        _cleanup_set_free_ Set *controllers = NULL;
        _cleanup_fclose_ FILE *f = NULL;
        int r;

        assert(ret);

        /* Determines the full list of kernel-known controllers by parsing /proc/cgroups. Might include
         * controllers we don't actually support and controllers that aren't currently accessible
         * (because not mounted). This does not include "name=" pseudo-controllers.
         *
         * Returns 0 on success, with *ret set to the set of enabled controller names (NULL if
         * /proc/cgroups does not exist or lists no enabled controller). Returns -EBADMSG if a line
         * cannot be parsed or names an invalid controller, and another negative errno-style value on
         * I/O or allocation failure. */

        r = fopen_unlocked("/proc/cgroups", "re", &f);
        if (r == -ENOENT) {
                *ret = NULL;
                return 0;
        }
        if (r < 0)
                return r;

        /* Ignore the header line */
        (void) read_line(f, SIZE_MAX, NULL);

        for (;;) {
                _cleanup_free_ char *controller = NULL;
                int enabled = 0;

                /* Columns: name, hierarchy ID (skipped), number of cgroups (skipped), enabled flag. */
                if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {

                        /* Never return 0 ("success") from the error path: if the stream is in error
                         * state but errno carries no usable code, fall back to -EIO. Otherwise the
                         * caller would see success with *ret left unset. */
                        if (ferror(f))
                                return errno > 0 ? -errno : -EIO;

                        if (feof(f))
                                break;

                        return -EBADMSG;
                }

                if (!enabled)
                        continue;

                if (!cg_controller_is_valid(controller))
                        return -EBADMSG;

                /* Ownership of the string passes to the set. */
                r = set_ensure_consume(&controllers, &string_hash_ops_free, TAKE_PTR(controller));
                if (r < 0)
                        return r;
        }

        *ret = TAKE_PTR(controllers);

        return 0;
}
2152

2153
/* Hybrid mode was first implemented in v232, where cgroup2 was simply mounted on
 * /sys/fs/cgroup/systemd. That broke other tools (such as docker) which expected the v1 "name=systemd"
 * hierarchy at that location. Since v233, hybrid mode instead mounts v2 on /sys/fs/cgroup/unified and
 * keeps the "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
 *
 * To keep live upgrades working, the v232 layout is still detected and supported. When it is detected,
 * cgroup v2 process management is kept but the compat dual layout is disabled: we return true from
 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and false from cg_hybrid_unified().
 */
static thread_local bool unified_systemd_v232 = false;
2164

2165
int cg_unified_cached(bool flush) {
662,235✔
2166
        static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
662,235✔
2167

2168
        struct statfs fs;
662,235✔
2169

2170
        /* Checks if we support the unified hierarchy. Returns an
2171
         * error when the cgroup hierarchies aren't mounted yet or we
2172
         * have any other trouble determining if the unified hierarchy
2173
         * is supported. */
2174

2175
        if (flush)
662,235✔
2176
                unified_cache = CGROUP_UNIFIED_UNKNOWN;
17,937✔
2177
        else if (unified_cache >= CGROUP_UNIFIED_NONE)
644,298✔
2178
                return unified_cache;
662,235✔
2179

2180
        if (statfs("/sys/fs/cgroup/", &fs) < 0)
32,875✔
2181
                return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
×
2182

2183
        if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
32,875✔
2184
                log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
32,875✔
2185
                unified_cache = CGROUP_UNIFIED_ALL;
32,875✔
UNCOV
2186
        } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
×
2187
                if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
×
2188
                    F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
×
2189
                        log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
×
2190
                        unified_cache = CGROUP_UNIFIED_SYSTEMD;
×
2191
                        unified_systemd_v232 = false;
×
2192
                } else {
2193
                        if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) {
×
2194
                                if (errno == ENOENT) {
×
2195
                                        /* Some other software may have set up /sys/fs/cgroup in a configuration we do not recognize. */
2196
                                        log_debug_errno(errno, "Unsupported cgroupsv1 setup detected: name=systemd hierarchy not found.");
×
2197
                                        return -ENOMEDIUM;
×
2198
                                }
2199
                                return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
×
2200
                        }
2201

2202
                        if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
×
2203
                                log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
×
2204
                                unified_cache = CGROUP_UNIFIED_SYSTEMD;
×
2205
                                unified_systemd_v232 = true;
×
2206
                        } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
×
2207
                                log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
×
2208
                                unified_cache = CGROUP_UNIFIED_NONE;
×
2209
                        } else {
2210
                                log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
×
2211
                                          (unsigned long long) fs.f_type);
2212
                                unified_cache = CGROUP_UNIFIED_NONE;
×
2213
                        }
2214
                }
UNCOV
2215
        } else if (F_TYPE_EQUAL(fs.f_type, SYSFS_MAGIC)) {
×
UNCOV
2216
                return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
×
2217
                                       "No filesystem is currently mounted on /sys/fs/cgroup.");
2218
        } else
2219
                return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
×
2220
                                       "Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2221
                                       (unsigned long long)fs.f_type);
2222

2223
        return unified_cache;
32,875✔
2224
}
2225

2226
int cg_unified_controller(const char *controller) {
103,351✔
2227
        int r;
103,351✔
2228

2229
        r = cg_unified_cached(false);
103,351✔
2230
        if (r < 0)
103,351✔
2231
                return r;
2232

2233
        if (r == CGROUP_UNIFIED_NONE)
103,351✔
2234
                return false;
2235

2236
        if (r >= CGROUP_UNIFIED_ALL)
103,351✔
2237
                return true;
2238

2239
        return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
×
2240
}
2241

2242
int cg_all_unified(void) {
514,524✔
2243
        int r;
514,524✔
2244

2245
        r = cg_unified_cached(false);
514,524✔
2246
        if (r < 0)
514,524✔
2247
                return r;
2248

2249
        return r >= CGROUP_UNIFIED_ALL;
514,524✔
2250
}
2251

2252
int cg_hybrid_unified(void) {
26,422✔
2253
        int r;
26,422✔
2254

2255
        r = cg_unified_cached(false);
26,422✔
2256
        if (r < 0)
26,422✔
2257
                return r;
2258

2259
        return r == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
26,422✔
2260
}
2261

2262
int cg_is_delegated(const char *path) {
        int r;

        /* Checks whether the cgroup at 'path' is marked as delegated via an extended attribute.
         * Returns the result of cg_get_xattr_bool() for the first of the two xattrs that is present,
         * or false if neither is set. */

        assert(path);

        r = cg_get_xattr_bool(path, "trusted.delegate");
        if (ERRNO_IS_NEG_XATTR_ABSENT(r)) {
                /* If the trusted xattr isn't set (preferred), then check the untrusted one. Under the
                 * assumption that whoever is trusted enough to own the cgroup, is also trusted enough
                 * to decide if it is delegated or not this should be safe. */
                r = cg_get_xattr_bool(path, "user.delegate");
                if (ERRNO_IS_NEG_XATTR_ABSENT(r))
                        return false;
        }

        return r;
}
2277

2278
int cg_is_delegated_fd(int fd) {
        int r;

        /* Same check as cg_is_delegated(), but on an already opened file descriptor: looks at
         * "trusted.delegate" first and falls back to "user.delegate" if that xattr is absent. Returns
         * false if neither is set. */

        assert(fd >= 0);

        r = getxattr_at_bool(fd, /* path= */ NULL, "trusted.delegate", /* flags= */ 0);
        if (ERRNO_IS_NEG_XATTR_ABSENT(r)) {
                r = getxattr_at_bool(fd, /* path= */ NULL, "user.delegate", /* flags= */ 0);
                if (ERRNO_IS_NEG_XATTR_ABSENT(r))
                        return false;
        }

        return r;
}
2290

2291
int cg_has_coredump_receive(const char *path) {
        int r;

        /* Returns the boolean value of the "user.coredump_receive" xattr on the cgroup at 'path',
         * false if the xattr is absent, or the error returned by cg_get_xattr_bool(). */

        assert(path);

        r = cg_get_xattr_bool(path, "user.coredump_receive");

        return ERRNO_IS_NEG_XATTR_ABSENT(r) ? false : r;
}
2302

2303
/* Default value for each I/O limit type: every limit starts out as CGROUP_LIMIT_MAX. */
const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]  = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WBPS_MAX]  = CGROUP_LIMIT_MAX,
        [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
};
2309

2310
/* String name for each CGroupIOLimitType value, consumed by the lookup macro below. */
static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]  = "IOReadBandwidthMax",
        [CGROUP_IO_WBPS_MAX]  = "IOWriteBandwidthMax",
        [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
        [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
};

/* NOTE(review): presumably generates cgroup_io_limit_type_to_string()/_from_string() — confirm in string-table.h. */
DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
4,325✔
2318

2319
/* String name for each CGroupController value, covering both the real controllers and the
 * "bpf-" prefixed pseudo-controllers. */
static const char* const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
        [CGROUP_CONTROLLER_CPU]                             = "cpu",
        [CGROUP_CONTROLLER_CPUACCT]                         = "cpuacct",
        [CGROUP_CONTROLLER_CPUSET]                          = "cpuset",
        [CGROUP_CONTROLLER_IO]                              = "io",
        [CGROUP_CONTROLLER_BLKIO]                           = "blkio",
        [CGROUP_CONTROLLER_MEMORY]                          = "memory",
        [CGROUP_CONTROLLER_DEVICES]                         = "devices",
        [CGROUP_CONTROLLER_PIDS]                            = "pids",
        [CGROUP_CONTROLLER_BPF_FIREWALL]                    = "bpf-firewall",
        [CGROUP_CONTROLLER_BPF_DEVICES]                     = "bpf-devices",
        [CGROUP_CONTROLLER_BPF_FOREIGN]                     = "bpf-foreign",
        [CGROUP_CONTROLLER_BPF_SOCKET_BIND]                 = "bpf-socket-bind",
        [CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES] = "bpf-restrict-network-interfaces",
};

/* Provides the cgroup_controller_to_string() and cgroup_controller_from_string() helpers used in this file. */
DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
386,888✔
2336

2337
CGroupMask get_cpu_accounting_mask(void) {
1,719,571✔
2338
        static CGroupMask needed_mask = (CGroupMask) -1;
1,719,571✔
2339

2340
        /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is
2341
         * provided externally from the CPU controller, which means we don't
2342
         * need to enable the CPU controller just to get metrics. This is good,
2343
         * because enabling the CPU controller comes at a minor performance
2344
         * hit, especially when it's propagated deep into large hierarchies.
2345
         * There's also no separate CPU accounting controller available within
2346
         * a unified hierarchy.
2347
         *
2348
         * This combination of factors results in the desired cgroup mask to
2349
         * enable for CPU accounting varying as follows:
2350
         *
2351
         *                   ╔═════════════════════╤═════════════════════╗
2352
         *                   ║     Linux ≥4.15     │     Linux <4.15     ║
2353
         *   ╔═══════════════╬═════════════════════╪═════════════════════╣
2354
         *   ║ Unified       ║ nothing             │ CGROUP_MASK_CPU     ║
2355
         *   ╟───────────────╫─────────────────────┼─────────────────────╢
2356
         *   ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║
2357
         *   ╚═══════════════╩═════════════════════╧═════════════════════╝
2358
         *
2359
         * We check kernel version here instead of manually checking whether
2360
         * cpu.stat is present for every cgroup, as that check in itself would
2361
         * already be fairly expensive.
2362
         *
2363
         * Kernels where this patch has been backported will therefore have the
2364
         * CPU controller enabled unnecessarily. This is more expensive than
2365
         * necessary, but harmless. ☺️
2366
         */
2367

2368
        if (needed_mask == (CGroupMask) -1) {
1,719,571✔
2369
                if (cg_all_unified()) {
719✔
2370
                        struct utsname u;
719✔
2371
                        assert_se(uname(&u) >= 0);
719✔
2372

2373
                        if (strverscmp_improved(u.release, "4.15") < 0)
719✔
UNCOV
2374
                                needed_mask = CGROUP_MASK_CPU;
×
2375
                        else
2376
                                needed_mask = 0;
719✔
2377
                } else
UNCOV
2378
                        needed_mask = CGROUP_MASK_CPUACCT;
×
2379
        }
2380

2381
        return needed_mask;
1,719,571✔
2382
}
2383

2384
bool cpu_accounting_is_cheap(void) {
2,324✔
2385
        return get_cpu_accounting_mask() == 0;
2,324✔
2386
}
2387

2388
/* String name for each ManagedOOMMode value, consumed by the lookup macro below. */
static const char* const managed_oom_mode_table[_MANAGED_OOM_MODE_MAX] = {
        [MANAGED_OOM_AUTO] = "auto",
        [MANAGED_OOM_KILL] = "kill",
};

/* NOTE(review): presumably generates managed_oom_mode_to_string()/_from_string() — confirm in string-table.h. */
DEFINE_STRING_TABLE_LOOKUP(managed_oom_mode, ManagedOOMMode);
37,487✔
2394

2395
/* String name for each ManagedOOMPreference value, consumed by the lookup macro below. */
static const char* const managed_oom_preference_table[_MANAGED_OOM_PREFERENCE_MAX] = {
        [MANAGED_OOM_PREFERENCE_NONE]  = "none",
        [MANAGED_OOM_PREFERENCE_AVOID] = "avoid",
        [MANAGED_OOM_PREFERENCE_OMIT]  = "omit",
};

/* NOTE(review): presumably generates managed_oom_preference_to_string()/_from_string() — confirm in string-table.h. */
DEFINE_STRING_TABLE_LOOKUP(managed_oom_preference, ManagedOOMPreference);
18,524✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc