• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 13800668245

11 Mar 2025 09:26PM UTC coverage: 71.892% (-0.02%) from 71.913%
13800668245

push

github

yuwata
hostname: fix typo

Follow-up for af9c45d5b.

0 of 1 new or added line in 1 file covered. (0.0%)

3914 existing lines in 70 files now uncovered.

295869 of 411545 relevant lines covered (71.89%)

720055.3 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

79.86
/src/basic/cgroup-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <errno.h>
4
#include <limits.h>
5
#include <signal.h>
6
#include <stddef.h>
7
#include <stdlib.h>
8
#include <sys/types.h>
9
#include <sys/utsname.h>
10
#include <sys/xattr.h>
11
#include <threads.h>
12
#include <unistd.h>
13

14
#include "alloc-util.h"
15
#include "capsule-util.h"
16
#include "cgroup-util.h"
17
#include "constants.h"
18
#include "dirent-util.h"
19
#include "extract-word.h"
20
#include "fd-util.h"
21
#include "fileio.h"
22
#include "format-util.h"
23
#include "fs-util.h"
24
#include "log.h"
25
#include "login-util.h"
26
#include "macro.h"
27
#include "missing_fs.h"
28
#include "missing_magic.h"
29
#include "mkdir.h"
30
#include "parse-util.h"
31
#include "path-util.h"
32
#include "process-util.h"
33
#include "set.h"
34
#include "special.h"
35
#include "stat-util.h"
36
#include "stdio-util.h"
37
#include "string-table.h"
38
#include "string-util.h"
39
#include "strv.h"
40
#include "unit-name.h"
41
#include "user-util.h"
42
#include "xattr-util.h"
43

44
int cg_path_open(const char *controller, const char *path) {
780✔
45
        _cleanup_free_ char *fs = NULL;
780✔
46
        int r;
780✔
47

48
        r = cg_get_path(controller, path, /* item=*/ NULL, &fs);
780✔
49
        if (r < 0)
780✔
50
                return r;
51

52
        return RET_NERRNO(open(fs, O_DIRECTORY|O_CLOEXEC));
780✔
53
}
54

55
int cg_cgroupid_open(int cgroupfs_fd, uint64_t id) {
8✔
56
        _cleanup_close_ int fsfd = -EBADF;
8✔
57

58
        if (cgroupfs_fd < 0) {
8✔
59
                fsfd = open("/sys/fs/cgroup", O_CLOEXEC|O_DIRECTORY);
7✔
60
                if (fsfd < 0)
7✔
UNCOV
61
                        return -errno;
×
62

63
                cgroupfs_fd = fsfd;
64
        }
65

66
        cg_file_handle fh = CG_FILE_HANDLE_INIT;
8✔
67
        CG_FILE_HANDLE_CGROUPID(fh) = id;
8✔
68

69
        return RET_NERRNO(open_by_handle_at(cgroupfs_fd, &fh.file_handle, O_DIRECTORY|O_CLOEXEC));
14✔
70
}
71

72
int cg_path_from_cgroupid(int cgroupfs_fd, uint64_t id, char **ret) {
×
73
        _cleanup_close_ int cgfd = -EBADF;
×
UNCOV
74
        int r;
×
75

76
        cgfd = cg_cgroupid_open(cgroupfs_fd, id);
×
UNCOV
77
        if (cgfd < 0)
×
78
                return cgfd;
79

80
        _cleanup_free_ char *path = NULL;
×
81
        r = fd_get_path(cgfd, &path);
×
UNCOV
82
        if (r < 0)
×
83
                return r;
84

UNCOV
85
        if (!path_startswith(path, "/sys/fs/cgroup/"))
×
86
                return -EXDEV; /* recognizable error */
87

88
        if (ret)
×
UNCOV
89
                *ret = TAKE_PTR(path);
×
90
        return 0;
91
}
92

93
int cg_get_cgroupid_at(int dfd, const char *path, uint64_t *ret) {
5,747✔
94
        cg_file_handle fh = CG_FILE_HANDLE_INIT;
5,747✔
95
        int mnt_id;
5,747✔
96

97
        assert(dfd >= 0 || (dfd == AT_FDCWD && path_is_absolute(path)));
11,462✔
98
        assert(ret);
5,747✔
99

100
        /* This is cgroupfs so we know the size of the handle, thus no need to loop around like
101
         * name_to_handle_at_loop() does in mountpoint-util.c */
102
        if (name_to_handle_at(dfd, strempty(path), &fh.file_handle, &mnt_id, isempty(path) ? AT_EMPTY_PATH : 0) < 0) {
11,494✔
103
                assert(errno != EOVERFLOW);
×
UNCOV
104
                return -errno;
×
105
        }
106

107
        *ret = CG_FILE_HANDLE_CGROUPID(fh);
5,747✔
108
        return 0;
5,747✔
109
}
110

111
static int cg_enumerate_items(const char *controller, const char *path, FILE **ret, const char *item) {
29,896✔
112
        _cleanup_free_ char *fs = NULL;
29,896✔
113
        FILE *f;
29,896✔
114
        int r;
29,896✔
115

116
        assert(ret);
29,896✔
117

118
        r = cg_get_path(controller, path, item, &fs);
29,896✔
119
        if (r < 0)
29,896✔
120
                return r;
121

122
        f = fopen(fs, "re");
29,896✔
123
        if (!f)
29,896✔
124
                return -errno;
18,818✔
125

126
        *ret = f;
11,078✔
127
        return 0;
11,078✔
128
}
129

130
/* Opens the "cgroup.procs" file of the specified cgroup; thin wrapper around cg_enumerate_items(). */
int cg_enumerate_processes(const char *controller, const char *path, FILE **ret) {
        const char *item = "cgroup.procs";

        return cg_enumerate_items(controller, path, ret, item);
}
133

134
/* Reads the next PID from a stream previously opened on a cgroup process list.
 *
 * Returns 1 and stores the PID in '*ret' when one was read, 0 (with '*ret' set to 0) at end of file,
 * and a negative error if parsing failed or the value exceeds PID_T_MAX.
 *
 * Note that the cgroup.procs might contain duplicates! See cgroups.txt for details. */
int cg_read_pid(FILE *f, pid_t *ret, CGroupFlags flags) {
        assert(f);
        assert(ret);

        for (;;) {
                unsigned long value;

                errno = 0;
                if (fscanf(f, "%lu", &value) != 1) {
                        if (!feof(f))
                                return errno_or_else(EIO);

                        /* Clean end of the list */
                        *ret = 0;
                        return 0;
                }

                if (value > PID_T_MAX)
                        return -EIO;

                /* In some circumstances (e.g. WSL), cgroups might contain unmappable PIDs from other
                 * contexts. These show up as zeros, and depending on the caller, can either be plain
                 * skipped over, or returned as-is. */
                if (value != 0 || FLAGS_SET(flags, CGROUP_DONT_SKIP_UNMAPPED)) {
                        *ret = (pid_t) value;
                        return 1;
                }
        }
}
167

168
/* Like cg_read_pid(), but returns a PidRef. Returns 1 if an entry was read, 0 at the end of the list (in
 * which case '*ret' is set to PIDREF_NULL), and a negative error otherwise. A PID of zero is reported as
 * -EREMOTE. If CGROUP_NO_PIDFD is set the reference is built via PIDREF_MAKE_FROM_PID(); otherwise
 * pidref_set_pid() is used, and entries for which it reports -ESRCH are skipped. */
int cg_read_pidref(FILE *f, PidRef *ret, CGroupFlags flags) {
        int r;

        assert(f);
        assert(ret);

        do {
                pid_t pid;

                r = cg_read_pid(f, &pid, flags);
                if (r < 0)
                        return log_debug_errno(r, "Failed to read pid from cgroup item: %m");
                if (r == 0) {
                        *ret = PIDREF_NULL;
                        return 0;
                }

                if (pid == 0)
                        return -EREMOTE;

                if (FLAGS_SET(flags, CGROUP_NO_PIDFD)) {
                        *ret = PIDREF_MAKE_FROM_PID(pid);
                        return 1;
                }

                r = pidref_set_pid(ret, pid);
                if (r >= 0)
                        return 1;

                /* ESRCH → gone by now? just skip over it, read the next */
        } while (r == -ESRCH);

        return r;
}
202

203
int cg_read_event(
12,565✔
204
                const char *controller,
205
                const char *path,
206
                const char *event,
207
                char **ret) {
208

209
        _cleanup_free_ char *events = NULL, *content = NULL;
12,565✔
210
        int r;
12,565✔
211

212
        r = cg_get_path(controller, path, "cgroup.events", &events);
12,565✔
213
        if (r < 0)
12,565✔
214
                return r;
215

216
        r = read_full_virtual_file(events, &content, NULL);
12,565✔
217
        if (r < 0)
12,565✔
218
                return r;
219

220
        for (const char *p = content;;) {
4,799✔
221
                _cleanup_free_ char *line = NULL, *key = NULL;
4,799✔
222
                const char *q;
4,799✔
223

224
                r = extract_first_word(&p, &line, "\n", 0);
4,799✔
225
                if (r < 0)
4,799✔
226
                        return r;
227
                if (r == 0)
4,799✔
228
                        return -ENOENT;
229

230
                q = line;
4,799✔
231
                r = extract_first_word(&q, &key, " ", 0);
4,799✔
232
                if (r < 0)
4,799✔
233
                        return r;
234
                if (r == 0)
4,799✔
235
                        return -EINVAL;
236

237
                if (!streq(key, event))
4,799✔
UNCOV
238
                        continue;
×
239

240
                return strdup_to(ret, q);
4,799✔
241
        }
242
}
243

244
/* Reports whether /proc/self/ns/cgroup exists. The answer is cached in a thread-local variable after
 * the first check; access() failures other than ENOENT are logged at debug level and treated as
 * "not supported". */
bool cg_ns_supported(void) {
        static thread_local int cached = -1;

        if (cached < 0) {
                if (access("/proc/self/ns/cgroup", F_OK) >= 0)
                        cached = true;
                else {
                        if (errno != ENOENT)
                                log_debug_errno(errno, "Failed to check whether /proc/self/ns/cgroup is available, assuming not: %m");
                        cached = false;
                }
        }

        return cached;
}
256

257
/* Reports whether the cgroup freezer can be used: cg_all_unified() must return a positive value and
 * /sys/fs/cgroup/init.scope/cgroup.freeze must exist. The result is cached per thread. */
bool cg_freezer_supported(void) {
        static thread_local int cached = -1;

        if (cached >= 0)
                return cached;

        if (cg_all_unified() <= 0)
                cached = false;
        else if (access("/sys/fs/cgroup/init.scope/cgroup.freeze", F_OK) >= 0)
                cached = true;
        else {
                if (errno != ENOENT)
                        log_debug_errno(errno, "Failed to check whether cgroup freezer is available, assuming not: %m");
                cached = false;
        }

        return cached;
}
272

273
/* Reports whether the "cgroup.kill" attribute can be used: cg_all_unified() must return a positive
 * value and /sys/fs/cgroup/init.scope/cgroup.kill must exist. The result is cached per thread. */
bool cg_kill_supported(void) {
        static thread_local int cached = -1;

        if (cached >= 0)
                return cached;

        if (cg_all_unified() <= 0)
                cached = false;
        else if (access("/sys/fs/cgroup/init.scope/cgroup.kill", F_OK) >= 0)
                cached = true;
        else {
                if (errno != ENOENT)
                        log_debug_errno(errno, "Failed to check whether cgroup.kill is available, assuming not: %m");
                cached = false;
        }

        return cached;
}
288

289
int cg_enumerate_subgroups(const char *controller, const char *path, DIR **ret) {
27,615✔
290
        _cleanup_free_ char *fs = NULL;
27,615✔
291
        DIR *d;
27,615✔
292
        int r;
27,615✔
293

294
        assert(ret);
27,615✔
295

296
        /* This is not recursive! */
297

298
        r = cg_get_path(controller, path, NULL, &fs);
27,615✔
299
        if (r < 0)
27,615✔
300
                return r;
301

302
        d = opendir(fs);
27,615✔
303
        if (!d)
27,615✔
304
                return -errno;
16,171✔
305

306
        *ret = d;
11,444✔
307
        return 0;
11,444✔
308
}
309

310
int cg_read_subgroup(DIR *d, char **ret) {
16,959✔
311
        assert(d);
16,959✔
312
        assert(ret);
16,959✔
313

314
        FOREACH_DIRENT_ALL(de, d, return -errno) {
537,869✔
315
                if (de->d_type != DT_DIR)
526,195✔
316
                        continue;
497,562✔
317

318
                if (dot_or_dot_dot(de->d_name))
28,633✔
319
                        continue;
23,348✔
320

321
                return strdup_to_full(ret, de->d_name);
5,285✔
322
        }
323

324
        *ret = NULL;
11,674✔
325
        return 0;
11,674✔
326
}
327

328
static int cg_kill_items(
29,233✔
329
                const char *path,
330
                const char *item,
331
                int sig,
332
                CGroupFlags flags,
333
                Set *s,
334
                cg_kill_log_func_t log_kill,
335
                void *userdata) {
336

337
        _cleanup_set_free_ Set *allocated_set = NULL;
29,233✔
338
        int r, ret = 0;
29,233✔
339

340
        assert(path);
29,233✔
341
        assert(item);
29,233✔
342
        assert(sig >= 0);
29,233✔
343

344
         /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence
345
          * don't send SIGCONT on SIGKILL. */
346
        if (IN_SET(sig, SIGCONT, SIGKILL))
29,233✔
347
                flags &= ~CGROUP_SIGCONT;
10,988✔
348

349
        /* This goes through the tasks list and kills them all. This is repeated until no further processes
350
         * are added to the tasks list, to properly handle forking processes.
351
         *
352
         * When sending SIGKILL, prefer cg_kill_kernel_sigkill(), which is fully atomic. */
353

354
        if (!s) {
29,233✔
355
                s = allocated_set = set_new(NULL);
932✔
356
                if (!s)
932✔
357
                        return -ENOMEM;
358
        }
359

360
        bool done;
29,484✔
361
        do {
29,484✔
362
                _cleanup_fclose_ FILE *f = NULL;
18,818✔
363
                int ret_log_kill;
29,484✔
364

365
                done = true;
29,484✔
366

367
                r = cg_enumerate_items(SYSTEMD_CGROUP_CONTROLLER, path, &f, item);
29,484✔
368
                if (r == -ENOENT)
29,484✔
369
                        break;
370
                if (r < 0)
10,666✔
UNCOV
371
                        return RET_GATHER(ret, log_debug_errno(r, "Failed to enumerate cgroup items: %m"));
×
372

373
                for (;;) {
14,860✔
374
                        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
14,860✔
375

376
                        r = cg_read_pidref(f, &pidref, flags);
14,860✔
377
                        if (r < 0)
14,860✔
UNCOV
378
                                return RET_GATHER(ret, log_debug_errno(r, "Failed to read pidref from cgroup '%s': %m", path));
×
379
                        if (r == 0)
14,860✔
380
                                break;
381

382
                        if ((flags & CGROUP_IGNORE_SELF) && pidref_is_self(&pidref))
4,194✔
383
                                continue;
932✔
384

385
                        if (set_contains(s, PID_TO_PTR(pidref.pid)))
3,262✔
386
                                continue;
1,995✔
387

388
                        /* Ignore kernel threads to mimic the behavior of cgroup.kill. */
389
                        if (pidref_is_kernel_thread(&pidref) > 0) {
1,267✔
390
                                log_debug("Ignoring kernel thread with pid " PID_FMT " in cgroup '%s'", pidref.pid, path);
×
UNCOV
391
                                continue;
×
392
                        }
393

394
                        if (log_kill)
1,267✔
395
                                ret_log_kill = log_kill(&pidref, sig, userdata);
89✔
396

397
                        /* If we haven't killed this process yet, kill it */
398
                        r = pidref_kill(&pidref, sig);
1,267✔
399
                        if (r < 0 && r != -ESRCH)
1,267✔
UNCOV
400
                                RET_GATHER(ret, log_debug_errno(r, "Failed to kill process with pid " PID_FMT " from cgroup '%s': %m", pidref.pid, path));
×
401
                        if (r >= 0) {
1,267✔
402
                                if (flags & CGROUP_SIGCONT)
1,267✔
403
                                        (void) pidref_kill(&pidref, SIGCONT);
1,176✔
404

405
                                if (ret == 0) {
1,267✔
406
                                        if (log_kill)
319✔
407
                                                ret = ret_log_kill;
408
                                        else
409
                                                ret = 1;
230✔
410
                                }
411
                        }
412

413
                        done = false;
1,267✔
414

415
                        r = set_put(s, PID_TO_PTR(pidref.pid));
1,267✔
416
                        if (r < 0)
1,267✔
UNCOV
417
                                return RET_GATHER(ret, r);
×
418
                }
419

420
                /* To avoid racing against processes which fork quicker than we can kill them, we repeat this
421
                 * until no new pids need to be killed. */
422

423
        } while (!done);
10,666✔
424

425
        return ret;
426
}
427

428
int cg_kill(
23,739✔
429
                const char *path,
430
                int sig,
431
                CGroupFlags flags,
432
                Set *s,
433
                cg_kill_log_func_t log_kill,
434
                void *userdata) {
435

436
        int r, ret;
23,739✔
437

438
        assert(path);
23,739✔
439

440
        ret = cg_kill_items(path, "cgroup.procs", sig, flags, s, log_kill, userdata);
23,739✔
441
        if (ret < 0)
23,739✔
UNCOV
442
                return log_debug_errno(ret, "Failed to kill processes in cgroup '%s' item cgroup.procs: %m", path);
×
443
        if (sig != SIGKILL)
23,739✔
444
                return ret;
445

446
        /* Only in case of killing with SIGKILL and when using cgroupsv2, kill remaining threads manually as
447
           a workaround for kernel bug. It was fixed in 5.2-rc5 (c03cd7738a83), backported to 4.19.66
448
           (4340d175b898) and 4.14.138 (feb6b123b7dd). */
449
        r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
5,494✔
450
        if (r < 0)
5,494✔
451
                return r;
452
        if (r == 0)
5,494✔
453
                return ret;
454

455
        /* Opening pidfds for non thread group leaders only works from 6.9 onwards with PIDFD_THREAD. On
456
         * older kernels or without PIDFD_THREAD pidfd_open() fails with EINVAL. Since we might read non
457
         * thread group leader IDs from cgroup.threads, we set CGROUP_NO_PIDFD to avoid trying open pidfd's
458
         * for them and instead use the regular pid. */
459
        r = cg_kill_items(path, "cgroup.threads", sig, flags|CGROUP_NO_PIDFD, s, log_kill, userdata);
5,494✔
460
        if (r < 0)
5,494✔
UNCOV
461
                return log_debug_errno(r, "Failed to kill processes in cgroup '%s' item cgroup.threads: %m", path);
×
462

463
        return r > 0 || ret > 0;
5,494✔
464
}
465

466
int cg_kill_recursive(
23,271✔
467
                const char *path,
468
                int sig,
469
                CGroupFlags flags,
470
                Set *s,
471
                cg_kill_log_func_t log_kill,
472
                void *userdata) {
473

UNCOV
474
        _cleanup_set_free_ Set *allocated_set = NULL;
×
475
        _cleanup_closedir_ DIR *d = NULL;
23,271✔
476
        int r, ret;
23,271✔
477

478
        assert(path);
23,271✔
479
        assert(sig >= 0);
23,271✔
480

481
        if (!s) {
23,271✔
482
                s = allocated_set = set_new(NULL);
22,356✔
483
                if (!s)
22,356✔
484
                        return -ENOMEM;
485
        }
486

487
        ret = cg_kill(path, sig, flags, s, log_kill, userdata);
23,271✔
488

489
        r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
23,271✔
490
        if (r < 0) {
23,271✔
491
                if (r != -ENOENT)
16,171✔
UNCOV
492
                        RET_GATHER(ret, log_debug_errno(r, "Failed to enumerate cgroup '%s' subgroups: %m", path));
×
493

494
                return ret;
16,171✔
495
        }
496

497
        for (;;) {
7,464✔
498
                _cleanup_free_ char *fn = NULL, *p = NULL;
7,282✔
499

500
                r = cg_read_subgroup(d, &fn);
7,282✔
501
                if (r < 0) {
7,282✔
UNCOV
502
                        RET_GATHER(ret, log_debug_errno(r, "Failed to read subgroup from cgroup '%s': %m", path));
×
503
                        break;
504
                }
505
                if (r == 0)
7,282✔
506
                        break;
507

508
                p = path_join(empty_to_root(path), fn);
364✔
509
                if (!p)
182✔
UNCOV
510
                        return -ENOMEM;
×
511

512
                r = cg_kill_recursive(p, sig, flags, s, log_kill, userdata);
182✔
513
                if (r < 0)
182✔
UNCOV
514
                        log_debug_errno(r, "Failed to recursively kill processes in cgroup '%s': %m", p);
×
515
                if (r != 0 && ret >= 0)
182✔
516
                        ret = r;
15✔
517
        }
518

519
        return ret;
7,100✔
520
}
521

522
int cg_kill_kernel_sigkill(const char *path) {
×
523
        _cleanup_free_ char *killfile = NULL;
×
UNCOV
524
        int r;
×
525

526
        /* Kills the cgroup at `path` directly by writing to its cgroup.kill file.  This sends SIGKILL to all
527
         * processes in the cgroup and has the advantage of being completely atomic, unlike cg_kill_items(). */
528

UNCOV
529
        assert(path);
×
530

UNCOV
531
        if (!cg_kill_supported())
×
532
                return -EOPNOTSUPP;
533

534
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "cgroup.kill", &killfile);
×
UNCOV
535
        if (r < 0)
×
536
                return r;
537

538
        r = write_string_file(killfile, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
×
539
        if (r < 0)
×
UNCOV
540
                return log_debug_errno(r, "Failed to write to cgroup.kill for cgroup '%s': %m", path);
×
541

542
        return 0;
543
}
544

545
static const char *controller_to_dirname(const char *controller) {
×
UNCOV
546
        assert(controller);
×
547

548
        /* Converts a controller name to the directory name below /sys/fs/cgroup/ we want to mount it
549
         * to. Effectively, this just cuts off the name= prefixed used for named hierarchies, if it is
550
         * specified. */
551

552
        if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
×
UNCOV
553
                if (cg_hybrid_unified() > 0)
×
554
                        controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
555
                else
UNCOV
556
                        controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
×
557
        }
558

UNCOV
559
        return startswith(controller, "name=") ?: controller;
×
560
}
561

562
/* Builds "/sys/fs/cgroup/<controller dir>[/<path>][/<suffix>]" for the legacy hierarchy, leaving out
 * 'path' and 'suffix' when they are empty. Returns 0 and the newly allocated string in '*ret', or
 * -ENOMEM. */
static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **ret) {
        const char *dn;
        char *joined = NULL;
        bool no_path, no_suffix;

        assert(ret);
        assert(controller);

        dn = controller_to_dirname(controller);

        no_path = isempty(path);
        no_suffix = isempty(suffix);

        if (no_path) {
                if (no_suffix)
                        joined = path_join("/sys/fs/cgroup", dn);
                else
                        joined = path_join("/sys/fs/cgroup", dn, suffix);
        } else {
                if (no_suffix)
                        joined = path_join("/sys/fs/cgroup", dn, path);
                else
                        joined = path_join("/sys/fs/cgroup", dn, path, suffix);
        }
        if (!joined)
                return -ENOMEM;

        *ret = joined;
        return 0;
}
585

586
/* Builds "/sys/fs/cgroup[/<path>][/<suffix>]" for the unified hierarchy, leaving out 'path' and
 * 'suffix' when they are empty. Returns 0 and the newly allocated string in '*ret', or -ENOMEM. */
static int join_path_unified(const char *path, const char *suffix, char **ret) {
        char *joined;
        bool no_path, no_suffix;

        assert(ret);

        no_path = isempty(path);
        no_suffix = isempty(suffix);

        if (no_path) {
                if (no_suffix)
                        joined = strdup("/sys/fs/cgroup");
                else
                        joined = path_join("/sys/fs/cgroup", suffix);
        } else {
                if (no_suffix)
                        joined = path_join("/sys/fs/cgroup", path);
                else
                        joined = path_join("/sys/fs/cgroup", path, suffix);
        }
        if (!joined)
                return -ENOMEM;

        *ret = joined;
        return 0;
}
605

606
/* Generates the path for a cgroup object.
 *
 * With a controller: the result lives below /sys/fs/cgroup, built by join_path_unified() or
 * join_path_legacy() depending on cg_all_unified(). An invalid controller name yields -EINVAL.
 *
 * Without a controller: 'path' and 'suffix' are merely combined, without any prefix; at least one of
 * them must be non-empty, otherwise -EINVAL is returned.
 *
 * In both cases the result is run through path_simplify() and returned in '*ret'. */
int cg_get_path(const char *controller, const char *path, const char *suffix, char **ret) {
        int r;

        assert(ret);

        if (!controller) {
                bool no_path = isempty(path), no_suffix = isempty(suffix);
                char *combined;

                /* If no controller is specified, we return the path *below* the controllers, without any
                 * prefix. */

                if (no_path && no_suffix)
                        return -EINVAL;

                if (no_suffix)
                        combined = strdup(path);
                else if (no_path)
                        combined = strdup(suffix);
                else
                        combined = path_join(path, suffix);
                if (!combined)
                        return -ENOMEM;

                *ret = path_simplify(combined);
                return 0;
        }

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        r = cg_all_unified();
        if (r < 0)
                return r;

        r = r > 0 ?
                join_path_unified(path, suffix, ret) :
                join_path_legacy(controller, path, suffix, ret);
        if (r < 0)
                return r;

        path_simplify(*ret);
        return 0;
}
649

650
/* Checks accessibility of a legacy controller hierarchy via access_nofollow().
 *
 * If root is specified, we check that:
 * - possible subcgroup is created at root,
 * - we can modify the hierarchy.
 * This is done by testing "<root>/cgroup.procs" for W_OK. Without root only the existence (F_OK) of the
 * controller directory is tested. */
static int controller_is_v1_accessible(const char *root, const char *controller) {
        const char *check_path, *dn;
        int mode;

        assert(controller);

        dn = controller_to_dirname(controller);
        mode = root ? W_OK : F_OK;

        /* With root == NULL the argument list already ends at 'root'. */
        check_path = strjoina("/sys/fs/cgroup/", dn, root, root ? "/cgroup.procs" : NULL);
        return access_nofollow(check_path, mode);
}
664

665
/* Like cg_get_path(), but first verifies that the controller can be used: on the unified hierarchy
 * named hierarchies ("name=...") are refused with -EOPNOTSUPP, on the legacy hierarchy the controller
 * has to pass controller_is_v1_accessible(). */
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **ret) {
        int r;

        assert(controller);
        assert(ret);

        if (!cg_controller_is_valid(controller))
                return -EINVAL;

        r = cg_all_unified();
        if (r < 0)
                return r;

        if (r == 0) {
                /* Check if the specified controller is actually accessible */
                r = controller_is_v1_accessible(NULL, controller);
                if (r < 0)
                        return r;
        } else if (startswith(controller, "name="))
                /* In the unified hierarchy all controllers are considered accessible,
                 * except for the named hierarchies */
                return -EOPNOTSUPP;

        return cg_get_path(controller, path, suffix, ret);
}
691

692
int cg_set_xattr(const char *path, const char *name, const void *value, size_t size, int flags) {
8,072✔
693
        _cleanup_free_ char *fs = NULL;
8,072✔
694
        int r;
8,072✔
695

696
        assert(path);
8,072✔
697
        assert(name);
8,072✔
698
        assert(value || size <= 0);
8,072✔
699

700
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
8,072✔
701
        if (r < 0)
8,072✔
702
                return r;
703

704
        return RET_NERRNO(setxattr(fs, name, value, size, flags));
8,072✔
705
}
706

707
int cg_get_xattr(const char *path, const char *name, void *value, size_t size) {
×
708
        _cleanup_free_ char *fs = NULL;
×
709
        ssize_t n;
×
UNCOV
710
        int r;
×
711

712
        assert(path);
×
UNCOV
713
        assert(name);
×
714

715
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
×
UNCOV
716
        if (r < 0)
×
717
                return r;
718

719
        n = getxattr(fs, name, value, size);
×
720
        if (n < 0)
×
UNCOV
721
                return -errno;
×
722

UNCOV
723
        return (int) n;
×
724
}
725

726
int cg_get_xattr_malloc(const char *path, const char *name, char **ret) {
20,059✔
727
        _cleanup_free_ char *fs = NULL;
20,059✔
728
        int r;
20,059✔
729

730
        assert(path);
20,059✔
731
        assert(name);
20,059✔
732

733
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
20,059✔
734
        if (r < 0)
20,059✔
735
                return r;
736

737
        return lgetxattr_malloc(fs, name, ret);
20,059✔
738
}
739

740
int cg_get_xattr_bool(const char *path, const char *name) {
423✔
741
        _cleanup_free_ char *fs = NULL;
423✔
742
        int r;
423✔
743

744
        assert(path);
423✔
745
        assert(name);
423✔
746

747
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
423✔
748
        if (r < 0)
423✔
749
                return r;
750

751
        return getxattr_at_bool(AT_FDCWD, fs, name, /* flags= */ 0);
423✔
752
}
753

754
int cg_remove_xattr(const char *path, const char *name) {
39,886✔
755
        _cleanup_free_ char *fs = NULL;
39,886✔
756
        int r;
39,886✔
757

758
        assert(path);
39,886✔
759
        assert(name);
39,886✔
760

761
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs);
39,886✔
762
        if (r < 0)
39,886✔
763
                return r;
764

765
        return RET_NERRNO(removexattr(fs, name));
79,772✔
766
}
767

768
int cg_pid_get_path(const char *controller, pid_t pid, char **ret_path) {
51,010✔
769
        _cleanup_fclose_ FILE *f = NULL;
51,010✔
770
        const char *fs, *controller_str = NULL;  /* avoid false maybe-uninitialized warning */
51,010✔
771
        int unified, r;
51,010✔
772

773
        assert(pid >= 0);
51,010✔
774
        assert(ret_path);
51,010✔
775

776
        if (controller) {
51,010✔
777
                if (!cg_controller_is_valid(controller))
50,674✔
778
                        return -EINVAL;
779
        } else
780
                controller = SYSTEMD_CGROUP_CONTROLLER;
781

782
        unified = cg_unified_controller(controller);
51,010✔
783
        if (unified < 0)
51,010✔
784
                return unified;
785
        if (unified == 0) {
51,010✔
UNCOV
786
                if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
×
787
                        controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
788
                else
UNCOV
789
                        controller_str = controller;
×
790
        }
791

792
        fs = procfs_file_alloca(pid, "cgroup");
51,010✔
793
        r = fopen_unlocked(fs, "re", &f);
51,010✔
794
        if (r == -ENOENT)
51,010✔
795
                return -ESRCH;
796
        if (r < 0)
48,096✔
797
                return r;
798

799
        for (;;) {
49,270✔
800
                _cleanup_free_ char *line = NULL;
48,683✔
801
                char *e;
48,683✔
802

803
                r = read_line(f, LONG_LINE_MAX, &line);
48,683✔
804
                if (r < 0)
48,683✔
805
                        return r;
806
                if (r == 0)
48,678✔
807
                        return -ENODATA;
808

809
                if (unified) {
48,678✔
810
                        e = startswith(line, "0:");
48,678✔
811
                        if (!e)
48,678✔
812
                                continue;
587✔
813

814
                        e = strchr(e, ':');
48,091✔
815
                        if (!e)
48,091✔
UNCOV
816
                                continue;
×
817
                } else {
UNCOV
818
                        char *l;
×
819

820
                        l = strchr(line, ':');
×
821
                        if (!l)
×
UNCOV
822
                                continue;
×
823

824
                        l++;
×
825
                        e = strchr(l, ':');
×
826
                        if (!e)
×
827
                                continue;
×
UNCOV
828
                        *e = 0;
×
829

830
                        assert(controller_str);
×
831
                        r = string_contains_word(l, ",", controller_str);
×
UNCOV
832
                        if (r < 0)
×
833
                                return r;
834
                        if (r == 0)
×
UNCOV
835
                                continue;
×
836
                }
837

838
                _cleanup_free_ char *path = strdup(e + 1);
48,091✔
839
                if (!path)
48,091✔
840
                        return -ENOMEM;
841

842
                /* Refuse cgroup paths from outside our cgroup namespace */
843
                if (startswith(path, "/../"))
48,091✔
844
                        return -EUNATCH;
845

846
                /* Truncate suffix indicating the process is a zombie */
847
                e = endswith(path, " (deleted)");
48,091✔
848
                if (e)
48,091✔
849
                        *e = 0;
1,183✔
850

851
                *ret_path = TAKE_PTR(path);
48,091✔
852
                return 0;
48,091✔
853
        }
854
}
855

856
int cg_pidref_get_path(const char *controller, const PidRef *pidref, char **ret_path) {
21,001✔
857
        _cleanup_free_ char *path = NULL;
21,001✔
858
        int r;
21,001✔
859

860
        assert(ret_path);
21,001✔
861

862
        if (!pidref_is_set(pidref))
21,001✔
863
                return -ESRCH;
864
        if (pidref_is_remote(pidref))
42,002✔
865
                return -EREMOTE;
866

867
        // XXX: Ideally we'd use pidfd_get_cgroupid() + cg_path_from_cgroupid() here, to extract this
868
        // bit of information from pidfd directly. However, the latter requires privilege and it's
869
        // not entirely clear how to handle cgroups from outer namespace.
870

871
        r = cg_pid_get_path(controller, pidref->pid, &path);
21,001✔
872
        if (r < 0)
21,001✔
873
                return r;
874

875
        /* Before we return the path, make sure the procfs entry for this pid still matches the pidref */
876
        r = pidref_verify(pidref);
21,001✔
877
        if (r < 0)
21,001✔
878
                return r;
879

880
        *ret_path = TAKE_PTR(path);
21,001✔
881
        return 0;
21,001✔
882
}
883

884
int cg_is_empty(const char *controller, const char *path) {
4✔
885
        _cleanup_fclose_ FILE *f = NULL;
4✔
886
        pid_t pid;
4✔
887
        int r;
4✔
888

889
        assert(path);
4✔
890

891
        r = cg_enumerate_processes(controller, path, &f);
4✔
892
        if (r == -ENOENT)
4✔
893
                return true;
894
        if (r < 0)
4✔
895
                return r;
896

897
        r = cg_read_pid(f, &pid, CGROUP_DONT_SKIP_UNMAPPED);
4✔
898
        if (r < 0)
4✔
899
                return r;
900

901
        return r == 0;
4✔
902
}
903

904
int cg_is_empty_recursive(const char *controller, const char *path) {
12,565✔
905
        int r;
12,565✔
906

907
        assert(path);
12,565✔
908

909
        /* The root cgroup is always populated */
910
        if (controller && empty_or_root(path))
12,565✔
911
                return false;
912

913
        r = cg_unified_controller(controller);
12,565✔
914
        if (r < 0)
12,565✔
915
                return r;
916
        if (r > 0) {
12,565✔
917
                _cleanup_free_ char *t = NULL;
12,565✔
918

919
                /* On the unified hierarchy we can check empty state
920
                 * via the "populated" attribute of "cgroup.events". */
921

922
                r = cg_read_event(controller, path, "populated", &t);
12,565✔
923
                if (r == -ENOENT)
12,565✔
924
                        return true;
925
                if (r < 0)
4,799✔
926
                        return r;
927

928
                return streq(t, "0");
4,799✔
929
        } else {
930
                _cleanup_closedir_ DIR *d = NULL;
×
UNCOV
931
                char *fn;
×
932

933
                r = cg_is_empty(controller, path);
×
UNCOV
934
                if (r <= 0)
×
935
                        return r;
936

937
                r = cg_enumerate_subgroups(controller, path, &d);
×
UNCOV
938
                if (r == -ENOENT)
×
939
                        return true;
UNCOV
940
                if (r < 0)
×
941
                        return r;
942

943
                while ((r = cg_read_subgroup(d, &fn)) > 0) {
×
UNCOV
944
                        _cleanup_free_ char *p = NULL;
×
945

946
                        p = path_join(path, fn);
×
947
                        free(fn);
×
UNCOV
948
                        if (!p)
×
949
                                return -ENOMEM;
950

951
                        r = cg_is_empty_recursive(controller, p);
×
UNCOV
952
                        if (r <= 0)
×
953
                                return r;
954
                }
UNCOV
955
                if (r < 0)
×
956
                        return r;
957

UNCOV
958
                return true;
×
959
        }
960
}
961

962
int cg_split_spec(const char *spec, char **ret_controller, char **ret_path) {
23✔
963
        _cleanup_free_ char *controller = NULL, *path = NULL;
23✔
964
        int r;
23✔
965

966
        assert(spec);
23✔
967

968
        if (*spec == '/') {
23✔
969
                if (!path_is_normalized(spec))
15✔
970
                        return -EINVAL;
971

972
                if (ret_path) {
15✔
973
                        r = path_simplify_alloc(spec, &path);
15✔
974
                        if (r < 0)
15✔
975
                                return r;
976
                }
977

978
        } else {
979
                const char *e;
8✔
980

981
                e = strchr(spec, ':');
8✔
982
                if (e) {
8✔
983
                        controller = strndup(spec, e-spec);
6✔
984
                        if (!controller)
6✔
985
                                return -ENOMEM;
986
                        if (!cg_controller_is_valid(controller))
6✔
987
                                return -EINVAL;
988

989
                        if (!isempty(e + 1)) {
3✔
990
                                path = strdup(e+1);
2✔
991
                                if (!path)
2✔
992
                                        return -ENOMEM;
993

994
                                if (!path_is_normalized(path) ||
2✔
995
                                    !path_is_absolute(path))
2✔
996
                                        return -EINVAL;
997

998
                                path_simplify(path);
1✔
999
                        }
1000

1001
                } else {
1002
                        if (!cg_controller_is_valid(spec))
2✔
1003
                                return -EINVAL;
1004

1005
                        if (ret_controller) {
1✔
1006
                                controller = strdup(spec);
1✔
1007
                                if (!controller)
1✔
1008
                                        return -ENOMEM;
1009
                        }
1010
                }
1011
        }
1012

1013
        if (ret_controller)
18✔
1014
                *ret_controller = TAKE_PTR(controller);
18✔
1015
        if (ret_path)
18✔
1016
                *ret_path = TAKE_PTR(path);
18✔
1017
        return 0;
1018
}
1019

1020
int cg_mangle_path(const char *path, char **ret) {
465✔
1021
        _cleanup_free_ char *c = NULL, *p = NULL;
465✔
1022
        int r;
465✔
1023

1024
        assert(path);
465✔
1025
        assert(ret);
465✔
1026

1027
        /* First, check if it already is a filesystem path */
1028
        if (path_startswith(path, "/sys/fs/cgroup"))
465✔
1029
                return path_simplify_alloc(path, ret);
461✔
1030

1031
        /* Otherwise, treat it as cg spec */
1032
        r = cg_split_spec(path, &c, &p);
4✔
1033
        if (r < 0)
4✔
1034
                return r;
1035

1036
        return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, ret);
8✔
1037
}
1038

1039
int cg_get_root_path(char **ret_path) {
14,192✔
1040
        char *p, *e;
14,192✔
1041
        int r;
14,192✔
1042

1043
        assert(ret_path);
14,192✔
1044

1045
        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
14,192✔
1046
        if (r < 0)
14,192✔
1047
                return r;
14,192✔
1048

1049
        e = endswith(p, "/" SPECIAL_INIT_SCOPE);
14,192✔
1050
        if (!e)
14,192✔
1051
                e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
79✔
1052
        if (!e)
79✔
1053
                e = endswith(p, "/system"); /* even more legacy */
79✔
1054
        if (e)
14,192✔
1055
                *e = 0;
14,113✔
1056

1057
        *ret_path = p;
14,192✔
1058
        return 0;
14,192✔
1059
}
1060

1061
int cg_shift_path(const char *cgroup, const char *root, const char **ret_shifted) {
11,263✔
1062
        _cleanup_free_ char *rt = NULL;
11,263✔
1063
        char *p;
11,263✔
1064
        int r;
11,263✔
1065

1066
        assert(cgroup);
11,263✔
1067
        assert(ret_shifted);
11,263✔
1068

1069
        if (!root) {
11,263✔
1070
                /* If the root was specified let's use that, otherwise
1071
                 * let's determine it from PID 1 */
1072

1073
                r = cg_get_root_path(&rt);
2,087✔
1074
                if (r < 0)
2,087✔
1075
                        return r;
1076

1077
                root = rt;
2,087✔
1078
        }
1079

1080
        p = path_startswith(cgroup, root);
11,263✔
1081
        if (p && p > cgroup)
11,263✔
1082
                *ret_shifted = p - 1;
2✔
1083
        else
1084
                *ret_shifted = cgroup;
11,261✔
1085

1086
        return 0;
1087
}
1088

1089
int cg_pid_get_path_shifted(pid_t pid, const char *root, char **ret_cgroup) {
14,006✔
1090
        _cleanup_free_ char *raw = NULL;
14,006✔
1091
        const char *c;
14,006✔
1092
        int r;
14,006✔
1093

1094
        assert(pid >= 0);
14,006✔
1095
        assert(ret_cgroup);
14,006✔
1096

1097
        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
14,006✔
1098
        if (r < 0)
14,006✔
1099
                return r;
1100

1101
        r = cg_shift_path(raw, root, &c);
11,087✔
1102
        if (r < 0)
11,087✔
1103
                return r;
1104

1105
        if (c == raw) {
11,087✔
1106
                *ret_cgroup = TAKE_PTR(raw);
11,087✔
1107
                return 0;
11,087✔
1108
        }
1109

UNCOV
1110
        return strdup_to(ret_cgroup, c);
×
1111
}
1112

1113
int cg_path_decode_unit(const char *cgroup, char **ret_unit) {
32,661✔
1114
        assert(cgroup);
32,661✔
1115
        assert(ret_unit);
32,661✔
1116

1117
        size_t n = strcspn(cgroup, "/");
32,661✔
1118
        if (n < 3)
32,661✔
1119
                return -ENXIO;
1120

1121
        char *c = strndupa_safe(cgroup, n);
32,653✔
1122
        c = cg_unescape(c);
32,653✔
1123

1124
        if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
32,653✔
1125
                return -ENXIO;
1126

1127
        return strdup_to(ret_unit, c);
32,645✔
1128
}
1129

1130
static bool valid_slice_name(const char *p, size_t n) {
115,915✔
1131

1132
        if (!p)
115,915✔
1133
                return false;
1134

1135
        if (n < STRLEN("x.slice"))
115,899✔
1136
                return false;
1137

1138
        if (memcmp(p + n - 6, ".slice", 6) == 0) {
115,880✔
1139
                char buf[n+1], *c;
58,706✔
1140

1141
                memcpy(buf, p, n);
58,706✔
1142
                buf[n] = 0;
58,706✔
1143

1144
                c = cg_unescape(buf);
58,706✔
1145

1146
                return unit_name_is_valid(c, UNIT_NAME_PLAIN);
58,706✔
1147
        }
1148

1149
        return false;
1150
}
1151

1152
/* Skips over all leading slice components of 'p' (and the slashes in front of them), and returns a
 * pointer to the first path component that is not a valid slice name. */
static const char *skip_slices(const char *p) {
        assert(p);

        const char *component = p + strspn(p, "/");
        size_t len = strcspn(component, "/");

        while (valid_slice_name(component, len)) {
                /* Step over the slice and any slashes following it */
                component += len;
                component += strspn(component, "/");
                len = strcspn(component, "/");
        }

        return component;
}
1169

1170
int cg_path_get_unit(const char *path, char **ret) {
16,895✔
1171
        _cleanup_free_ char *unit = NULL;
16,895✔
1172
        const char *e;
16,895✔
1173
        int r;
16,895✔
1174

1175
        assert(path);
16,895✔
1176
        assert(ret);
16,895✔
1177

1178
        e = skip_slices(path);
16,895✔
1179

1180
        r = cg_path_decode_unit(e, &unit);
16,895✔
1181
        if (r < 0)
16,895✔
1182
                return r;
1183

1184
        /* We skipped over the slices, don't accept any now */
1185
        if (endswith(unit, ".slice"))
16,883✔
1186
                return -ENXIO;
1187

1188
        *ret = TAKE_PTR(unit);
16,883✔
1189
        return 0;
16,883✔
1190
}
1191

1192
int cg_path_get_unit_path(const char *path, char **ret) {
9,016✔
1193
        _cleanup_free_ char *path_copy = NULL;
9,016✔
1194
        char *unit_name;
9,016✔
1195

1196
        assert(path);
9,016✔
1197
        assert(ret);
9,016✔
1198

1199
        path_copy = strdup(path);
9,016✔
1200
        if (!path_copy)
9,016✔
1201
                return -ENOMEM;
1202

1203
        unit_name = (char *)skip_slices(path_copy);
9,016✔
1204
        unit_name[strcspn(unit_name, "/")] = 0;
9,016✔
1205

1206
        if (!unit_name_is_valid(cg_unescape(unit_name), UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
9,016✔
1207
                return -ENXIO;
1208

1209
        *ret = TAKE_PTR(path_copy);
9,013✔
1210

1211
        return 0;
9,013✔
1212
}
1213

1214
int cg_pid_get_unit(pid_t pid, char **ret_unit) {
573✔
1215
        _cleanup_free_ char *cgroup = NULL;
573✔
1216
        int r;
573✔
1217

1218
        assert(ret_unit);
573✔
1219

1220
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
573✔
1221
        if (r < 0)
573✔
1222
                return r;
1223

1224
        return cg_path_get_unit(cgroup, ret_unit);
567✔
1225
}
1226

1227
int cg_pidref_get_unit(const PidRef *pidref, char **ret) {
431✔
1228
        _cleanup_free_ char *unit = NULL;
431✔
1229
        int r;
431✔
1230

1231
        assert(ret);
431✔
1232

1233
        if (!pidref_is_set(pidref))
431✔
1234
                return -ESRCH;
1235
        if (pidref_is_remote(pidref))
862✔
1236
                return -EREMOTE;
1237

1238
        r = cg_pid_get_unit(pidref->pid, &unit);
431✔
1239
        if (r < 0)
431✔
1240
                return r;
1241

1242
        r = pidref_verify(pidref);
425✔
1243
        if (r < 0)
425✔
1244
                return r;
1245

1246
        *ret = TAKE_PTR(unit);
425✔
1247
        return 0;
425✔
1248
}
1249

1250
/**
 * Skip session-*.scope, but require it to be there. Returns a pointer to what follows the session
 * scope component (with leading slashes skipped), or NULL if the first component is not one.
 */
static const char *skip_session(const char *p) {
        const size_t prefix_len = STRLEN("session-"), suffix_len = STRLEN(".scope");

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        size_t n = strcspn(p, "/");
        if (n < STRLEN("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", prefix_len) != 0 ||
            memcmp(p + n - suffix_len, ".scope", suffix_len) != 0)
                return NULL;

        /* Extract the session ID that is enclosed by prefix and suffix */
        size_t id_len = n - prefix_len - suffix_len;
        char id[id_len + 1];

        memcpy(id, p + prefix_len, id_len);
        id[id_len] = '\0';

        /* Note that session scopes never need unescaping, since they cannot conflict with the
         * kernel's own names, hence cg_unescape() does not need to be called here. */

        if (!session_id_valid(id))
                return NULL;

        p += n;
        return p + strspn(p, "/");
}
1286

1287
/**
1288
 * Skip user@*.service or capsule@*.service, but require either of them to be there.
1289
 */
1290
static const char *skip_user_manager(const char *p) {
15,319✔
1291
        size_t n;
15,319✔
1292

1293
        if (isempty(p))
15,319✔
1294
                return NULL;
15,319✔
1295

1296
        p += strspn(p, "/");
15,315✔
1297

1298
        n = strcspn(p, "/");
15,315✔
1299
        if (n < CONST_MIN(STRLEN("user@x.service"), STRLEN("capsule@x.service")))
15,315✔
1300
                return NULL;
1301

1302
        /* Any possible errors from functions called below are converted to NULL return, so our callers won't
1303
         * resolve user/capsule name. */
1304
        _cleanup_free_ char *unit_name = strndup(p, n);
15,153✔
1305
        if (!unit_name)
15,153✔
1306
                return NULL;
1307

1308
        _cleanup_free_ char *i = NULL;
15,153✔
1309
        UnitNameFlags type = unit_name_to_instance(unit_name, &i);
15,153✔
1310

1311
        if (type != UNIT_NAME_INSTANCE)
15,153✔
1312
                return NULL;
1313

1314
        /* Note that user manager services never need unescaping, since they cannot conflict with the
1315
         * kernel's own names, hence we don't need to call cg_unescape() here.  Prudently check validity of
1316
         * instance names, they should be always valid as we validate them upon unit start. */
1317
        if (startswith(unit_name, "user@")) {
475✔
1318
                if (parse_uid(i, NULL) < 0)
378✔
1319
                        return NULL;
1320

1321
                p += n;
378✔
1322
                p += strspn(p, "/");
378✔
1323
                return p;
378✔
1324
        } else if (startswith(unit_name, "capsule@")) {
97✔
1325
                if (capsule_name_is_valid(i) <= 0)
5✔
1326
                        return NULL;
1327

1328
                p += n;
5✔
1329
                p += strspn(p, "/");
5✔
1330
                return p;
5✔
1331
        }
1332

1333
        return NULL;
1334
}
1335

1336
/* Skips any leading slices, followed by either the user manager service or — if there is none — the
 * user session scope. Returns NULL if neither of the two follows the slices. */
static const char *skip_user_prefix(const char *path) {
        assert(path);

        /* Skip slices, if there are any */
        const char *after_slices = skip_slices(path);

        /* Prefer the user manager if it is in the path, otherwise try the user session */
        return skip_user_manager(after_slices) ?: skip_session(after_slices);
}
1352

1353
/* Determines the user unit from a cgroup path. Returns -ENXIO if the path carries neither a user
 * manager nor a session prefix. */
int cg_path_get_user_unit(const char *path, char **ret) {
        assert(path);
        assert(ret);

        const char *rest = skip_user_prefix(path);
        if (!rest)
                return -ENXIO;

        /* What remains looks pretty much the same as for a system unit, hence use the same parser */
        return cg_path_get_unit(rest, ret);
}
1367

1368
int cg_pid_get_user_unit(pid_t pid, char **ret_unit) {
119✔
1369
        _cleanup_free_ char *cgroup = NULL;
119✔
1370
        int r;
119✔
1371

1372
        assert(ret_unit);
119✔
1373

1374
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
119✔
1375
        if (r < 0)
119✔
1376
                return r;
1377

1378
        return cg_path_get_user_unit(cgroup, ret_unit);
119✔
1379
}
1380

1381
int cg_path_get_machine_name(const char *path, char **ret_machine) {
103✔
1382
        _cleanup_free_ char *u = NULL;
103✔
1383
        const char *sl;
103✔
1384
        int r;
103✔
1385

1386
        r = cg_path_get_unit(path, &u);
103✔
1387
        if (r < 0)
103✔
1388
                return r;
1389

1390
        sl = strjoina("/run/systemd/machines/unit:", u);
515✔
1391
        return readlink_malloc(sl, ret_machine);
103✔
1392
}
1393

1394
int cg_pid_get_machine_name(pid_t pid, char **ret_machine) {
103✔
1395
        _cleanup_free_ char *cgroup = NULL;
103✔
1396
        int r;
103✔
1397

1398
        assert(ret_machine);
103✔
1399

1400
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
103✔
1401
        if (r < 0)
103✔
1402
                return r;
1403

1404
        return cg_path_get_machine_name(cgroup, ret_machine);
103✔
1405
}
1406

1407
int cg_path_get_session(const char *path, char **ret_session) {
8,364✔
1408
        _cleanup_free_ char *unit = NULL;
8,364✔
1409
        char *start, *end;
8,364✔
1410
        int r;
8,364✔
1411

1412
        assert(path);
8,364✔
1413

1414
        r = cg_path_get_unit(path, &unit);
8,364✔
1415
        if (r < 0)
8,364✔
1416
                return r;
1417

1418
        start = startswith(unit, "session-");
8,363✔
1419
        if (!start)
8,363✔
1420
                return -ENXIO;
1421
        end = endswith(start, ".scope");
244✔
1422
        if (!end)
244✔
1423
                return -ENXIO;
1424

1425
        *end = 0;
244✔
1426
        if (!session_id_valid(start))
244✔
1427
                return -ENXIO;
1428

1429
        if (!ret_session)
243✔
1430
                return 0;
1431

1432
        return strdup_to(ret_session, start);
243✔
1433
}
1434

1435
int cg_pid_get_session(pid_t pid, char **ret_session) {
704✔
1436
        _cleanup_free_ char *cgroup = NULL;
704✔
1437
        int r;
704✔
1438

1439
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
704✔
1440
        if (r < 0)
704✔
1441
                return r;
1442

1443
        return cg_path_get_session(cgroup, ret_session);
704✔
1444
}
1445

1446
int cg_pidref_get_session(const PidRef *pidref, char **ret) {
259✔
1447
        int r;
259✔
1448

1449
        if (!pidref_is_set(pidref))
259✔
1450
                return -ESRCH;
259✔
1451
        if (pidref_is_remote(pidref))
518✔
1452
                return -EREMOTE;
1453

1454
        _cleanup_free_ char *session = NULL;
259✔
1455
        r = cg_pid_get_session(pidref->pid, &session);
259✔
1456
        if (r < 0)
259✔
1457
                return r;
1458

1459
        r = pidref_verify(pidref);
210✔
1460
        if (r < 0)
210✔
1461
                return r;
1462

1463
        if (ret)
210✔
1464
                *ret = TAKE_PTR(session);
210✔
1465
        return 0;
1466
}
1467

1468
int cg_path_get_owner_uid(const char *path, uid_t *ret_uid) {
8,065✔
1469
        _cleanup_free_ char *slice = NULL;
8,065✔
1470
        char *start, *end;
8,065✔
1471
        int r;
8,065✔
1472

1473
        assert(path);
8,065✔
1474

1475
        r = cg_path_get_slice(path, &slice);
8,065✔
1476
        if (r < 0)
8,065✔
1477
                return r;
1478

1479
        start = startswith(slice, "user-");
8,065✔
1480
        if (!start)
8,065✔
1481
                return -ENXIO;
1482

1483
        end = endswith(start, ".slice");
414✔
1484
        if (!end)
414✔
1485
                return -ENXIO;
1486

1487
        *end = 0;
414✔
1488
        if (parse_uid(start, ret_uid) < 0)
414✔
UNCOV
1489
                return -ENXIO;
×
1490

1491
        return 0;
1492
}
1493

1494
int cg_pid_get_owner_uid(pid_t pid, uid_t *ret_uid) {
423✔
1495
        _cleanup_free_ char *cgroup = NULL;
423✔
1496
        int r;
423✔
1497

1498
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
423✔
1499
        if (r < 0)
423✔
1500
                return r;
1501

1502
        return cg_path_get_owner_uid(cgroup, ret_uid);
423✔
1503
}
1504

1505
int cg_pidref_get_owner_uid(const PidRef *pidref, uid_t *ret) {
47✔
1506
        int r;
47✔
1507

1508
        if (!pidref_is_set(pidref))
47✔
1509
                return -ESRCH;
47✔
1510
        if (pidref_is_remote(pidref))
47✔
1511
                return -EREMOTE;
1512

1513
        uid_t uid;
47✔
1514
        r = cg_pid_get_owner_uid(pidref->pid, &uid);
47✔
1515
        if (r < 0)
47✔
1516
                return r;
1517

1518
        r = pidref_verify(pidref);
8✔
1519
        if (r < 0)
8✔
1520
                return r;
1521

1522
        if (ret)
8✔
1523
                *ret = uid;
8✔
1524

1525
        return 0;
1526
}
1527

1528
int cg_path_get_slice(const char *p, char **ret_slice) {
15,979✔
1529
        const char *e = NULL;
15,979✔
1530

1531
        assert(p);
15,979✔
1532
        assert(ret_slice);
15,979✔
1533

1534
        /* Finds the right-most slice unit from the beginning, but stops before we come to
1535
         * the first non-slice unit. */
1536

1537
        for (;;) {
48,835✔
1538
                const char *s;
32,407✔
1539
                int n;
32,407✔
1540

1541
                n = path_find_first_component(&p, /* accept_dot_dot = */ false, &s);
32,407✔
1542
                if (n < 0)
32,407✔
UNCOV
1543
                        return n;
×
1544
                if (!valid_slice_name(s, n))
32,407✔
1545
                        break;
1546

1547
                e = s;
16,428✔
1548
        }
1549

1550
        if (e)
15,979✔
1551
                return cg_path_decode_unit(e, ret_slice);
15,757✔
1552

1553
        return strdup_to(ret_slice, SPECIAL_ROOT_SLICE);
222✔
1554
}
1555

1556
int cg_pid_get_slice(pid_t pid, char **ret_slice) {
123✔
1557
        _cleanup_free_ char *cgroup = NULL;
123✔
1558
        int r;
123✔
1559

1560
        assert(ret_slice);
123✔
1561

1562
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
123✔
1563
        if (r < 0)
123✔
1564
                return r;
1565

1566
        return cg_path_get_slice(cgroup, ret_slice);
123✔
1567
}
1568

1569
/* Determines the user slice from a cgroup path. Returns -ENXIO if the path carries neither a user
 * manager nor a session prefix. */
int cg_path_get_user_slice(const char *p, char **ret_slice) {
        assert(p);
        assert(ret_slice);

        const char *rest = skip_user_prefix(p);
        if (!rest)
                return -ENXIO;

        /* What remains looks pretty much the same as for a system slice, hence use the same parser
         * from here on */
        return cg_path_get_slice(rest, ret_slice);
}
1582

UNCOV
1583
int cg_pid_get_user_slice(pid_t pid, char **ret_slice) {
×
UNCOV
1584
        _cleanup_free_ char *cgroup = NULL;
×
UNCOV
1585
        int r;
×
1586

UNCOV
1587
        assert(ret_slice);
×
1588

UNCOV
1589
        r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
×
UNCOV
1590
        if (r < 0)
×
1591
                return r;
1592

UNCOV
1593
        return cg_path_get_user_slice(cgroup, ret_slice);
×
1594
}
1595

1596
bool cg_needs_escape(const char *p) {
17,380✔
1597

1598
        /* Checks if the specified path is a valid cgroup name by our rules, or if it must be escaped. Note
1599
         * that we consider escaped cgroup names invalid here, as they need to be escaped a second time if
1600
         * they shall be used. Also note that various names cannot be made valid by escaping even if we
1601
         * return true here (because too long, or contain the forbidden character "/"). */
1602

1603
        if (!filename_is_valid(p))
17,380✔
1604
                return true;
1605

1606
        if (IN_SET(p[0], '_', '.'))
17,376✔
1607
                return true;
1608

1609
        if (STR_IN_SET(p, "notify_on_release", "release_agent", "tasks"))
17,370✔
1610
                return true;
2✔
1611

1612
        if (startswith(p, "cgroup."))
17,368✔
1613
                return true;
1614

1615
        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
243,124✔
1616
                const char *q;
225,758✔
1617

1618
                q = startswith(p, cgroup_controller_to_string(c));
225,758✔
1619
                if (!q)
225,758✔
1620
                        continue;
225,758✔
1621

UNCOV
1622
                if (q[0] == '.')
×
1623
                        return true;
1624
        }
1625

1626
        return false;
1627
}
1628

1629
int cg_escape(const char *p, char **ret) {
17,097✔
1630
        _cleanup_free_ char *n = NULL;
17,097✔
1631

1632
        /* This implements very minimal escaping for names to be used as file names in the cgroup tree: any
1633
         * name which might conflict with a kernel name or is prefixed with '_' is prefixed with a '_'. That
1634
         * way, when reading cgroup names it is sufficient to remove a single prefixing underscore if there
1635
         * is one. */
1636

1637
        /* The return value of this function (unlike cg_unescape()) needs free()! */
1638

1639
        if (cg_needs_escape(p)) {
17,097✔
1640
                n = strjoin("_", p);
7✔
1641
                if (!n)
7✔
1642
                        return -ENOMEM;
1643

1644
                if (!filename_is_valid(n)) /* became invalid due to the prefixing? Or contained things like a slash that cannot be fixed by prefixing? */
7✔
1645
                        return -EINVAL;
1646
        } else {
1647
                n = strdup(p);
17,090✔
1648
                if (!n)
17,090✔
1649
                        return -ENOMEM;
1650
        }
1651

1652
        *ret = TAKE_PTR(n);
17,097✔
1653
        return 0;
17,097✔
1654
}
1655

1656
/* Undoes the escaping of cg_escape(): if the name begins with an underscore, a pointer to the character
 * right after it is returned, otherwise the name itself.
 *
 * The return value of this function (unlike cg_escape()) doesn't need free(), since it points into the
 * string passed in. */
char* cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1667

1668
/* The set of characters accepted in controller names: digits, letters and the underscore */
#define CONTROLLER_VALID DIGITS LETTERS "_"
1671

1672
bool cg_controller_is_valid(const char *p) {
403,243✔
1673
        const char *t, *s;
403,243✔
1674

1675
        if (!p)
403,243✔
1676
                return false;
1677

1678
        if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
403,243✔
1679
                return true;
1680

1681
        s = startswith(p, "name=");
119,482✔
1682
        if (s)
119,482✔
1683
                p = s;
32✔
1684

1685
        if (IN_SET(*p, 0, '_'))
119,482✔
1686
                return false;
1687

1688
        for (t = p; *t; t++)
766,373✔
1689
                if (!strchr(CONTROLLER_VALID, *t))
646,902✔
1690
                        return false;
1691

1692
        if (t - p > NAME_MAX)
119,471✔
UNCOV
1693
                return false;
×
1694

1695
        return true;
1696
}
1697

1698
int cg_slice_to_path(const char *unit, char **ret) {
7,511✔
1699
        _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
7,511✔
1700
        const char *dash;
7,511✔
1701
        int r;
7,511✔
1702

1703
        assert(unit);
7,511✔
1704
        assert(ret);
7,511✔
1705

1706
        if (streq(unit, SPECIAL_ROOT_SLICE))
7,511✔
1707
                return strdup_to(ret, "");
7✔
1708

1709
        if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
7,504✔
1710
                return -EINVAL;
1711

1712
        if (!endswith(unit, ".slice"))
7,493✔
1713
                return -EINVAL;
1714

1715
        r = unit_name_to_prefix(unit, &p);
7,492✔
1716
        if (r < 0)
7,492✔
1717
                return r;
1718

1719
        dash = strchr(p, '-');
7,492✔
1720

1721
        /* Don't allow initial dashes */
1722
        if (dash == p)
7,492✔
1723
                return -EINVAL;
1724

1725
        while (dash) {
7,724✔
1726
                _cleanup_free_ char *escaped = NULL;
237✔
1727
                char n[dash - p + sizeof(".slice")];
237✔
1728

1729
#if HAS_FEATURE_MEMORY_SANITIZER
1730
                /* msan doesn't instrument stpncpy, so it thinks
1731
                 * n is later used uninitialized:
1732
                 * https://github.com/google/sanitizers/issues/926
1733
                 */
1734
                zero(n);
1735
#endif
1736

1737
                /* Don't allow trailing or double dashes */
1738
                if (IN_SET(dash[1], 0, '-'))
237✔
1739
                        return -EINVAL;
1740

1741
                strcpy(stpncpy(n, p, dash - p), ".slice");
235✔
1742
                if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
235✔
1743
                        return -EINVAL;
1744

1745
                r = cg_escape(n, &escaped);
235✔
1746
                if (r < 0)
235✔
1747
                        return r;
1748

1749
                if (!strextend(&s, escaped, "/"))
235✔
1750
                        return -ENOMEM;
1751

1752
                dash = strchr(dash+1, '-');
235✔
1753
        }
1754

1755
        r = cg_escape(unit, &e);
7,487✔
1756
        if (r < 0)
7,487✔
1757
                return r;
1758

1759
        if (!strextend(&s, e))
7,487✔
1760
                return -ENOMEM;
1761

1762
        *ret = TAKE_PTR(s);
7,487✔
1763
        return 0;
7,487✔
1764
}
1765

1766
int cg_is_threaded(const char *path) {
×
UNCOV
1767
        _cleanup_free_ char *fs = NULL, *contents = NULL;
×
UNCOV
1768
        _cleanup_strv_free_ char **v = NULL;
×
1769
        int r;
×
1770

UNCOV
1771
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, "cgroup.type", &fs);
×
UNCOV
1772
        if (r < 0)
×
1773
                return r;
1774

1775
        r = read_full_virtual_file(fs, &contents, NULL);
×
UNCOV
1776
        if (r == -ENOENT)
×
1777
                return false; /* Assume no. */
UNCOV
1778
        if (r < 0)
×
1779
                return r;
1780

UNCOV
1781
        v = strv_split(contents, NULL);
×
UNCOV
1782
        if (!v)
×
1783
                return -ENOMEM;
1784

1785
        /* If the cgroup is in the threaded mode, it contains "threaded".
1786
         * If one of the parents or siblings is in the threaded mode, it may contain "invalid". */
UNCOV
1787
        return strv_contains(v, "threaded") || strv_contains(v, "invalid");
×
1788
}
1789

1790
int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
48,933✔
1791
        _cleanup_free_ char *p = NULL;
48,933✔
1792
        int r;
48,933✔
1793

1794
        r = cg_get_path(controller, path, attribute, &p);
48,933✔
1795
        if (r < 0)
48,933✔
1796
                return r;
1797

1798
        return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER);
48,933✔
1799
}
1800

1801
int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
29,403✔
1802
        _cleanup_free_ char *p = NULL;
29,403✔
1803
        int r;
29,403✔
1804

1805
        r = cg_get_path(controller, path, attribute, &p);
29,403✔
1806
        if (r < 0)
29,403✔
1807
                return r;
1808

1809
        return read_one_line_file(p, ret);
29,403✔
1810
}
1811

1812
int cg_get_attribute_as_uint64(const char *controller, const char *path, const char *attribute, uint64_t *ret) {
25,448✔
1813
        _cleanup_free_ char *value = NULL;
25,448✔
1814
        uint64_t v;
25,448✔
1815
        int r;
25,448✔
1816

1817
        assert(ret);
25,448✔
1818

1819
        r = cg_get_attribute(controller, path, attribute, &value);
25,448✔
1820
        if (r == -ENOENT)
25,448✔
1821
                return -ENODATA;
1822
        if (r < 0)
22,240✔
1823
                return r;
1824

1825
        if (streq(value, "max")) {
22,240✔
1826
                *ret = CGROUP_LIMIT_MAX;
4,688✔
1827
                return 0;
4,688✔
1828
        }
1829

1830
        r = safe_atou64(value, &v);
17,552✔
1831
        if (r < 0)
17,552✔
1832
                return r;
1833

1834
        *ret = v;
17,552✔
1835
        return 0;
17,552✔
1836
}
1837

1838
int cg_get_attribute_as_bool(const char *controller, const char *path, const char *attribute, bool *ret) {
62✔
1839
        _cleanup_free_ char *value = NULL;
62✔
1840
        int r;
62✔
1841

1842
        assert(ret);
62✔
1843

1844
        r = cg_get_attribute(controller, path, attribute, &value);
62✔
1845
        if (r == -ENOENT)
62✔
1846
                return -ENODATA;
1847
        if (r < 0)
62✔
1848
                return r;
1849

1850
        r = parse_boolean(value);
62✔
1851
        if (r < 0)
62✔
1852
                return r;
1853

1854
        *ret = r;
62✔
1855
        return 0;
62✔
1856
}
1857

1858
int cg_get_owner(const char *path, uid_t *ret_uid) {
35✔
1859
        _cleanup_free_ char *f = NULL;
35✔
1860
        struct stat stats;
35✔
1861
        int r;
35✔
1862

1863
        assert(ret_uid);
35✔
1864

1865
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &f);
35✔
1866
        if (r < 0)
35✔
1867
                return r;
1868

1869
        if (stat(f, &stats) < 0)
35✔
1870
                return -errno;
16✔
1871

1872
        r = stat_verify_directory(&stats);
19✔
1873
        if (r < 0)
19✔
1874
                return r;
1875

1876
        *ret_uid = stats.st_uid;
19✔
1877
        return 0;
19✔
1878
}
1879

1880
int cg_get_keyed_attribute_full(
34,371✔
1881
                const char *controller,
1882
                const char *path,
1883
                const char *attribute,
1884
                char **keys,
1885
                char **ret_values,
1886
                CGroupKeyMode mode) {
1887

1888
        _cleanup_free_ char *filename = NULL, *contents = NULL;
34,371✔
1889
        const char *p;
34,371✔
1890
        size_t n, i, n_done = 0;
34,371✔
1891
        char **v;
34,371✔
1892
        int r;
34,371✔
1893

1894
        /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with
1895
         * all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of
1896
         * entries as 'keys'. On success each entry will be set to the value of the matching key.
1897
         *
1898
         * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. If mode
1899
         * is set to GG_KEY_MODE_GRACEFUL we ignore missing keys and return those that were parsed successfully. */
1900

1901
        r = cg_get_path(controller, path, attribute, &filename);
34,371✔
1902
        if (r < 0)
34,371✔
1903
                return r;
1904

1905
        r = read_full_file(filename, &contents, NULL);
34,371✔
1906
        if (r < 0)
34,371✔
1907
                return r;
1908

1909
        n = strv_length(keys);
27,859✔
1910
        if (n == 0) /* No keys to retrieve? That's easy, we are done then */
27,859✔
1911
                return 0;
1912

1913
        /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
1914
        v = newa0(char*, n);
27,859✔
1915

1916
        for (p = contents; *p;) {
95,083✔
1917
                const char *w = NULL;
1918

1919
                for (i = 0; i < n; i++)
162,307✔
1920
                        if (!v[i]) {
104,947✔
1921
                                w = first_word(p, keys[i]);
95,083✔
1922
                                if (w)
95,083✔
1923
                                        break;
1924
                        }
1925

1926
                if (w) {
95,083✔
1927
                        size_t l;
37,723✔
1928

1929
                        l = strcspn(w, NEWLINE);
37,723✔
1930
                        v[i] = strndup(w, l);
37,723✔
1931
                        if (!v[i]) {
37,723✔
UNCOV
1932
                                r = -ENOMEM;
×
UNCOV
1933
                                goto fail;
×
1934
                        }
1935

1936
                        n_done++;
37,723✔
1937
                        if (n_done >= n)
37,723✔
1938
                                goto done;
27,859✔
1939

1940
                        p = w + l;
9,864✔
1941
                } else
1942
                        p += strcspn(p, NEWLINE);
57,360✔
1943

1944
                p += strspn(p, NEWLINE);
67,224✔
1945
        }
1946

UNCOV
1947
        if (mode & CG_KEY_MODE_GRACEFUL)
×
UNCOV
1948
                goto done;
×
1949

1950
        r = -ENXIO;
1951

UNCOV
1952
fail:
×
1953
        free_many_charp(v, n);
34,371✔
1954
        return r;
1955

1956
done:
27,859✔
1957
        memcpy(ret_values, v, sizeof(char*) * n);
27,859✔
1958
        if (mode & CG_KEY_MODE_GRACEFUL)
27,859✔
1959
                return n_done;
9,864✔
1960

1961
        return 0;
1962
}
1963

1964
/* Formats a controller mask as a space-separated list of controller names, in the order of the
 * CGroupController enumeration.
 *
 * Returns 0 on success. For an empty mask '*ret' is set to NULL; otherwise it is set to a newly
 * allocated string the caller must free. Returns -ENOMEM if the buffer cannot be grown. */
int cg_mask_to_string(CGroupMask mask, char **ret) {
        _cleanup_free_ char *buf = NULL;
        bool need_sep = false;
        size_t len = 0;

        assert(ret);

        if (mask == 0) {
                *ret = NULL;
                return 0;
        }

        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                const char *name;
                size_t name_len, sep_len;

                if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c)))
                        continue;

                name = cgroup_controller_to_string(c);
                name_len = strlen(name);
                sep_len = need_sep ? 1 : 0;

                /* Room for the optional separator, the name, and the trailing NUL. */
                if (!GREEDY_REALLOC(buf, len + sep_len + name_len + 1))
                        return -ENOMEM;

                if (need_sep)
                        buf[len++] = ' ';

                memcpy(buf + len, name, name_len);
                len += name_len;

                need_sep = true;
        }

        /* At least one bit of the mask must have mapped to a known controller. */
        assert(buf);

        buf[len] = 0;
        *ret = TAKE_PTR(buf);

        return 0;
}
2005

2006
/* Parses a whitespace-separated list of controller names into a controller mask. Words that
 * cgroup_controller_from_string() does not recognize are silently skipped.
 *
 * Returns 0 on success with the mask stored in '*ret', or the negative error reported by
 * extract_first_word(), in which case '*ret' is left untouched. */
int cg_mask_from_string(const char *value, CGroupMask *ret) {
        CGroupMask mask = 0;

        assert(ret);
        assert(value);

        for (;;) {
                _cleanup_free_ char *word = NULL;
                CGroupController c;
                int r;

                r = extract_first_word(&value, &word, NULL, 0);
                if (r < 0)
                        return r;
                if (r == 0) /* No more words. */
                        break;

                c = cgroup_controller_from_string(word);
                if (c >= 0)
                        mask |= CGROUP_CONTROLLER_TO_MASK(c);
        }

        *ret = mask;
        return 0;
}
2033

2034
/* Determines the mask of supported cgroup controllers below 'root'. The result is restricted to
 * CGROUP_MASK_V2 on the unified hierarchy and to CGROUP_MASK_V1 on the legacy one, i.e. it only
 * includes controllers we can make sense of and that are actually accessible.
 *
 * Returns 0 on success with the mask stored in '*ret', negative errno-style value on failure. */
int cg_mask_supported_subtree(const char *root, CGroupMask *ret) {
        CGroupMask supported = 0;
        int r;

        r = cg_all_unified();
        if (r < 0)
                return r;

        if (r == 0) {
                /* In the legacy hierarchy, we check which hierarchies are accessible. */
                for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                        CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                        const char *name;

                        if (!FLAGS_SET(CGROUP_MASK_V1, bit))
                                continue;

                        name = cgroup_controller_to_string(c);
                        if (controller_is_v1_accessible(root, name) >= 0)
                                supported |= bit;
                }
        } else {
                _cleanup_free_ char *line = NULL, *controllers_file = NULL;

                /* In the unified hierarchy we can read the supported and accessible controllers from
                 * the "cgroup.controllers" attribute of the root cgroup. */
                r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &controllers_file);
                if (r < 0)
                        return r;

                r = read_one_line_file(controllers_file, &line);
                if (r < 0)
                        return r;

                r = cg_mask_from_string(line, &supported);
                if (r < 0)
                        return r;

                /* Mask controllers that are not supported in unified hierarchy. */
                supported &= CGROUP_MASK_V2;
        }

        *ret = supported;
        return 0;
}
2088

2089
/* Like cg_mask_supported_subtree(), but operates on the root path reported by cg_get_root_path().
 *
 * Returns the negative error from cg_get_root_path() on failure, otherwise the result of
 * cg_mask_supported_subtree(). */
int cg_mask_supported(CGroupMask *ret) {
        _cleanup_free_ char *root_path = NULL;
        int r = cg_get_root_path(&root_path);

        if (r < 0)
                return r;

        return cg_mask_supported_subtree(root_path, ret);
}
2099

2100
/* Determines the full list of kernel-known controllers by parsing /proc/cgroups. Might include
 * controllers we don't actually support and controllers that aren't currently accessible (because not
 * mounted). This does not include "name=" pseudo-controllers.
 *
 * Returns 0 on success and stores a newly allocated set of controller names in '*ret'. '*ret' is NULL
 * if /proc/cgroups does not exist, or if no enabled controller was listed. Returns -EBADMSG if a line
 * cannot be parsed or names an invalid controller, and a negative errno-style value on I/O or
 * allocation failure. */
int cg_kernel_controllers(Set **ret) {
        _cleanup_set_free_ Set *controllers = NULL;
        _cleanup_fclose_ FILE *f = NULL;
        int r;

        assert(ret);

        r = fopen_unlocked("/proc/cgroups", "re", &f);
        if (r == -ENOENT) {
                *ret = NULL;
                return 0;
        }
        if (r < 0)
                return r;

        /* Ignore the header line */
        (void) read_line(f, SIZE_MAX, NULL);

        for (;;) {
                _cleanup_free_ char *controller = NULL;
                int enabled = 0;

                /* Fields: name, hierarchy ID (skipped), number of cgroups (skipped), enabled flag. */
                if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {

                        /* Never report success on a read error: if errno was not set (or was reset)
                         * fall back to -EIO instead of returning 0 with '*ret' uninitialized. */
                        if (ferror(f))
                                return errno > 0 ? -errno : -EIO;

                        if (feof(f))
                                break;

                        return -EBADMSG;
                }

                if (!enabled)
                        continue;

                if (!cg_controller_is_valid(controller))
                        return -EBADMSG;

                /* The set takes ownership of the name, regardless of the outcome. */
                r = set_ensure_consume(&controllers, &string_hash_ops_free, TAKE_PTR(controller));
                if (r < 0)
                        return r;
        }

        *ret = TAKE_PTR(controllers);

        return 0;
}
2152

2153
/* The hybrid mode was initially implemented in v232 and simply mounted cgroup2 on
2154
 * /sys/fs/cgroup/systemd. This unfortunately broke other tools (such as docker) which expected the v1
2155
 * "name=systemd" hierarchy on /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mounts v2 on
2156
 * /sys/fs/cgroup/unified and maintains "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility
2157
 * with other tools.
2158
 *
2159
 * To keep live upgrade working, we detect and support v232 layout. When v232 layout is detected, to keep
2160
 * cgroup v2 process management but disable the compat dual layout, we return true on
2161
 * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and false on cg_hybrid_unified().
2162
 */
2163
/* Set by cg_unified_cached(): true when cgroup2 was found mounted directly on /sys/fs/cgroup/systemd
 * (the v232 layout), false when it was found on /sys/fs/cgroup/unified. Consulted by
 * cg_hybrid_unified(). */
static thread_local bool unified_systemd_v232;
2164

2165
int cg_unified_cached(bool flush) {
664,742✔
2166
        static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
664,742✔
2167

2168
        struct statfs fs;
664,742✔
2169

2170
        /* Checks if we support the unified hierarchy. Returns an
2171
         * error when the cgroup hierarchies aren't mounted yet or we
2172
         * have any other trouble determining if the unified hierarchy
2173
         * is supported. */
2174

2175
        if (flush)
664,742✔
2176
                unified_cache = CGROUP_UNIFIED_UNKNOWN;
18,351✔
2177
        else if (unified_cache >= CGROUP_UNIFIED_NONE)
646,391✔
2178
                return unified_cache;
664,742✔
2179

2180
        if (statfs("/sys/fs/cgroup/", &fs) < 0)
33,391✔
2181
                return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
×
2182

2183
        if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
33,391✔
2184
                log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
33,330✔
2185
                unified_cache = CGROUP_UNIFIED_ALL;
33,330✔
2186
        } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
61✔
2187
                if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
×
UNCOV
2188
                    F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
×
UNCOV
2189
                        log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
×
2190
                        unified_cache = CGROUP_UNIFIED_SYSTEMD;
×
2191
                        unified_systemd_v232 = false;
×
2192
                } else {
2193
                        if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) {
×
2194
                                if (errno == ENOENT) {
×
2195
                                        /* Some other software may have set up /sys/fs/cgroup in a configuration we do not recognize. */
2196
                                        log_debug_errno(errno, "Unsupported cgroupsv1 setup detected: name=systemd hierarchy not found.");
×
UNCOV
2197
                                        return -ENOMEDIUM;
×
2198
                                }
UNCOV
2199
                                return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\" failed: %m");
×
2200
                        }
2201

UNCOV
2202
                        if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
×
UNCOV
2203
                                log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
×
UNCOV
2204
                                unified_cache = CGROUP_UNIFIED_SYSTEMD;
×
UNCOV
2205
                                unified_systemd_v232 = true;
×
UNCOV
2206
                        } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
×
2207
                                log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
×
UNCOV
2208
                                unified_cache = CGROUP_UNIFIED_NONE;
×
2209
                        } else {
UNCOV
2210
                                log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
×
2211
                                          (unsigned long long) fs.f_type);
UNCOV
2212
                                unified_cache = CGROUP_UNIFIED_NONE;
×
2213
                        }
2214
                }
2215
        } else if (F_TYPE_EQUAL(fs.f_type, SYSFS_MAGIC)) {
61✔
2216
                return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
61✔
2217
                                       "No filesystem is currently mounted on /sys/fs/cgroup.");
2218
        } else
UNCOV
2219
                return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
×
2220
                                       "Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
2221
                                       (unsigned long long)fs.f_type);
2222

2223
        return unified_cache;
33,330✔
2224
}
2225

2226
int cg_unified_controller(const char *controller) {
103,568✔
2227
        int r;
103,568✔
2228

2229
        r = cg_unified_cached(false);
103,568✔
2230
        if (r < 0)
103,568✔
2231
                return r;
2232

2233
        if (r == CGROUP_UNIFIED_NONE)
103,568✔
2234
                return false;
2235

2236
        if (r >= CGROUP_UNIFIED_ALL)
103,568✔
2237
                return true;
2238

UNCOV
2239
        return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER);
×
2240
}
2241

2242
int cg_all_unified(void) {
516,201✔
2243
        int r;
516,201✔
2244

2245
        r = cg_unified_cached(false);
516,201✔
2246
        if (r < 0)
516,201✔
2247
                return r;
2248

2249
        return r >= CGROUP_UNIFIED_ALL;
516,201✔
2250
}
2251

2252
int cg_hybrid_unified(void) {
26,621✔
2253
        int r;
26,621✔
2254

2255
        r = cg_unified_cached(false);
26,621✔
2256
        if (r < 0)
26,621✔
2257
                return r;
2258

2259
        return r == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
26,621✔
2260
}
2261

2262
/* Checks whether the cgroup at 'path' is marked as delegated via extended attributes.
 *
 * "trusted.delegate" takes precedence; "user.delegate" is consulted only if the trusted attribute is
 * absent. Returns the boolean value of whichever attribute was found, false if both are absent, or the
 * negative error reported by cg_get_xattr_bool(). */
int cg_is_delegated(const char *path) {
        int r;

        assert(path);

        r = cg_get_xattr_bool(path, "trusted.delegate");
        if (ERRNO_IS_NEG_XATTR_ABSENT(r)) {
                /* If the trusted xattr isn't set (preferred), then check the untrusted one. Under the
                 * assumption that whoever is trusted enough to own the cgroup, is also trusted enough
                 * to decide if it is delegated or not this should be safe. */
                r = cg_get_xattr_bool(path, "user.delegate");
                if (ERRNO_IS_NEG_XATTR_ABSENT(r))
                        return false;
        }

        return r;
}
2277

2278
/* Same check as cg_is_delegated(), but on a cgroup referenced by an open file descriptor.
 *
 * "trusted.delegate" takes precedence; "user.delegate" is consulted only if the trusted attribute is
 * absent. Returns the boolean value of whichever attribute was found, false if both are absent, or the
 * negative error reported by getxattr_at_bool(). */
int cg_is_delegated_fd(int fd) {
        int r;

        assert(fd >= 0);

        r = getxattr_at_bool(fd, /* path= */ NULL, "trusted.delegate", /* flags= */ 0);
        if (ERRNO_IS_NEG_XATTR_ABSENT(r)) {
                r = getxattr_at_bool(fd, /* path= */ NULL, "user.delegate", /* flags= */ 0);
                if (ERRNO_IS_NEG_XATTR_ABSENT(r))
                        return false;
        }

        return r;
}
2290

2291
/* Returns the boolean value of the "user.coredump_receive" extended attribute of the cgroup at
 * 'path', false if the attribute is absent, or the negative error reported by cg_get_xattr_bool(). */
int cg_has_coredump_receive(const char *path) {
        int r;

        assert(path);

        r = cg_get_xattr_bool(path, "user.coredump_receive");

        return ERRNO_IS_NEG_XATTR_ABSENT(r) ? false : r;
}
2302

2303
/* Default value of each IO limit type: every limit defaults to CGROUP_LIMIT_MAX. */
const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]  = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WBPS_MAX]  = CGROUP_LIMIT_MAX,
        [CGROUP_IO_RIOPS_MAX] = CGROUP_LIMIT_MAX,
        [CGROUP_IO_WIOPS_MAX] = CGROUP_LIMIT_MAX,
};
2309

2310
/* String names of the IO limit types, used by the lookup functions generated below. */
static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
        [CGROUP_IO_RBPS_MAX]  = "IOReadBandwidthMax",
        [CGROUP_IO_WBPS_MAX]  = "IOWriteBandwidthMax",
        [CGROUP_IO_RIOPS_MAX] = "IOReadIOPSMax",
        [CGROUP_IO_WIOPS_MAX] = "IOWriteIOPSMax",
};

DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
4,321✔
2318

2319
bool is_cgroup_fs(const struct statfs *s) {
26✔
2320
        return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
28✔
2321
               is_fs_type(s, CGROUP2_SUPER_MAGIC);
2✔
2322
}
2323

2324
/* Returns true if the file descriptor 'fd' refers to an inode on a cgroup file system (legacy or
 * unified), false otherwise.
 *
 * The return type is bool, so there is no way to report an error to the caller: if fstatfs() fails we
 * return false. (Returning -errno here would be converted to true, i.e. a failed lookup would be
 * reported as "is a cgroup file system".) */
bool fd_is_cgroup_fs(int fd) {
        struct statfs s;

        if (fstatfs(fd, &s) < 0)
                return false;

        return is_cgroup_fs(&s);
}
2332

2333
/* String names of all controllers, including the "bpf-*" pseudo-controllers, used by the lookup
 * functions generated below. */
static const char *const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
        [CGROUP_CONTROLLER_CPU]                             = "cpu",
        [CGROUP_CONTROLLER_CPUACCT]                         = "cpuacct",
        [CGROUP_CONTROLLER_CPUSET]                          = "cpuset",
        [CGROUP_CONTROLLER_IO]                              = "io",
        [CGROUP_CONTROLLER_BLKIO]                           = "blkio",
        [CGROUP_CONTROLLER_MEMORY]                          = "memory",
        [CGROUP_CONTROLLER_DEVICES]                         = "devices",
        [CGROUP_CONTROLLER_PIDS]                            = "pids",
        [CGROUP_CONTROLLER_BPF_FIREWALL]                    = "bpf-firewall",
        [CGROUP_CONTROLLER_BPF_DEVICES]                     = "bpf-devices",
        [CGROUP_CONTROLLER_BPF_FOREIGN]                     = "bpf-foreign",
        [CGROUP_CONTROLLER_BPF_SOCKET_BIND]                 = "bpf-socket-bind",
        [CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES] = "bpf-restrict-network-interfaces",
};

DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
386,819✔
2350

2351
CGroupMask get_cpu_accounting_mask(void) {
1,775,557✔
2352
        static CGroupMask needed_mask = (CGroupMask) -1;
1,775,557✔
2353

2354
        /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is
2355
         * provided externally from the CPU controller, which means we don't
2356
         * need to enable the CPU controller just to get metrics. This is good,
2357
         * because enabling the CPU controller comes at a minor performance
2358
         * hit, especially when it's propagated deep into large hierarchies.
2359
         * There's also no separate CPU accounting controller available within
2360
         * a unified hierarchy.
2361
         *
2362
         * This combination of factors results in the desired cgroup mask to
2363
         * enable for CPU accounting varying as follows:
2364
         *
2365
         *                   ╔═════════════════════╤═════════════════════╗
2366
         *                   ║     Linux ≥4.15     │     Linux <4.15     ║
2367
         *   ╔═══════════════╬═════════════════════╪═════════════════════╣
2368
         *   ║ Unified       ║ nothing             │ CGROUP_MASK_CPU     ║
2369
         *   ╟───────────────╫─────────────────────┼─────────────────────╢
2370
         *   ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║
2371
         *   ╚═══════════════╩═════════════════════╧═════════════════════╝
2372
         *
2373
         * We check kernel version here instead of manually checking whether
2374
         * cpu.stat is present for every cgroup, as that check in itself would
2375
         * already be fairly expensive.
2376
         *
2377
         * Kernels where this patch has been backported will therefore have the
2378
         * CPU controller enabled unnecessarily. This is more expensive than
2379
         * necessary, but harmless. ☺️
2380
         */
2381

2382
        if (needed_mask == (CGroupMask) -1) {
1,775,557✔
2383
                if (cg_all_unified()) {
719✔
2384
                        struct utsname u;
719✔
2385
                        assert_se(uname(&u) >= 0);
719✔
2386

2387
                        if (strverscmp_improved(u.release, "4.15") < 0)
719✔
UNCOV
2388
                                needed_mask = CGROUP_MASK_CPU;
×
2389
                        else
2390
                                needed_mask = 0;
719✔
2391
                } else
UNCOV
2392
                        needed_mask = CGROUP_MASK_CPUACCT;
×
2393
        }
2394

2395
        return needed_mask;
1,775,557✔
2396
}
2397

2398
bool cpu_accounting_is_cheap(void) {
2,324✔
2399
        return get_cpu_accounting_mask() == 0;
2,324✔
2400
}
2401

2402
/* String names of the ManagedOOMMode values, used by the lookup functions generated below. */
static const char* const managed_oom_mode_table[_MANAGED_OOM_MODE_MAX] = {
        [MANAGED_OOM_AUTO] = "auto",
        [MANAGED_OOM_KILL] = "kill",
};

DEFINE_STRING_TABLE_LOOKUP(managed_oom_mode, ManagedOOMMode);
37,985✔
2408

2409
/* String names of the ManagedOOMPreference values, used by the lookup functions generated below. */
static const char* const managed_oom_preference_table[_MANAGED_OOM_PREFERENCE_MAX] = {
        [MANAGED_OOM_PREFERENCE_NONE]  = "none",
        [MANAGED_OOM_PREFERENCE_AVOID] = "avoid",
        [MANAGED_OOM_PREFERENCE_OMIT]  = "omit",
};

DEFINE_STRING_TABLE_LOOKUP(managed_oom_preference, ManagedOOMPreference);
18,769✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc