• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 13912360373

17 Mar 2025 10:34PM UTC coverage: 71.946% (+0.03%) from 71.915%
13912360373

push

github

web-flow
nsresourced,vmspawn: allow unpriv "tap" based networking in vmspawn (#36688)

This extends nsresourced to also allow delegation of a network tap
device (in addition to veth) to unprivileged clients, with a strictly
enforced naming scheme.

It also tightens security on a couple of things:

* enforces polkit on all nsresourced ops too (though by default still
everything is allowed)
* put a limit on delegated network devices
* forcibly clean up delegated network devices when the userns goes away

145 of 375 new or added lines in 14 files covered. (38.67%)

2324 existing lines in 47 files now uncovered.

296268 of 411794 relevant lines covered (71.95%)

711485.52 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

58.62
/src/shared/cgroup-setup.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <unistd.h>
4

5
#include "cgroup-setup.h"
6
#include "cgroup-util.h"
7
#include "errno-util.h"
8
#include "fd-util.h"
9
#include "fileio.h"
10
#include "fs-util.h"
11
#include "missing_magic.h"
12
#include "mkdir.h"
13
#include "parse-util.h"
14
#include "path-util.h"
15
#include "process-util.h"
16
#include "recurse-dir.h"
17
#include "stdio-util.h"
18
#include "string-util.h"
19
#include "user-util.h"
20

21
int cg_weight_parse(const char *s, uint64_t *ret) {
428✔
22
        uint64_t u;
428✔
23
        int r;
428✔
24

25
        assert(s);
428✔
26
        assert(ret);
428✔
27

28
        if (isempty(s)) {
428✔
UNCOV
29
                *ret = CGROUP_WEIGHT_INVALID;
×
30
                return 0;
×
31
        }
32

33
        r = safe_atou64(s, &u);
428✔
34
        if (r < 0)
428✔
35
                return r;
36

37
        if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
428✔
38
                return -ERANGE;
39

40
        *ret = u;
428✔
41
        return 0;
428✔
42
}
43

44
int cg_cpu_weight_parse(const char *s, uint64_t *ret) {
426✔
45
        assert(s);
426✔
46
        assert(ret);
426✔
47

48
        if (streq(s, "idle"))
426✔
UNCOV
49
                return *ret = CGROUP_WEIGHT_IDLE;
×
50

51
        return cg_weight_parse(s, ret);
426✔
52
}
53

54
int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
1✔
55
        uint64_t u;
1✔
56
        int r;
1✔
57

58
        assert(s);
1✔
59
        assert(ret);
1✔
60

61
        if (isempty(s)) {
1✔
UNCOV
62
                *ret = CGROUP_CPU_SHARES_INVALID;
×
UNCOV
63
                return 0;
×
64
        }
65

66
        r = safe_atou64(s, &u);
1✔
67
        if (r < 0)
1✔
68
                return r;
69

70
        if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
1✔
71
                return -ERANGE;
72

73
        *ret = u;
1✔
74
        return 0;
1✔
75
}
76

UNCOV
77
int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
×
UNCOV
78
        uint64_t u;
×
UNCOV
79
        int r;
×
80

UNCOV
81
        assert(s);
×
UNCOV
82
        assert(ret);
×
83

UNCOV
84
        if (isempty(s)) {
×
UNCOV
85
                *ret = CGROUP_BLKIO_WEIGHT_INVALID;
×
UNCOV
86
                return 0;
×
87
        }
88

UNCOV
89
        r = safe_atou64(s, &u);
×
UNCOV
90
        if (r < 0)
×
91
                return r;
92

UNCOV
93
        if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
×
94
                return -ERANGE;
95

UNCOV
96
        *ret = u;
×
97
        return 0;
×
98
}
99

100
static int trim_cb(
161,755✔
101
                RecurseDirEvent event,
102
                const char *path,
103
                int dir_fd,
104
                int inode_fd,
105
                const struct dirent *de,
106
                const struct statx *sx,
107
                void *userdata) {
108

109
        /* Failures to delete inner cgroup we ignore (but debug log in case error code is unexpected) */
110
        if (event == RECURSE_DIR_LEAVE &&
161,755✔
111
            de->d_type == DT_DIR &&
638✔
112
            unlinkat(dir_fd, de->d_name, AT_REMOVEDIR) < 0 &&
319✔
113
            !IN_SET(errno, ENOENT, ENOTEMPTY, EBUSY))
201✔
114
                log_debug_errno(errno, "Failed to trim inner cgroup %s, ignoring: %m", path);
1✔
115

116
        return RECURSE_DIR_CONTINUE;
161,755✔
117
}
118

119
int cg_trim(const char *controller, const char *path, bool delete_root) {
7,754✔
120
        _cleanup_free_ char *fs = NULL;
7,754✔
121
        int r, q;
7,754✔
122

123
        assert(controller);
7,754✔
124

125
        r = cg_get_path(controller, path, NULL, &fs);
7,754✔
126
        if (r < 0)
7,754✔
127
                return r;
128

129
        r = recurse_dir_at(
7,754✔
130
                        AT_FDCWD,
131
                        fs,
132
                        /* statx_mask = */ 0,
133
                        /* n_depth_max = */ UINT_MAX,
134
                        RECURSE_DIR_ENSURE_TYPE,
135
                        trim_cb,
136
                        /* userdata = */ NULL);
137
        if (r == -ENOENT) /* non-existing is the ultimate trimming, hence no error */
7,754✔
138
                r = 0;
139
        else if (r < 0)
3,450✔
UNCOV
140
                log_debug_errno(r, "Failed to trim subcgroups of '%s': %m", path);
×
141

142
        /* If we shall delete the top-level cgroup, then propagate the failure to do so (except if it is
143
         * already gone anyway). Also, let's debug log about this failure, except if the error code is an
144
         * expected one. */
145
        if (delete_root && !empty_or_root(path) &&
15,357✔
146
            rmdir(fs) < 0 && errno != ENOENT) {
11,908✔
147
                if (!IN_SET(errno, ENOTEMPTY, EBUSY))
1✔
UNCOV
148
                        log_debug_errno(errno, "Failed to trim cgroup '%s': %m", path);
×
149
                RET_GATHER(r, -errno);
1✔
150
        }
151

152
        q = cg_hybrid_unified();
7,754✔
153
        if (q < 0)
7,754✔
154
                return q;
155
        if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER))
7,754✔
UNCOV
156
                (void) cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
×
157

158
        return r;
159
}
160

161
/* Create a cgroup in the hierarchy of controller.
162
 * Returns 0 if the group already existed, 1 on success, negative otherwise.
163
 */
164
int cg_create(const char *controller, const char *path) {
6,292✔
165
        _cleanup_free_ char *fs = NULL;
6,292✔
166
        int r;
6,292✔
167

168
        assert(controller);
6,292✔
169

170
        r = cg_get_path_and_check(controller, path, NULL, &fs);
6,292✔
171
        if (r < 0)
6,292✔
172
                return r;
173

174
        r = mkdir_parents(fs, 0755);
6,292✔
175
        if (r < 0)
6,292✔
176
                return r;
177

178
        r = RET_NERRNO(mkdir(fs, 0755));
6,292✔
179
        if (r == -EEXIST)
1,577✔
180
                return 0;
181
        if (r < 0)
4,715✔
182
                return r;
183

184
        r = cg_hybrid_unified();
4,715✔
185
        if (r < 0)
4,715✔
186
                return r;
187
        if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
4,715✔
UNCOV
188
                r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
×
UNCOV
189
                if (r < 0)
×
190
                        log_warning_errno(r, "Failed to create compat systemd cgroup '%s', ignoring: %m", path);
6,292✔
191
        }
192

193
        return 1;
194
}
195

196
int cg_attach(const char *controller, const char *path, pid_t pid) {
13,796✔
197
        _cleanup_free_ char *fs = NULL;
13,796✔
198
        char c[DECIMAL_STR_MAX(pid_t) + 2];
13,796✔
199
        int r;
13,796✔
200

201
        assert(controller);
13,796✔
202
        assert(path);
13,796✔
203
        assert(pid >= 0);
13,796✔
204

205
        r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
13,796✔
206
        if (r < 0)
13,796✔
207
                return r;
208

209
        if (pid == 0)
13,796✔
210
                pid = getpid_cached();
13,612✔
211

212
        xsprintf(c, PID_FMT "\n", pid);
13,796✔
213

214
        r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER);
13,796✔
215
        if (r == -EOPNOTSUPP && cg_is_threaded(path) > 0)
13,796✔
216
                /* When the threaded mode is used, we cannot read/write the file. Let's return recognizable error. */
217
                return -EUCLEAN;
218
        if (r < 0)
13,796✔
219
                return r;
220

221
        r = cg_hybrid_unified();
13,793✔
222
        if (r < 0)
13,793✔
223
                return r;
224
        if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
13,793✔
UNCOV
225
                r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
×
UNCOV
226
                if (r < 0)
×
227
                        log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup '%s', ignoring: %m", pid, path);
13,796✔
228
        }
229

230
        return 0;
231
}
232

233
int cg_fd_attach(int fd, pid_t pid) {
7✔
234
        char c[DECIMAL_STR_MAX(pid_t) + 2];
7✔
235

236
        assert(fd >= 0);
7✔
237
        assert(pid >= 0);
7✔
238

239
        if (pid == 0)
7✔
UNCOV
240
                pid = getpid_cached();
×
241

242
        xsprintf(c, PID_FMT "\n", pid);
7✔
243

244
        return write_string_file_at(fd, "cgroup.procs", c, WRITE_STRING_FILE_DISABLE_BUFFER);
7✔
245
}
246

247
int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
×
UNCOV
248
        int r;
×
249

250
        assert(controller);
×
UNCOV
251
        assert(path);
×
UNCOV
252
        assert(pid >= 0);
×
253

254
        r = cg_attach(controller, path, pid);
×
UNCOV
255
        if (r < 0) {
×
UNCOV
256
                char prefix[strlen(path) + 1];
×
257

258
                /* This didn't work? Then let's try all prefixes of the destination */
259

UNCOV
260
                PATH_FOREACH_PREFIX(prefix, path) {
×
UNCOV
261
                        int q;
×
262

UNCOV
263
                        q = cg_attach(controller, prefix, pid);
×
UNCOV
264
                        if (q >= 0)
×
UNCOV
265
                                return q;
×
266
                }
267
        }
268

269
        return r;
270
}
271

272
/* Creates the cgroup 'path' in 'controller' and attaches 'pid' to it.
 *
 * Returns the result of cg_create() if both steps succeed, otherwise the negative error of the step
 * that failed. Note that the cgroup is not removed again if attaching fails. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, attached;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        attached = cg_attach(controller, path, pid);

        return attached < 0 ? attached : created;
}
289

290
int cg_set_access(
675✔
291
                const char *controller,
292
                const char *path,
293
                uid_t uid,
294
                gid_t gid) {
295

296
        struct Attribute {
675✔
297
                const char *name;
298
                bool fatal;
299
        };
300

301
        /* cgroup v1, aka legacy/non-unified */
302
        static const struct Attribute legacy_attributes[] = {
675✔
303
                { "cgroup.procs",           true  },
304
                { "tasks",                  false },
305
                { "cgroup.clone_children",  false },
306
                {},
307
        };
308

309
        /* cgroup v2, aka unified */
310
        static const struct Attribute unified_attributes[] = {
675✔
311
                { "cgroup.procs",           true  },
312
                { "cgroup.subtree_control", true  },
313
                { "cgroup.threads",         false },
314
                { "memory.oom.group",       false },
315
                { "memory.reclaim",         false },
316
                {},
317
        };
318

319
        static const struct Attribute* const attributes[] = {
675✔
320
                [false] = legacy_attributes,
321
                [true]  = unified_attributes,
322
        };
323

324
        _cleanup_free_ char *fs = NULL;
675✔
325
        const struct Attribute *i;
675✔
326
        int r, unified;
675✔
327

328
        assert(path);
675✔
329

330
        if (uid == UID_INVALID && gid == GID_INVALID)
675✔
331
                return 0;
332

333
        unified = cg_unified_controller(controller);
151✔
334
        if (unified < 0)
151✔
335
                return unified;
336

337
        /* Configure access to the cgroup itself */
338
        r = cg_get_path(controller, path, NULL, &fs);
151✔
339
        if (r < 0)
151✔
340
                return r;
341

342
        r = chmod_and_chown(fs, 0755, uid, gid);
151✔
343
        if (r < 0)
151✔
344
                return r;
345

346
        /* Configure access to the cgroup's attributes */
347
        for (i = attributes[unified]; i->name; i++) {
906✔
348
                fs = mfree(fs);
755✔
349

350
                r = cg_get_path(controller, path, i->name, &fs);
755✔
351
                if (r < 0)
755✔
352
                        return r;
353

354
                r = chmod_and_chown(fs, 0644, uid, gid);
755✔
355
                if (r < 0) {
755✔
UNCOV
356
                        if (i->fatal)
×
357
                                return r;
358

359
                        log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
755✔
360
                }
361
        }
362

363
        if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
151✔
364
                r = cg_hybrid_unified();
151✔
365
                if (r < 0)
151✔
366
                        return r;
367
                if (r > 0) {
151✔
368
                        /* Always propagate access mode from unified to legacy controller */
UNCOV
369
                        r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
×
UNCOV
370
                        if (r < 0)
×
371
                                log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
675✔
372
                }
373
        }
374

375
        return 0;
376
}
377

378
/* State handed to access_callback() via the recursion's userdata pointer */
struct access_callback_data {
        uid_t uid;      /* owner to pass to fchownat() */
        gid_t gid;      /* group to pass to fchownat() */
        int error;      /* errors gathered by the callback via RET_GATHER(); asserted to be <= 0 */
};
383

384
static int access_callback(
2,711✔
385
                RecurseDirEvent event,
386
                const char *path,
387
                int dir_fd,
388
                int inode_fd,
389
                const struct dirent *de,
390
                const struct statx *sx,
391
                void *userdata) {
392

393
        if (!IN_SET(event, RECURSE_DIR_ENTER, RECURSE_DIR_ENTRY))
2,711✔
394
                return RECURSE_DIR_CONTINUE;
395

396
        struct access_callback_data *d = ASSERT_PTR(userdata);
2,571✔
397

398
        assert(path);
2,571✔
399
        assert(inode_fd >= 0);
2,571✔
400

401
        if (fchownat(inode_fd, "", d->uid, d->gid, AT_EMPTY_PATH) < 0)
2,571✔
UNCOV
402
                RET_GATHER(d->error, log_debug_errno(errno, "Failed to change ownership of '%s', ignoring: %m", path));
×
403

404
        return RECURSE_DIR_CONTINUE;
405
}
406

407
int cg_set_access_recursive(
337✔
408
                const char *controller,
409
                const char *path,
410
                uid_t uid,
411
                gid_t gid) {
412

413
        _cleanup_close_ int fd = -EBADF;
337✔
414
        _cleanup_free_ char *fs = NULL;
337✔
415
        int r;
337✔
416

417
        assert(controller);
337✔
418
        assert(path);
337✔
419

420
        /* A recursive version of cg_set_access(). But note that this one changes ownership of *all* files,
421
         * not just the allowlist that cg_set_access() uses. Use cg_set_access() on the cgroup you want to
422
         * delegate, and cg_set_access_recursive() for any subcgroups you might want to create below it. */
423

424
        if (!uid_is_valid(uid) && !gid_is_valid(gid))
534✔
425
                return 0;
426

427
        r = cg_get_path(controller, path, NULL, &fs);
140✔
428
        if (r < 0)
140✔
429
                return r;
430

431
        fd = open(fs, O_DIRECTORY|O_CLOEXEC);
140✔
432
        if (fd < 0)
140✔
UNCOV
433
                return -errno;
×
434

435
        struct access_callback_data d = {
140✔
436
                .uid = uid,
437
                .gid = gid,
438
        };
439

440
        r = recurse_dir(fd,
140✔
441
                        fs,
442
                        /* statx_mask= */ 0,
443
                        /* n_depth_max= */ UINT_MAX,
444
                        RECURSE_DIR_SAME_MOUNT|RECURSE_DIR_INODE_FD|RECURSE_DIR_TOPLEVEL,
445
                        access_callback,
446
                        &d);
447
        if (r < 0)
140✔
448
                return r;
449

450
        assert(d.error <= 0);
140✔
451
        return d.error;
452
}
453

454
int cg_migrate(
265✔
455
                const char *cfrom,
456
                const char *pfrom,
457
                const char *cto,
458
                const char *pto,
459
                CGroupFlags flags) {
460

461
        _cleanup_set_free_ Set *s = NULL;
265✔
462
        bool done;
265✔
463
        int r, ret = 0;
265✔
464

465
        assert(cfrom);
265✔
466
        assert(pfrom);
265✔
467
        assert(cto);
265✔
468
        assert(pto);
265✔
469

470
        do {
266✔
UNCOV
471
                _cleanup_fclose_ FILE *f = NULL;
×
472
                pid_t pid;
266✔
473

474
                done = true;
266✔
475

476
                r = cg_enumerate_processes(cfrom, pfrom, &f);
266✔
477
                if (r < 0)
266✔
UNCOV
478
                        return RET_GATHER(ret, r);
×
479

480
                while ((r = cg_read_pid(f, &pid, flags)) > 0) {
6,329✔
481
                        /* Throw an error if unmappable PIDs are in output, we can't migrate those. */
482
                        if (pid == 0)
6,063✔
483
                                return -EREMOTE;
484

485
                        /* This might do weird stuff if we aren't a single-threaded program. However, we
486
                         * luckily know we are. */
487
                        if (FLAGS_SET(flags, CGROUP_IGNORE_SELF) && pid == getpid_cached())
6,063✔
UNCOV
488
                                continue;
×
489

490
                        if (set_contains(s, PID_TO_PTR(pid)))
6,063✔
UNCOV
491
                                continue;
×
492

493
                        if (pid_is_kernel_thread(pid) > 0)
6,063✔
494
                                continue;
6,062✔
495

496
                        r = cg_attach(cto, pto, pid);
1✔
497
                        if (r < 0) {
1✔
UNCOV
498
                                if (r != -ESRCH)
×
UNCOV
499
                                        RET_GATHER(ret, r);
×
500
                        } else if (ret == 0)
1✔
501
                                ret = 1;
1✔
502

503
                        done = false;
1✔
504

505
                        r = set_ensure_put(&s, /* hash_ops = */ NULL, PID_TO_PTR(pid));
1✔
506
                        if (r < 0)
1✔
UNCOV
507
                                return RET_GATHER(ret, r);
×
508
                }
509
                if (r < 0)
266✔
UNCOV
510
                        return RET_GATHER(ret, r);
×
511
        } while (!done);
266✔
512

513
        return ret;
514
}
515

516
/* Creates the cgroup 'path' in the systemd hierarchy and, unless everything is unified, also in the
 * hierarchies of those supported v1 controllers that are selected by 'mask'. Failures to create the
 * cgroup in these additional hierarchies are ignored.
 *
 * Returns 0 if the group already existed in the systemd hierarchy, 1 if it was newly created there,
 * negative otherwise. */
int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
        CGroupMask handled = 0;
        bool created;
        int k;

        /* First create the cgroup in our own hierarchy. */
        k = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
        if (k < 0)
                return k;
        created = k;

        /* If we are in the unified hierarchy, we are done now */
        k = cg_all_unified();
        if (k < 0)
                return k;
        if (k > 0)
                return created;

        supported &= CGROUP_MASK_V1;
        mask = CGROUP_MASK_EXTEND_JOINED(mask);

        /* Otherwise, do the same in the other hierarchies */
        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *name;

                if (!FLAGS_SET(supported, bit) || FLAGS_SET(handled, bit))
                        continue;

                name = cgroup_controller_to_string(c);
                if (FLAGS_SET(mask, bit))
                        (void) cg_create(name, path);

                handled |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return created;
}
567

568
/* Attaches 'pid' to the cgroup 'path' in the systemd hierarchy and, unless everything is unified, also
 * in the hierarchies of all supported v1 controllers. For the latter cg_attach_fallback() is used and
 * its result is ignored.
 *
 * Returns 0 on success, or a negative error if attaching in the systemd hierarchy or
 * cg_all_unified() fails. */
int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid) {
        int k;

        assert(path);
        assert(pid >= 0);

        k = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
        if (k < 0)
                return k;

        k = cg_all_unified();
        if (k < 0)
                return k;
        if (k > 0)
                return 0;

        supported &= CGROUP_MASK_V1;
        CGroupMask handled = 0;

        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);

                if (!FLAGS_SET(supported, bit) || FLAGS_SET(handled, bit))
                        continue;

                (void) cg_attach_fallback(cgroup_controller_to_string(c), path, pid);
                handled |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return 0;
}
602

603
/* Trims the cgroup 'path' in the systemd hierarchy via cg_trim() and, unless everything is unified,
 * also in the supported v1 controller hierarchies via cg_trim_v1_controllers().
 *
 * Returns a negative error if the trim of the systemd hierarchy or cg_all_unified() fails. Otherwise
 * returns the cg_trim() result when unified, or the cg_trim_v1_controllers() result when not. */
int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
        int trimmed, unified;

        assert(path);

        trimmed = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
        if (trimmed < 0)
                return trimmed;

        unified = cg_all_unified();
        if (unified < 0)
                return unified;

        return unified > 0
                ? trimmed
                : cg_trim_v1_controllers(supported, _CGROUP_MASK_ALL, path, delete_root);
}
620

621
int cg_enable_everywhere(
5,883✔
622
                CGroupMask supported,
623
                CGroupMask mask,
624
                const char *p,
625
                CGroupMask *ret_result_mask) {
626

627
        _cleanup_fclose_ FILE *f = NULL;
5,883✔
628
        _cleanup_free_ char *fs = NULL;
5,883✔
629
        CGroupController c;
5,883✔
630
        CGroupMask ret = 0;
5,883✔
631
        int r;
5,883✔
632

633
        assert(p);
5,883✔
634

635
        if (supported == 0) {
5,883✔
UNCOV
636
                if (ret_result_mask)
×
UNCOV
637
                        *ret_result_mask = 0;
×
UNCOV
638
                return 0;
×
639
        }
640

641
        r = cg_all_unified();
5,883✔
642
        if (r < 0)
5,883✔
643
                return r;
644
        if (r == 0) {
5,883✔
645
                /* On the legacy hierarchy there's no concept of "enabling" controllers in cgroups defined. Let's claim
646
                 * complete success right away. (If you wonder why we return the full mask here, rather than zero: the
647
                 * caller tends to use the returned mask later on to compare if all controllers where properly joined,
648
                 * and if not requeues realization. This use is the primary purpose of the return value, hence let's
649
                 * minimize surprises here and reduce triggers for re-realization by always saying we fully
650
                 * succeeded.) */
UNCOV
651
                if (ret_result_mask)
×
UNCOV
652
                        *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with
×
653
                                                                               * CGROUP_MASK_V2: The 'supported' mask
654
                                                                               * might contain pure-V1 or BPF
655
                                                                               * controllers, and we never want to
656
                                                                               * claim that we could enable those with
657
                                                                               * cgroup.subtree_control */
UNCOV
658
                return 0;
×
659
        }
660

661
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
5,883✔
662
        if (r < 0)
5,883✔
663
                return r;
664

665
        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
82,362✔
666
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
76,479✔
667
                const char *n;
76,479✔
668

669
                if (!FLAGS_SET(CGROUP_MASK_V2, bit))
76,479✔
670
                        continue;
47,064✔
671

672
                if (!FLAGS_SET(supported, bit))
29,415✔
673
                        continue;
2,587✔
674

675
                n = cgroup_controller_to_string(c);
26,828✔
676
                {
26,828✔
677
                        char s[1 + strlen(n) + 1];
26,828✔
678

679
                        s[0] = FLAGS_SET(mask, bit) ? '+' : '-';
26,828✔
680
                        strcpy(s + 1, n);
26,828✔
681

682
                        if (!f) {
26,828✔
683
                                f = fopen(fs, "we");
5,860✔
684
                                if (!f)
5,860✔
UNCOV
685
                                        return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
×
686
                        }
687

688
                        r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
26,828✔
689
                        if (r < 0) {
26,828✔
690
                                log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
6✔
691
                                                FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs);
692
                                clearerr(f);
6✔
693

694
                                /* If we can't turn off a controller, leave it on in the reported resulting mask. This
695
                                 * happens for example when we attempt to turn off a controller up in the tree that is
696
                                 * used down in the tree. */
697
                                if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY
6✔
698
                                                                           * only here, and not follow the same logic
699
                                                                           * for other errors such as EINVAL or
700
                                                                           * EOPNOTSUPP or anything else. That's
701
                                                                           * because EBUSY indicates that the
702
                                                                           * controllers is currently enabled and
703
                                                                           * cannot be disabled because something down
704
                                                                           * the hierarchy is still using it. Any other
705
                                                                           * error most likely means something like "I
706
                                                                           * never heard of this controller" or
707
                                                                           * similar. In the former case it's hence
708
                                                                           * safe to assume the controller is still on
709
                                                                           * after the failed operation, while in the
710
                                                                           * latter case it's safer to assume the
711
                                                                           * controller is unknown and hence certainly
712
                                                                           * not enabled. */
713
                                        ret |= bit;
×
714
                        } else {
715
                                /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */
716
                                if (FLAGS_SET(mask, bit))
26,822✔
717
                                        ret |= bit;
3,992✔
718
                        }
719
                }
720
        }
721

722
        /* Let's return the precise set of controllers now enabled for the cgroup. */
723
        if (ret_result_mask)
5,883✔
724
                *ret_result_mask = ret;
5,716✔
725

726
        return 0;
727
}
728

729
int cg_migrate_recursive(
3✔
730
                const char *cfrom,
731
                const char *pfrom,
732
                const char *cto,
733
                const char *pto,
734
                CGroupFlags flags) {
735

736
        _cleanup_closedir_ DIR *d = NULL;
3✔
737
        int r, ret = 0;
3✔
738
        char *fn;
3✔
739

740
        assert(cfrom);
3✔
741
        assert(pfrom);
3✔
742
        assert(cto);
3✔
743
        assert(pto);
3✔
744

745
        ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
3✔
746

747
        r = cg_enumerate_subgroups(cfrom, pfrom, &d);
3✔
748
        if (r < 0) {
3✔
UNCOV
749
                if (ret >= 0 && r != -ENOENT)
×
750
                        return r;
751

UNCOV
752
                return ret;
×
753
        }
754

755
        while ((r = cg_read_subgroup(d, &fn)) > 0) {
5✔
756
                _cleanup_free_ char *p = NULL;
2✔
757

758
                p = path_join(empty_to_root(pfrom), fn);
4✔
759
                free(fn);
2✔
760
                if (!p)
2✔
UNCOV
761
                        return -ENOMEM;
×
762

763
                r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
2✔
764
                if (r != 0 && ret >= 0)
2✔
765
                        ret = r;
1✔
766
        }
767

768
        if (r < 0 && ret >= 0)
3✔
UNCOV
769
                ret = r;
×
770

771
        return ret;
772
}
773

UNCOV
774
int cg_migrate_recursive_fallback(
×
775
                const char *cfrom,
776
                const char *pfrom,
777
                const char *cto,
778
                const char *pto,
779
                CGroupFlags flags) {
780

UNCOV
781
        int r;
×
782

UNCOV
783
        assert(cfrom);
×
UNCOV
784
        assert(pfrom);
×
UNCOV
785
        assert(cto);
×
UNCOV
786
        assert(pto);
×
787

UNCOV
788
        r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
×
UNCOV
789
        if (r < 0) {
×
UNCOV
790
                char prefix[strlen(pto) + 1];
×
791

792
                /* This didn't work? Then let's try all prefixes of the destination */
793

794
                PATH_FOREACH_PREFIX(prefix, pto) {
×
795
                        int q;
×
796

UNCOV
797
                        q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
×
UNCOV
798
                        if (q >= 0)
×
UNCOV
799
                                return q;
×
800
                }
801
        }
802

803
        return r;
804
}
805

UNCOV
806
int cg_migrate_v1_controllers(CGroupMask supported, CGroupMask mask, const char *from, cg_migrate_callback_t to_callback, void *userdata) {
        /* For each controller that is set in both 'supported' (restricted to CGROUP_MASK_V1) and 'mask'
         * (extended via CGROUP_MASK_EXTEND_JOINED()), asks to_callback() for the destination path and
         * migrates 'from' in the SYSTEMD_CGROUP_CONTROLLER hierarchy over to it. Bits already covered
         * through CGROUP_MASK_EXTEND_JOINED() of an earlier controller are processed only once.
         *
         * Returns the first negative result encountered; otherwise the result of the last migration
         * attempted, or 0 if none was attempted. */

        CGroupMask handled = 0;
        int ret = 0;

        assert(to_callback);

        supported &= CGROUP_MASK_V1;
        mask = CGROUP_MASK_EXTEND_JOINED(mask);

        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *to;
                int q;

                /* Skip what is unsupported, already taken care of, or not requested */
                if (!FLAGS_SET(supported, bit) ||
                    FLAGS_SET(handled, bit) ||
                    !FLAGS_SET(mask, bit))
                        continue;

                to = to_callback(bit, userdata);

                /* Keep going on failure, but hold on to the first error we saw */
                q = cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, from, cgroup_controller_to_string(c), to, 0);
                if (ret >= 0)
                        ret = q;

                handled |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return ret;
}
841

842
int cg_trim_v1_controllers(CGroupMask supported, CGroupMask mask, const char *path, bool delete_root) {
        /* Calls cg_trim() on 'path' for each controller that is set in both 'supported' (restricted to
         * CGROUP_MASK_V1) and 'mask' (extended via CGROUP_MASK_EXTEND_JOINED()). Every supported
         * controller visited is marked as handled together with its joined bits, regardless of whether
         * it was trimmed, so that those bits are not visited again.
         *
         * Returns the first negative result encountered; otherwise the result of the last cg_trim()
         * call, or 0 if none was made. */

        CGroupMask handled = 0;
        int ret = 0;

        supported &= CGROUP_MASK_V1;
        mask = CGROUP_MASK_EXTEND_JOINED(mask);

        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);

                /* Skip what is unsupported or already taken care of */
                if (!FLAGS_SET(supported, bit) || FLAGS_SET(handled, bit))
                        continue;

                if (FLAGS_SET(mask, bit)) {
                        int q;

                        /* Keep going on failure, but hold on to the first error we saw */
                        q = cg_trim(cgroup_controller_to_string(c), path, delete_root);
                        if (ret >= 0)
                                ret = q;
                }

                handled |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return ret;
}
870

UNCOV
871
int cg_install_release_agent(const char *controller, const char *agent) {
×
UNCOV
872
        _cleanup_free_ char *fs = NULL, *contents = NULL;
×
UNCOV
873
        const char *sc;
×
UNCOV
874
        int r;
×
875

UNCOV
876
        assert(agent);
×
877

UNCOV
878
        r = cg_unified_controller(controller);
×
UNCOV
879
        if (r < 0)
×
880
                return r;
UNCOV
881
        if (r > 0) /* doesn't apply to unified hierarchy */
×
882
                return -EOPNOTSUPP;
883

UNCOV
884
        r = cg_get_path(controller, NULL, "release_agent", &fs);
×
UNCOV
885
        if (r < 0)
×
886
                return r;
887

UNCOV
888
        r = read_one_line_file(fs, &contents);
×
UNCOV
889
        if (r < 0)
×
890
                return r;
891

UNCOV
892
        sc = strstrip(contents);
×
UNCOV
893
        if (isempty(sc)) {
×
UNCOV
894
                r = write_string_file(fs, agent, WRITE_STRING_FILE_DISABLE_BUFFER);
×
UNCOV
895
                if (r < 0)
×
896
                        return r;
UNCOV
897
        } else if (!path_equal(sc, agent))
×
898
                return -EEXIST;
899

UNCOV
900
        fs = mfree(fs);
×
UNCOV
901
        r = cg_get_path(controller, NULL, "notify_on_release", &fs);
×
UNCOV
902
        if (r < 0)
×
903
                return r;
904

UNCOV
905
        contents = mfree(contents);
×
906
        r = read_one_line_file(fs, &contents);
×
UNCOV
907
        if (r < 0)
×
908
                return r;
909

UNCOV
910
        sc = strstrip(contents);
×
UNCOV
911
        if (streq(sc, "0")) {
×
UNCOV
912
                r = write_string_file(fs, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
×
UNCOV
913
                if (r < 0)
×
914
                        return r;
915

UNCOV
916
                return 1;
×
917
        }
918

UNCOV
919
        if (!streq(sc, "1"))
×
UNCOV
920
                return -EIO;
×
921

922
        return 0;
923
}
924

925
int cg_uninstall_release_agent(const char *controller) {
3✔
926
        _cleanup_free_ char *fs = NULL;
3✔
927
        int r;
3✔
928

929
        r = cg_unified_controller(controller);
3✔
930
        if (r < 0)
3✔
931
                return r;
932
        if (r > 0) /* Doesn't apply to unified hierarchy */
3✔
933
                return -EOPNOTSUPP;
934

UNCOV
935
        r = cg_get_path(controller, NULL, "notify_on_release", &fs);
×
UNCOV
936
        if (r < 0)
×
937
                return r;
938

UNCOV
939
        r = write_string_file(fs, "0", WRITE_STRING_FILE_DISABLE_BUFFER);
×
940
        if (r < 0)
×
941
                return r;
942

943
        fs = mfree(fs);
×
944

945
        r = cg_get_path(controller, NULL, "release_agent", &fs);
×
946
        if (r < 0)
×
947
                return r;
948

UNCOV
949
        r = write_string_file(fs, "", WRITE_STRING_FILE_DISABLE_BUFFER);
×
UNCOV
950
        if (r < 0)
×
951
                return r;
×
952

953
        return 0;
954
}
955

956
int cg_has_legacy(void) {
250✔
957
        struct statfs fs;
250✔
958

959
        /* Checks if any legacy controller/hierarchy is mounted. */
960

961
        if (statfs("/sys/fs/cgroup/", &fs) < 0) {
250✔
UNCOV
962
                if (errno == ENOENT) /* sysfs not mounted? */
×
963
                        return false;
250✔
964

965
                return log_error_errno(errno, "Failed to statfs /sys/fs/cgroup/: %m");
×
966
        }
967

968
        if (is_fs_type(&fs, CGROUP2_SUPER_MAGIC) ||
250✔
UNCOV
969
            is_fs_type(&fs, SYSFS_MAGIC)) /* not mounted yet */
×
970
                return false;
971

972
        if (is_fs_type(&fs, TMPFS_MAGIC)) {
×
UNCOV
973
                log_info("Found tmpfs on /sys/fs/cgroup/, assuming legacy hierarchy.");
×
974
                return true;
×
975
        }
976

UNCOV
977
        return log_error_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
×
978
                               "Unknown filesystem type %llx mounted on /sys/fs/cgroup/.",
979
                               (unsigned long long) fs.f_type);
980
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc