• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 16280725298

14 Jul 2025 08:16PM UTC coverage: 72.166% (-0.006%) from 72.172%
16280725298

push

github

web-flow
Two fixlets for coverage test (#38183)

302135 of 418667 relevant lines covered (72.17%)

773261.64 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

76.13
/src/nspawn/nspawn-mount.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <linux/magic.h>
4
#include <sys/mount.h>
5
#include <unistd.h>
6

7
#include "alloc-util.h"
8
#include "chase.h"
9
#include "errno-util.h"
10
#include "escape.h"
11
#include "extract-word.h"
12
#include "fd-util.h"
13
#include "format-util.h"
14
#include "fs-util.h"
15
#include "log.h"
16
#include "mkdir-label.h"
17
#include "mount-util.h"
18
#include "mountpoint-util.h"
19
#include "namespace-util.h"
20
#include "nspawn-mount.h"
21
#include "path-util.h"
22
#include "rm-rf.h"
23
#include "sort-util.h"
24
#include "stat-util.h"
25
#include "string-util.h"
26
#include "strv.h"
27
#include "tmpfile-util.h"
28
#include "user-util.h"
29

30
/* Grows the array '*l' (currently holding '*n' entries) by one slot and returns a pointer to the new
 * entry. The new entry has its type set to 't' and its destination UID set to UID_INVALID; all other
 * fields are zero-initialized. '*n' is incremented on success. Returns NULL if the array could not be
 * enlarged, in which case '*n' is left untouched. */
CustomMount* custom_mount_add(CustomMount **l, size_t *n, CustomMountType t) {
        assert(l);
        assert(n);
        assert(t >= 0);
        assert(t < _CUSTOM_MOUNT_TYPE_MAX);

        if (!GREEDY_REALLOC(*l, *n + 1))
                return NULL;

        /* Claim the slot right behind the last used one, and account for it */
        CustomMount *entry = &(*l)[(*n)++];

        *entry = (CustomMount) {
                .type = t,
                .destination_uid = UID_INVALID,
        };

        return entry;
}
51

52
/* Releases everything a single custom mount entry owns. Any directory recorded in 'work_dir' or
 * 'rm_rf_tmpdir' is removed from disk (best effort) before the string naming it is freed. The entry
 * itself is not freed. */
static void custom_mount_release_one(CustomMount *m) {
        if (m->work_dir) {
                (void) rm_rf(m->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
                free(m->work_dir);
        }

        if (m->rm_rf_tmpdir) {
                (void) rm_rf(m->rm_rf_tmpdir, REMOVE_ROOT|REMOVE_PHYSICAL);
                free(m->rm_rf_tmpdir);
        }

        free(m->source);
        free(m->destination);
        free(m->options);
        free(m->type_argument);
        strv_free(m->lower);
}

/* Frees an array of 'n' custom mount entries, including all resources owned by each entry and the
 * array itself. */
void custom_mount_free_all(CustomMount *l, size_t n) {
        FOREACH_ARRAY(m, l, n)
                custom_mount_release_one(m);

        free(l);
}
1,176✔
74

75
static int custom_mount_compare(const CustomMount *a, const CustomMount *b) {
103✔
76
        int r;
103✔
77

78
        r = path_compare(a->destination, b->destination);
103✔
79
        if (r != 0)
103✔
80
                return r;
81

82
        return CMP(a->type, b->type);
×
83
}
84

85
/* Parses a source path specification 'p' and stores a newly allocated result in '*ret'.
 *
 * A specification starting with "+" must be followed by an absolute path and is copied verbatim,
 * including the "+" (resolve_source_path() later joins such paths onto the destination root).
 * Anything else is passed through path_make_absolute_cwd().
 *
 * Returns 0 on success for "+" paths, otherwise whatever path_make_absolute_cwd() returns; -EINVAL
 * for an empty string or a "+" not followed by an absolute path; -ENOMEM if the copy fails. */
static int source_path_parse(const char *p, char **ret) {
        assert(p);
        assert(ret);

        if (isempty(p))
                return -EINVAL;

        /* The common case: no "+" marker */
        if (*p != '+')
                return path_make_absolute_cwd(p, ret);

        if (!path_is_absolute(p + 1))
                return -EINVAL;

        char *copy = strdup(p);
        if (!copy)
                return -ENOMEM;

        *ret = copy;
        return 0;
}
106

107
/* Like source_path_parse(), but an empty string is accepted: in that case '*ret' is set to NULL and 0
 * is returned. */
static int source_path_parse_nullable(const char *p, char **ret) {
        assert(p);
        assert(ret);

        if (!isempty(p))
                return source_path_parse(p, ret);

        *ret = NULL;
        return 0;
}
118

119
/* Returns a newly allocated version of 'source': if it carries a "+" prefix the remainder is joined
 * onto 'dest', otherwise it is duplicated as is. Returns NULL if 'source' is NULL or if allocation
 * fails. */
static char *resolve_source_path(const char *dest, const char *source) {
        if (!source)
                return NULL;

        return source[0] == '+' ? path_join(dest, source + 1) : strdup(source);
}
128

129
/* Creates a throw-away directory below /var/tmp for a mount that has no source yet. The temporary
 * directory is recorded in m->rm_rf_tmpdir, and a freshly created "src" subdirectory of it becomes
 * m->source. Failures are logged; returns 0 on success, a negative error otherwise. */
static int allocate_temporary_source(CustomMount *m) {
        assert(m);
        assert(!m->source);
        assert(!m->rm_rf_tmpdir);

        int r = mkdtemp_malloc("/var/tmp/nspawn-temp-XXXXXX", &m->rm_rf_tmpdir);
        if (r < 0)
                return log_error_errno(r, "Failed to acquire temporary directory: %m");

        m->source = path_join(m->rm_rf_tmpdir, "src");
        if (!m->source)
                return log_oom();

        if (mkdir(m->source, 0755) >= 0)
                return 0;

        return log_error_errno(errno, "Failed to create %s: %m", m->source);
}
149

150
/* Replaces '*path' with the string resolve_source_path() produces for it relative to 'dest'. Returns
 * -ENOMEM (leaving '*path' as it was) if no replacement string could be obtained, 0 otherwise. */
static int resolve_source_path_inplace(const char *dest, char **path) {
        char *resolved = resolve_source_path(dest, *path);
        if (!resolved)
                return -ENOMEM;

        free_and_replace(*path, resolved);
        return 0;
}

/* Prepares all custom mounts, so that every temporary directory is known afterwards. This is called in
 * the parent process, so that we know the temporary directories to remove on exit before we fork off
 * the children.
 *
 * The array is sorted (see custom_mount_compare()). For bind and overlay mounts the source paths are
 * then resolved against 'dest', or a temporary source is allocated where none was given. For overlay
 * mounts the lower directories are resolved too, and a working directory is resolved or generated and
 * then created on a best-effort basis.
 *
 * Returns 0 on success, a negative error (already logged) otherwise. */
int custom_mount_prepare_all(const char *dest, CustomMount *l, size_t n) {
        int r;

        assert(l || n == 0);

        /* Order the custom mounts first */
        typesafe_qsort(l, n, custom_mount_compare);

        FOREACH_ARRAY(m, l, n) {
                bool is_bind = m->type == CUSTOM_MOUNT_BIND,
                     is_overlay = m->type == CUSTOM_MOUNT_OVERLAY;

                /* /proc we mount in the inner child, i.e. when we acquired CLONE_NEWPID. All other mounts we
                 * mount already in the outer child, so that the mounts are already established before
                 * CLONE_NEWPID and in particular CLONE_NEWUSER. This also means any custom mounts below /proc
                 * also need to be mounted in the inner child, not the outer one. Determine this here. */
                m->in_userns = path_startswith(m->destination, "/proc");

                if (!is_bind && !is_overlay)
                        continue;

                /* Overlay mounts get their lower directories resolved before the source is dealt with */
                if (is_overlay)
                        STRV_FOREACH(j, m->lower) {
                                if (resolve_source_path_inplace(dest, j) < 0)
                                        return log_oom();
                        }

                if (m->source) {
                        if (resolve_source_path_inplace(dest, &m->source) < 0)
                                return log_oom();
                } else {
                        /* No source specified? In that case, use a throw-away temporary directory in /var/tmp */
                        r = allocate_temporary_source(m);
                        if (r < 0)
                                return r;
                }

                if (!is_overlay)
                        continue;

                /* Make sure the overlay has a working directory */
                if (m->work_dir) {
                        if (resolve_source_path_inplace(dest, &m->work_dir) < 0)
                                return log_oom();
                } else {
                        r = tempfn_random(m->source, NULL, &m->work_dir);
                        if (r < 0)
                                return log_error_errno(r, "Failed to acquire working directory: %m");
                }

                (void) mkdir_label(m->work_dir, 0700);
        }

        return 0;
}
232

233
/* Parses a bind mount specification of the form "SOURCE[:DESTINATION[:OPTIONS]]" and appends a
 * matching CUSTOM_MOUNT_BIND entry to the array '*l' / '*n'.
 *
 * If only one field is given, the destination is the source with a leading "+" stripped. An empty
 * source is permitted and results in an entry without source. The destination must be absolute.
 *
 * Returns 0 on success; -EINVAL for an empty specification or a non-absolute destination; -ENOMEM on
 * allocation failure; other negative errors as returned by the word extraction or source parsing. */
int bind_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only) {
        _cleanup_free_ char *src = NULL, *dst = NULL, *options = NULL, *parsed_src = NULL;
        int r;

        assert(l);
        assert(n);

        r = extract_many_words(&s, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &src, &dst);
        if (r < 0)
                return r;

        switch (r) {

        case 0:
                return -EINVAL;

        case 1:
                /* Only one field: derive the destination from the source, without "+" prefix */
                dst = strdup(src[0] == '+' ? src+1 : src);
                if (!dst)
                        return -ENOMEM;
                break;

        case 2:
                /* Whatever is left after the second field are the mount options */
                if (!isempty(s)) {
                        options = strdup(s);
                        if (!options)
                                return -ENOMEM;
                }
                break;

        default:
                break;
        }

        r = source_path_parse_nullable(src, &parsed_src);
        if (r < 0)
                return r;

        if (!path_is_absolute(dst))
                return -EINVAL;

        CustomMount *m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
        if (!m)
                return -ENOMEM;

        m->read_only = read_only;
        m->source = TAKE_PTR(parsed_src);
        m->destination = TAKE_PTR(dst);
        m->options = TAKE_PTR(options);

        return 0;
}
275

276
/* Parses a tmpfs mount specification of the form "PATH[:OPTIONS]" and appends a matching
 * CUSTOM_MOUNT_TMPFS entry to the array '*l' / '*n'. If no options are given, "mode=0755" is used.
 *
 * Returns 0 on success; -EINVAL if no path was given or it is not absolute; -ENOMEM on allocation
 * failure; other negative errors as returned by the word extraction. */
int tmpfs_mount_parse(CustomMount **l, size_t *n, const char *s) {
        _cleanup_free_ char *where = NULL, *options = NULL;
        const char *rest = ASSERT_PTR(s);
        int r;

        assert(l);
        assert(n);

        r = extract_first_word(&rest, &where, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
        if (r < 0)
                return r;
        if (r == 0)
                return -EINVAL;

        /* Everything after the first separator is taken as options, with a default if there is nothing */
        options = strdup(isempty(rest) ? "mode=0755" : rest);
        if (!options)
                return -ENOMEM;

        if (!path_is_absolute(where))
                return -EINVAL;

        CustomMount *m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
        if (!m)
                return -ENOMEM;

        m->destination = TAKE_PTR(where);
        m->options = TAKE_PTR(options);

        return 0;
}
310

311
/* Parses an overlay mount specification, a ":"-separated list of paths, and appends a matching
 * CUSTOM_MOUNT_OVERLAY entry to the array '*l' / '*n'.
 *
 * With exactly two fields, the first is the lower and the second the upper directory, and the
 * destination is the upper directory with a leading "+" stripped. With more than two fields, the last
 * one is the destination, the second to last one the upper directory (which may be empty, in which
 * case the entry gets no source), and all fields before that are lower directories.
 *
 * Returns 0 on success; -EADDRNOTAVAIL if fewer than two fields were given; -EINVAL if the
 * destination is not absolute; -ENOMEM on allocation failure; other negative errors as returned by
 * the splitting or source parsing. */
int overlay_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only) {
        _cleanup_free_ char *upper = NULL, *destination = NULL;
        _cleanup_strv_free_ char **lower = NULL;
        CustomMount *m;
        int r, k;

        assert(l);
        assert(n);

        k = strv_split_full(&lower, s, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
        if (k < 0)
                return k;
        if (k < 2)
                return -EADDRNOTAVAIL;
        if (k == 2) {
                _cleanup_free_ char *p = NULL;

                /* If two parameters are specified, the first one is the lower, the second one the upper directory. And
                 * we'll also define the destination mount point the same as the upper. */

                r = source_path_parse(lower[0], &p);
                if (r < 0)
                        return r;

                free_and_replace(lower[0], p);

                r = source_path_parse(lower[1], &p);
                if (r < 0)
                        return r;

                free_and_replace(lower[1], p);

                upper = TAKE_PTR(lower[1]);

                destination = strdup(upper[0] == '+' ? upper+1 : upper); /* take the destination without "+" prefix */
                if (!destination)
                        return -ENOMEM;
        } else {
                _cleanup_free_ char *p = NULL;

                /* If more than two parameters are specified, the last one is the destination, the second to last one
                 * the "upper", and all before that the "lower" directories. */

                /* Move both strings out of the vector, so that each allocation has exactly one owner. Clearing
                 * the second to last slot also terminates the vector right after the lower directories. */
                destination = TAKE_PTR(lower[k - 1]);
                upper = TAKE_PTR(lower[k - 2]);

                STRV_FOREACH(i, lower) {
                        r = source_path_parse(*i, &p);
                        if (r < 0)
                                return r;

                        free_and_replace(*i, p);
                }

                /* If the upper directory is unspecified, then let's create it automatically as a throw-away directory
                 * in /var/tmp */
                r = source_path_parse_nullable(upper, &p);
                if (r < 0)
                        return r;

                free_and_replace(upper, p);

                if (!path_is_absolute(destination))
                        return -EINVAL;
        }

        m = custom_mount_add(l, n, CUSTOM_MOUNT_OVERLAY);
        if (!m)
                return -ENOMEM;

        m->destination = TAKE_PTR(destination);
        m->source = TAKE_PTR(upper);
        m->lower = TAKE_PTR(lower);
        m->read_only = read_only;

        return 0;
}
385

386
/* Appends a CUSTOM_MOUNT_INACCESSIBLE entry for the path 's' to the array '*l' / '*n'.
 *
 * Returns 0 on success, -EINVAL if 's' is not an absolute path, -ENOMEM on allocation failure. */
int inaccessible_mount_parse(CustomMount **l, size_t *n, const char *s) {
        _cleanup_free_ char *where = NULL;

        assert(l);
        assert(n);
        assert(s);

        if (!path_is_absolute(s))
                return -EINVAL;

        where = strdup(s);
        if (!where)
                return -ENOMEM;

        CustomMount *m = custom_mount_add(l, n, CUSTOM_MOUNT_INACCESSIBLE);
        if (!m)
                return -ENOMEM;

        m->destination = TAKE_PTR(where);
        return 0;
}
408

409
int tmpfs_patch_options(
1,052✔
410
                const char *options,
411
                uid_t uid_shift,
412
                const char *selinux_apifs_context,
413
                char **ret) {
414

415
        _cleanup_free_ char *buf = NULL;
1,052✔
416

417
        assert(ret);
1,052✔
418

419
        if (options) {
1,052✔
420
                buf = strdup(options);
1,052✔
421
                if (!buf)
1,052✔
422
                        return -ENOMEM;
423
        }
424

425
        if (uid_shift != UID_INVALID)
1,052✔
426
                if (strextendf_with_separator(&buf, ",", "uid=" UID_FMT ",gid=" UID_FMT, uid_shift, uid_shift) < 0)
1,040✔
427
                        return -ENOMEM;
428

429
#if HAVE_SELINUX
430
        if (selinux_apifs_context)
431
                if (strextendf_with_separator(&buf, ",", "context=\"%s\"", selinux_apifs_context) < 0)
432
                        return -ENOMEM;
433
#endif
434

435
        *ret = TAKE_PTR(buf);
1,052✔
436
        return !!*ret;
1,052✔
437
}
438

439
/* Sets up "/sys" below 'dest'.
 *
 * If "<dest>/sys" is not a mount point yet, a tmpfs is mounted there. If it is a mount point and
 * already of type sysfs, nothing is done and 0 is returned. Otherwise, a sysfs instance is mounted
 * temporarily on "<dest>/sys/full", a fixed set of its subdirectories is bind mounted into
 * "<dest>/sys", and the temporary mount is unmounted and its directory removed again. Finally, the
 * mount points "fs/cgroup" and "fs/bpf" are created (best effort) and "<dest>/sys" itself is
 * remounted with the final flags.
 *
 * If MOUNT_APPLY_APIVFS_RO is set in 'mount_settings', MS_RDONLY is added to the sysfs mount and to
 * all remounts.
 *
 * Returns a negative error on failure (already logged), otherwise 0 or the non-negative result of the
 * final remount. */
int mount_sysfs(const char *dest, MountSettingsMask mount_settings) {
        _cleanup_free_ char *top = NULL, *full = NULL;
        unsigned long extra_flags = 0;
        int r;

        top = path_join(dest, "/sys");
        if (!top)
                return log_oom();

        r = path_is_mount_point(top);
        if (r < 0)
                return log_error_errno(r, "Failed to determine if '%s' is a mountpoint: %m", top);
        if (r == 0) {
                /* If this is not a mount point yet, then mount a tmpfs there */
                r = mount_nofollow_verbose(LOG_ERR, "tmpfs", top, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV, "mode=0555" TMPFS_LIMITS_SYS);
                if (r < 0)
                        return r;
        } else {
                r = path_is_fs_type(top, SYSFS_MAGIC);
                if (r < 0)
                        return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top);

                /* /sys/ might already be mounted as sysfs by the outer child in the !netns case. In this case, it's
                 * all good. Don't touch it because we don't have the right to do so, see
                 * https://github.com/systemd/systemd/issues/1555.
                 */
                if (r > 0)
                        return 0;
        }

        /* Temporary location for the complete sysfs instance, from which we pick the parts to expose */
        full = path_join(top, "/full");
        if (!full)
                return log_oom();

        if (mkdir(full, 0755) < 0 && errno != EEXIST)
                return log_error_errno(errno, "Failed to create directory '%s': %m", full);

        if (FLAGS_SET(mount_settings, MOUNT_APPLY_APIVFS_RO))
                extra_flags |= MS_RDONLY;

        r = mount_nofollow_verbose(LOG_ERR, "sysfs", full, "sysfs",
                                   MS_NOSUID|MS_NOEXEC|MS_NODEV|extra_flags, NULL);
        if (r < 0)
                return r;

        FOREACH_STRING(x, "block", "bus", "class", "dev", "devices", "kernel") {
                _cleanup_free_ char *from = NULL, *to = NULL;

                from = path_join(full, x);
                if (!from)
                        return log_oom();

                to = path_join(top, x);
                if (!to)
                        return log_oom();

                /* Best effort: if the directory cannot be created the bind mount below reports the failure */
                (void) mkdir(to, 0755);

                /* Bind mount first ... */
                r = mount_nofollow_verbose(LOG_ERR, from, to, NULL, MS_BIND, NULL);
                if (r < 0)
                        return r;

                /* ... then remount the bind mount to apply the flags */
                r = mount_nofollow_verbose(LOG_ERR, NULL, to, NULL,
                                           MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL);
                if (r < 0)
                        return r;
        }

        /* The temporary full instance is not needed anymore, get rid of it */
        r = umount_verbose(LOG_ERR, full, UMOUNT_NOFOLLOW);
        if (r < 0)
                return r;

        if (rmdir(full) < 0)
                return log_error_errno(errno, "Failed to remove %s: %m", full);

        /* Create mountpoints. Otherwise we are not allowed since we remount /sys/ read-only. */
        FOREACH_STRING(p, "/fs/cgroup", "/fs/bpf") {
                _cleanup_free_ char *x = path_join(top, p);
                if (!x)
                        return log_oom();

                (void) mkdir_p(x, 0755);
        }

        return mount_nofollow_verbose(LOG_ERR, NULL, top, NULL,
                                      MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL);
}
526

527
/* Mount flags used by the mount table below: the first for the "/proc" entry, the second for the
 * read-only sysfs "/sys" entry. */
#define PROC_DEFAULT_MOUNT_FLAGS (MS_NOSUID|MS_NOEXEC|MS_NODEV)
#define SYS_DEFAULT_MOUNT_FLAGS  (MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV)
529

530
int mount_all(const char *dest,
346✔
531
              MountSettingsMask mount_settings,
532
              uid_t uid_shift,
533
              const char *selinux_apifs_context) {
534

535
#define PROC_INACCESSIBLE_REG(path)                                     \
536
        { "/run/systemd/inaccessible/reg", (path), NULL, NULL, MS_BIND, \
537
          MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ \
538
        { NULL, (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \
539
          MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */
540

541
#define PROC_READ_ONLY(path)                                            \
542
        { (path), (path), NULL, NULL, MS_BIND,                          \
543
          MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ \
544
        { NULL,   (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \
545
          MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */
546

547
        typedef struct MountPoint {
346✔
548
                const char *what;
549
                const char *where;
550
                const char *type;
551
                const char *options;
552
                unsigned long flags;
553
                MountSettingsMask mount_settings;
554
        } MountPoint;
555

556
        static const MountPoint mount_table[] = {
346✔
557
                /* First we list inner child mounts (i.e. mounts applied *after* entering user namespacing when we are privileged) */
558
                { "proc",            "/proc",           "proc",  NULL,        PROC_DEFAULT_MOUNT_FLAGS,
559
                  MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_MKDIR|MOUNT_FOLLOW_SYMLINKS }, /* we follow symlinks here since not following them requires /proc/ already being mounted, which we don't have here. */
560

561
                { "/proc/sys",       "/proc/sys",       NULL,    NULL,        MS_BIND,
562
                  MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO },                          /* Bind mount first ... */
563

564
                { "/proc/sys/net",   "/proc/sys/net",   NULL,    NULL,        MS_BIND,
565
                  MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO|MOUNT_APPLY_APIVFS_NETNS }, /* (except for this) */
566

567
                { NULL,              "/proc/sys",       NULL,    NULL,        MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
568
                  MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO },                          /* ... then, make it r/o */
569

570
                /* Make these files inaccessible to container payloads: they potentially leak information about kernel
571
                 * internals or the host's execution environment to the container */
572
                PROC_INACCESSIBLE_REG("/proc/kallsyms"),
573
                PROC_INACCESSIBLE_REG("/proc/kcore"),
574
                PROC_INACCESSIBLE_REG("/proc/keys"),
575
                PROC_INACCESSIBLE_REG("/proc/sysrq-trigger"),
576
                PROC_INACCESSIBLE_REG("/proc/timer_list"),
577

578
                /* Make these directories read-only to container payloads: they show hardware information, and in some
579
                 * cases contain tunables the container really shouldn't have access to. */
580
                PROC_READ_ONLY("/proc/acpi"),
581
                PROC_READ_ONLY("/proc/apm"),
582
                PROC_READ_ONLY("/proc/asound"),
583
                PROC_READ_ONLY("/proc/bus"),
584
                PROC_READ_ONLY("/proc/fs"),
585
                PROC_READ_ONLY("/proc/irq"),
586
                PROC_READ_ONLY("/proc/scsi"),
587

588
                { "mqueue",                 "/dev/mqueue",                  "mqueue", NULL,                            MS_NOSUID|MS_NOEXEC|MS_NODEV,
589
                  MOUNT_IN_USERNS|MOUNT_MKDIR },
590

591
                /* Then we list outer child mounts (i.e. mounts applied *before* entering user namespacing when we are privileged) */
592
                { "tmpfs",                  "/tmp",                         "tmpfs", "mode=01777" NESTED_TMPFS_LIMITS, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
593
                  MOUNT_FATAL|MOUNT_APPLY_TMPFS_TMP|MOUNT_MKDIR|MOUNT_USRQUOTA_GRACEFUL },
594
                { "tmpfs",                  "/sys",                         "tmpfs", "mode=0555" TMPFS_LIMITS_SYS,     MS_NOSUID|MS_NOEXEC|MS_NODEV,
595
                  MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS|MOUNT_MKDIR|MOUNT_UNMANAGED },
596
                { "sysfs",                  "/sys",                         "sysfs", NULL,                             SYS_DEFAULT_MOUNT_FLAGS,
597
                  MOUNT_FATAL|MOUNT_APPLY_APIVFS_RO|MOUNT_MKDIR|MOUNT_UNMANAGED },    /* skipped if above was mounted */
598
                { "sysfs",                  "/sys",                         "sysfs", NULL,                             MS_NOSUID|MS_NOEXEC|MS_NODEV,
599
                  MOUNT_FATAL|MOUNT_MKDIR|MOUNT_UNMANAGED },                          /* skipped if above was mounted */
600
                { "tmpfs",                  "/dev",                         "tmpfs", "mode=0755" TMPFS_LIMITS_PRIVATE_DEV, MS_NOSUID|MS_STRICTATIME,
601
                  MOUNT_FATAL|MOUNT_MKDIR },
602
                { "tmpfs",                  "/dev/shm",                     "tmpfs", "mode=01777" NESTED_TMPFS_LIMITS, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
603
                  MOUNT_FATAL|MOUNT_MKDIR|MOUNT_USRQUOTA_GRACEFUL },
604
                { "tmpfs",                  "/run",                         "tmpfs", "mode=0755" TMPFS_LIMITS_RUN,     MS_NOSUID|MS_NODEV|MS_STRICTATIME,
605
                  MOUNT_FATAL|MOUNT_MKDIR },
606
                { "/run/host",              "/run/host",                    NULL,    NULL,                             MS_BIND,
607
                  MOUNT_FATAL|MOUNT_MKDIR|MOUNT_PREFIX_ROOT }, /* Prepare this so that we can make it read-only when we are done */
608
                { "/etc/os-release",        "/run/host/os-release",         NULL,    NULL,                             MS_BIND,
609
                  MOUNT_TOUCH }, /* As per kernel interface requirements, bind mount first (creating mount points) and make read-only later */
610
                { "/usr/lib/os-release",    "/run/host/os-release",         NULL,    NULL,                             MS_BIND,
611
                  MOUNT_FATAL }, /* If /etc/os-release doesn't exist use the version in /usr/lib as fallback */
612
                { NULL,                     "/run/host/os-release",         NULL,    NULL,                             MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
613
                  MOUNT_FATAL },
614
                { NULL,                     "/run/host/os-release",         NULL,    NULL,                             MS_PRIVATE,
615
                  MOUNT_FATAL },  /* Turn off propagation (we only want that for the mount propagation tunnel dir) */
616
                { NULL,                     "/run/host",                    NULL,    NULL,                             MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
617
                  MOUNT_FATAL|MOUNT_IN_USERNS },
618
#if HAVE_SELINUX
619
                { "/sys/fs/selinux",        "/sys/fs/selinux",              NULL,    NULL,                             MS_BIND,
620
                  MOUNT_MKDIR|MOUNT_PRIVILEGED },  /* Bind mount first (mkdir/chown the mount point in case /sys/ is mounted as minimal skeleton tmpfs) */
621
                { NULL,                     "/sys/fs/selinux",              NULL,    NULL,                             MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
622
                  MOUNT_UNMANAGED|MOUNT_PRIVILEGED },  /* Then, make it r/o (don't mkdir/chown the mount point here, the previous entry already did that) */
623
                { NULL,                     "/sys/fs/selinux",              NULL,    NULL,                             MS_PRIVATE,
624
                  MOUNT_UNMANAGED|MOUNT_PRIVILEGED },  /* Turn off propagation (we only want that for the mount propagation tunnel dir) */
625
#endif
626
        };
627

628
        bool use_userns = FLAGS_SET(mount_settings, MOUNT_USE_USERNS);
346✔
629
        bool netns = FLAGS_SET(mount_settings, MOUNT_APPLY_APIVFS_NETNS);
346✔
630
        bool ro = FLAGS_SET(mount_settings, MOUNT_APPLY_APIVFS_RO);
346✔
631
        bool in_userns = FLAGS_SET(mount_settings, MOUNT_IN_USERNS);
346✔
632
        bool tmpfs_tmp = FLAGS_SET(mount_settings, MOUNT_APPLY_TMPFS_TMP);
346✔
633
        bool unmanaged = FLAGS_SET(mount_settings, MOUNT_UNMANAGED);
346✔
634
        bool privileged = FLAGS_SET(mount_settings, MOUNT_PRIVILEGED);
346✔
635
        int r;
346✔
636

637
        FOREACH_ELEMENT(m, mount_table) {
14,878✔
638
                _cleanup_free_ char *where = NULL, *options = NULL, *prefixed = NULL;
14,532✔
639
                bool fatal = FLAGS_SET(m->mount_settings, MOUNT_FATAL);
14,532✔
640
                const char *o;
14,532✔
641

642
                /* If we are in managed user namespace mode but the entry is marked for mount outside of
643
                 * managed user namespace mode, and to be mounted outside the user namespace, then skip it */
644
                if (!unmanaged && FLAGS_SET(m->mount_settings, MOUNT_UNMANAGED) && !FLAGS_SET(m->mount_settings, MOUNT_IN_USERNS))
14,532✔
645
                        continue;
21✔
646

647
                if (in_userns != FLAGS_SET(m->mount_settings, MOUNT_IN_USERNS))
14,511✔
648
                        continue;
8,346✔
649

650
                if (!netns && FLAGS_SET(m->mount_settings, MOUNT_APPLY_APIVFS_NETNS))
6,165✔
651
                        continue;
189✔
652

653
                if (!ro && FLAGS_SET(m->mount_settings, MOUNT_APPLY_APIVFS_RO))
5,976✔
654
                        continue;
228✔
655

656
                if (!tmpfs_tmp && FLAGS_SET(m->mount_settings, MOUNT_APPLY_TMPFS_TMP))
5,748✔
657
                        continue;
×
658

659
                if (!privileged && FLAGS_SET(m->mount_settings, MOUNT_PRIVILEGED))
5,748✔
660
                        continue;
×
661

662
                r = chase(m->where, dest, CHASE_NONEXISTENT|CHASE_PREFIX_ROOT, &where, NULL);
5,748✔
663
                if (r < 0)
5,748✔
664
                        return log_error_errno(r, "Failed to resolve %s%s: %m", strempty(dest), m->where);
×
665

666
                /* Skip this entry if it is not a remount. */
667
                if (m->what) {
5,748✔
668
                        r = path_is_mount_point(where);
3,804✔
669
                        if (r < 0 && r != -ENOENT)
3,804✔
670
                                return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
×
671
                        if (r > 0)
3,804✔
672
                                continue;
543✔
673
                }
674

675
                if ((m->mount_settings & (MOUNT_MKDIR|MOUNT_TOUCH)) != 0) {
5,205✔
676
                        uid_t u = (use_userns && !in_userns) ? uid_shift : UID_INVALID;
1,850✔
677

678
                        if (FLAGS_SET(m->mount_settings, MOUNT_TOUCH))
1,850✔
679
                                r = mkdir_parents_safe(dest, where, 0755, u, u, 0);
233✔
680
                        else
681
                                r = mkdir_p_safe(dest, where, 0755, u, u, 0);
1,617✔
682
                        if (r < 0 && r != -EEXIST) {
1,850✔
683
                                if (fatal && r != -EROFS)
×
684
                                        return log_error_errno(r, "Failed to create directory %s: %m", where);
×
685

686
                                log_debug_errno(r, "Failed to create directory %s: %m", where);
×
687

688
                                /* If we failed mkdir() or chown() due to the root directory being read only,
689
                                 * attempt to mount this fs anyway and let mount_verbose log any errors */
690
                                if (r != -EROFS)
×
691
                                        continue;
×
692
                        }
693
                }
694

695
                if (FLAGS_SET(m->mount_settings, MOUNT_TOUCH)) {
5,205✔
696
                        r = touch(where);
233✔
697
                        if (r < 0 && r != -EEXIST) {
233✔
698
                                if (fatal && r != -EROFS)
×
699
                                        return log_error_errno(r, "Failed to create file %s: %m", where);
×
700

701
                                log_debug_errno(r, "Failed to create file %s: %m", where);
×
702
                                if (r != -EROFS)
×
703
                                        continue;
×
704
                        }
705
                }
706

707
                o = m->options;
5,205✔
708
                if (streq_ptr(m->type, "tmpfs")) {
5,205✔
709
                        r = tmpfs_patch_options(o, in_userns ? 0 : uid_shift, selinux_apifs_context, &options);
2,064✔
710
                        if (r < 0)
1,032✔
711
                                return log_oom();
×
712
                        if (r > 0)
1,032✔
713
                                o = options;
1,032✔
714
                }
715

716
                if (FLAGS_SET(m->mount_settings, MOUNT_USRQUOTA_GRACEFUL)) {
5,205✔
717
                        r = mount_option_supported(m->type, /* key= */ "usrquota", /* value= */ NULL);
466✔
718
                        if (r < 0)
466✔
719
                                log_warning_errno(r, "Failed to determine if '%s' supports 'usrquota', assuming it doesn't: %m", m->type);
×
720
                        else if (r == 0)
466✔
721
                                log_debug("Kernel doesn't support 'usrquota' on '%s', not including in mount options for '%s'.", m->type, m->where);
14✔
722
                        else {
723
                                _cleanup_free_ char *joined = NULL;
×
724

725
                                if (!strextend_with_separator(&joined, ",", o ?: POINTER_MAX, "usrquota"))
452✔
726
                                        return log_oom();
×
727

728
                                free_and_replace(options, joined);
452✔
729
                                o = options;
452✔
730
                        }
731
                }
732

733
                if (FLAGS_SET(m->mount_settings, MOUNT_PREFIX_ROOT)) {
5,205✔
734
                        /* Optionally prefix the mount source with the root dir. This is useful in bind
735
                         * mounts to be created within the container image before we transition into it. Note
736
                         * that MOUNT_IN_USERNS is run after we transitioned hence prefixing is not necessary
737
                         * for those. */
738
                        r = chase(m->what, dest, CHASE_PREFIX_ROOT, &prefixed, NULL);
233✔
739
                        if (r < 0)
233✔
740
                                return log_error_errno(r, "Failed to resolve %s%s: %m", strempty(dest), m->what);
×
741
                }
742

743
                r = mount_verbose_full(
8,071✔
744
                                fatal ? LOG_ERR : LOG_DEBUG,
745
                                prefixed ?: m->what,
5,205✔
746
                                where,
747
                                m->type,
5,205✔
748
                                m->flags,
5,205✔
749
                                o,
750
                                FLAGS_SET(m->mount_settings, MOUNT_FOLLOW_SYMLINKS));
5,205✔
751
                if (r < 0 && fatal)
5,205✔
752
                        return r;
753
        }
754

755
        return 0;
756
}
757

758
/* Parses the comma-separated option string of a bind mount.
 *
 * "rbind" and "norbind" set or clear MS_REC in *mount_flags; "idmap", "noidmap", "rootidmap" and
 * "owneridmap" select the ID mapping mode stored in *idmapping. The output parameters are only written
 * after the whole string was parsed successfully. Unknown words are rejected with -EINVAL. */
static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts, RemountIdmapping *idmapping) {
        RemountIdmapping map = *idmapping;
        unsigned long f = *mount_flags;
        char *string_opts = NULL;

        assert(options);

        for (;;) {
                _cleanup_free_ char *w = NULL;
                int k;

                k = extract_first_word(&options, &w, ",", 0);
                if (k < 0)
                        return log_error_errno(k, "Failed to extract mount option: %m");
                if (k == 0)
                        break; /* End of the option string */

                if (streq(w, "rbind")) {
                        f |= MS_REC;
                        continue;
                }

                if (streq(w, "norbind")) {
                        f &= ~MS_REC;
                        continue;
                }

                if (streq(w, "idmap")) {
                        map = REMOUNT_IDMAPPING_HOST_ROOT;
                        continue;
                }

                if (streq(w, "noidmap")) {
                        map = REMOUNT_IDMAPPING_NONE;
                        continue;
                }

                if (streq(w, "rootidmap")) {
                        map = REMOUNT_IDMAPPING_HOST_OWNER;
                        continue;
                }

                if (streq(w, "owneridmap")) {
                        map = REMOUNT_IDMAPPING_HOST_OWNER_TO_TARGET_OWNER;
                        continue;
                }

                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
                                       "Invalid bind mount option: %s", w);
        }

        /* Everything parsed fine, now publish the results. */
        *idmapping = map;
        *mount_flags = f;
        /* in the future mount_opts will hold string options for mount(2) */
        *mount_opts = string_opts;

        return 0;
}
799

800
/* Bind mounts m->source onto m->destination below the container root 'dest'.
 *
 * The source is cloned with open_tree() (clearing an existing ID mapping if a new one is going to be
 * applied), the mount point is created when it does not exist yet, and the clone is attached with
 * move_mount(). Afterwards the mount is made read-only and/or ID-mapped if 'm' asks for that.
 *
 * Returns 0 on success and a negative errno-style value on failure; failures are logged here. */
static int mount_bind(const char *dest, CustomMount *m, uid_t uid_shift, uid_t uid_range) {
        _cleanup_free_ char *mount_opts = NULL, *where = NULL;
        unsigned long mount_flags = MS_BIND | MS_REC;
        struct stat source_st, dest_st;
        uid_t dest_uid = UID_INVALID;
        int r;
        RemountIdmapping idmapping = REMOUNT_IDMAPPING_NONE;

        assert(dest);
        assert(m);

        if (m->options) {
                r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts, &idmapping);
                if (r < 0)
                        return r;
        }

        /* "rbind"/"norbind" are tracked as MS_REC in mount_flags. Translate that into the open_tree() flag
         * which decides whether submounts of the source are cloned too, so that the option actually has
         * an effect. */
        unsigned open_flags = OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC|(FLAGS_SET(mount_flags, MS_REC) ? AT_RECURSIVE : 0);

        /* If this is a bind mount from a temporary sources change ownership of the source to the container's
         * root UID. Otherwise it would always show up as "nobody" if user namespacing is used. */
        if (m->rm_rf_tmpdir && chown(m->source, uid_shift, uid_shift) < 0)
                return log_error_errno(errno, "Failed to chown %s: %m", m->source);

        /* UID/GIDs of idmapped mounts are always resolved in the caller's user namespace. In other
         * words, they're not nested. If we're doing an idmapped mount from a bind mount that's
         * already idmapped itself, the old idmap is replaced with the new one. This means that the
         * source uid which we put in the idmap userns has to be the uid of mount source in the
         * caller's userns *without* any mount idmapping in place. To get that uid, we clone the
         * mount source tree and clear any existing idmapping and temporarily mount that tree over
         * the mount source before we stat the mount source to figure out the source uid. */
        _cleanup_close_ int fd_clone = open_tree_attr_with_fallback(
                        AT_FDCWD,
                        m->source,
                        open_flags,
                        &(struct mount_attr) {
                                .attr_clr = idmapping != REMOUNT_IDMAPPING_NONE ? MOUNT_ATTR_IDMAP : 0,
                        });
        if (ERRNO_IS_NEG_NOT_SUPPORTED(fd_clone)) {
                /* We can only clear idmapped mounts with open_tree_attr(), but there might not be one in
                 * the first place, so we keep going if we get a not supported error. */
                fd_clone = open_tree(AT_FDCWD, m->source, open_flags);
                if (fd_clone < 0)
                        return log_error_errno(errno, "Failed to clone %s: %m", m->source);
        } else if (fd_clone < 0)
                /* The helper's result is tested as a negative errno-style value above, hence log that
                 * value rather than whatever errno happens to contain at this point. */
                return log_error_errno(fd_clone, "Failed to clone %s: %m", m->source);

        if (fstat(fd_clone, &source_st) < 0)
                return log_error_errno(errno, "Failed to stat %s: %m", m->source);

        r = chase(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
        if (r > 0) { /* Path exists already? */

                if (stat(where, &dest_st) < 0)
                        return log_error_errno(errno, "Failed to stat %s: %m", where);

                /* An explicitly configured destination UID wins over the owner of the existing node. */
                dest_uid = uid_is_valid(m->destination_uid) ? uid_shift + m->destination_uid : dest_st.st_uid;

                if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode))
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
                                               "Cannot bind mount directory %s on file %s.",
                                               m->source, where);

                if (!S_ISDIR(source_st.st_mode) && S_ISDIR(dest_st.st_mode))
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
                                               "Cannot bind mount file %s on directory %s.",
                                               m->source, where);

        } else { /* Path doesn't exist yet? */
                r = mkdir_parents_safe_label(dest, where, 0755, uid_shift, uid_shift, MKDIR_IGNORE_EXISTING);
                if (r < 0)
                        return log_error_errno(r, "Failed to make parents of %s: %m", where);

                /* Create the mount point. Any non-directory file can be
                 * mounted on any non-directory file (regular, fifo, socket,
                 * char, block).
                 */
                if (S_ISDIR(source_st.st_mode))
                        r = mkdir_label(where, 0755);
                else
                        r = touch(where);
                if (r < 0)
                        return log_error_errno(r, "Failed to create mount point %s: %m", where);

                if (chown(where, uid_shift, uid_shift) < 0)
                        return log_error_errno(errno, "Failed to chown %s: %m", where);

                dest_uid = uid_shift + (uid_is_valid(m->destination_uid) ? m->destination_uid : 0);
        }

        if (move_mount(fd_clone, "", AT_FDCWD, where, MOVE_MOUNT_F_EMPTY_PATH) < 0)
                return log_error_errno(errno, "Failed to mount %s to %s: %m", m->source, where);

        /* The clone is attached now, the fd is not needed anymore. */
        fd_clone = safe_close(fd_clone);

        if (m->read_only) {
                r = bind_remount_recursive(where, MS_RDONLY, MS_RDONLY, NULL);
                if (r < 0)
                        return log_error_errno(r, "Read-only bind mount failed: %m");
        }

        if (idmapping != REMOUNT_IDMAPPING_NONE) {
                r = remount_idmap(STRV_MAKE(where), uid_shift, uid_range, source_st.st_uid, dest_uid, idmapping);
                if (r < 0)
                        return log_error_errno(r, "Failed to map ids for bind mount %s: %m", where);
        }

        return 0;
}
907

908
/* Mounts a fresh tmpfs on m->destination below the container root 'dest', creating the mount point
 * if it is missing. The configured options are passed through tmpfs_patch_options() first; its
 * output is used whenever it reports that it produced a patched string. */
static int mount_tmpfs(const char *dest, CustomMount *m, uid_t uid_shift, const char *selinux_apifs_context) {
        _cleanup_free_ char *target = NULL, *patched = NULL;
        const char *opts;
        uid_t owner;
        int r;

        assert(dest);
        assert(m);

        r = chase(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &target, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);

        if (r == 0) { /* Doesn't exist yet? */
                r = mkdir_p_label(target, 0755);
                if (r < 0)
                        return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", target);
        }

        /* A zero shift is passed on as "no UID". */
        if (uid_shift == 0)
                owner = UID_INVALID;
        else
                owner = uid_shift;

        r = tmpfs_patch_options(m->options, owner, selinux_apifs_context, &patched);
        if (r < 0)
                return log_oom();

        if (r > 0)
                opts = patched;
        else
                opts = m->options;

        return mount_nofollow_verbose(LOG_ERR, "tmpfs", target, "tmpfs", MS_NODEV|MS_STRICTATIME, opts);
}
932

933
static char *joined_and_escaped_lower_dirs(char **lower) {
2✔
934
        _cleanup_strv_free_ char **sv = NULL;
×
935

936
        sv = strv_copy(lower);
2✔
937
        if (!sv)
2✔
938
                return NULL;
939

940
        strv_reverse(sv);
2✔
941

942
        if (!strv_shell_escape(sv, ",:"))
2✔
943
                return NULL;
944

945
        return strv_join(sv, ":");
2✔
946
}
947

948
/* Mounts an overlayfs on m->destination below the container root 'dest'.
 *
 * For read-only mounts m->source becomes the top-most lower directory. Otherwise m->source is used as
 * the upper directory and m->work_dir as the work directory. All paths are escaped for "," and ":"
 * before they are put into the option string. */
static int mount_overlay(const char *dest, CustomMount *m) {
        _cleanup_free_ char *target = NULL, *lower_joined = NULL, *source_escaped = NULL;
        unsigned long flags;
        const char *opts;
        int r;

        assert(dest);
        assert(m);

        r = chase(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &target, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);

        if (r == 0) { /* Doesn't exist yet? */
                r = mkdir_label(target, 0755);
                if (r < 0)
                        return log_error_errno(r, "Creating mount point for overlay %s failed: %m", target);
        }

        /* Best effort: make sure the source directory is there, errors are ignored on purpose. */
        (void) mkdir_p_label(m->source, 0755);

        lower_joined = joined_and_escaped_lower_dirs(m->lower);
        if (!lower_joined)
                return log_oom();

        source_escaped = shell_escape(m->source, ",:");
        if (!source_escaped)
                return log_oom();

        if (!m->read_only) {
                _cleanup_free_ char *work_escaped = shell_escape(m->work_dir, ",:");

                if (!work_escaped)
                        return log_oom();

                opts = strjoina("lowerdir=", lower_joined, ",upperdir=", source_escaped, ",workdir=", work_escaped);
                flags = 0;
        } else {
                opts = strjoina("lowerdir=", source_escaped, ":", lower_joined);
                flags = MS_RDONLY;
        }

        return mount_nofollow_verbose(LOG_ERR, "overlay", target, "overlay", flags, opts);
}
989

990
/* Hides m->destination below the container root 'dest' by bind mounting an inaccessible node of the
 * matching file type over it and then remounting that bind mount read-only.
 *
 * If m->graceful is set, failures are logged at debug level only and reported as success. */
static int mount_inaccessible(const char *dest, CustomMount *m) {
        _cleanup_free_ char *target = NULL, *node = NULL;
        struct stat target_st;
        int r;

        assert(dest);
        assert(m);

        int level = m->graceful ? LOG_DEBUG : LOG_ERR;

        r = chase_and_stat(m->destination, dest, CHASE_PREFIX_ROOT, &target, &target_st);
        if (r < 0) {
                log_full_errno(level, r, "Failed to resolve %s/%s: %m", dest, m->destination);
                goto failed;
        }

        r = mode_to_inaccessible_node(NULL, target_st.st_mode, &node);
        if (r < 0)
                goto failed;

        r = mount_nofollow_verbose(level, node, target, NULL, MS_BIND, NULL);
        if (r < 0)
                goto failed;

        r = mount_nofollow_verbose(level, NULL, target, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, NULL);
        if (r < 0) {
                /* Don't leave a writable bind mount behind if it could not be made read-only. */
                (void) umount_verbose(level, target, UMOUNT_NOFOLLOW);
                goto failed;
        }

        return 0;

failed:
        if (m->graceful)
                return 0;

        return r;
}
1020

1021
/* Mounts m->source with file system type m->type_argument and options m->options onto m->destination
 * below the container root 'dest', creating the mount point if it does not exist yet. */
static int mount_arbitrary(const char *dest, CustomMount *m) {
        _cleanup_free_ char *target = NULL;
        int r;

        assert(dest);
        assert(m);

        r = chase(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &target, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);

        if (r == 0) { /* Doesn't exist yet? */
                int k = mkdir_p_label(target, 0755);

                if (k < 0)
                        return log_error_errno(k, "Creating mount point for mount %s failed: %m", target);
        }

        return mount_nofollow_verbose(LOG_ERR, m->source, target, m->type_argument, 0, m->options);
}
1039

1040
int mount_custom(
579✔
1041
                const char *dest,
1042
                CustomMount *mounts, size_t n,
1043
                uid_t uid_shift,
1044
                uid_t uid_range,
1045
                const char *selinux_apifs_context,
1046
                MountSettingsMask mount_settings) {
1047
        int r;
579✔
1048

1049
        assert(dest);
579✔
1050

1051
        FOREACH_ARRAY(m, mounts, n) {
1,264✔
1052
                if (FLAGS_SET(mount_settings, MOUNT_IN_USERNS) != m->in_userns)
685✔
1053
                        continue;
137✔
1054

1055
                if (FLAGS_SET(mount_settings, MOUNT_ROOT_ONLY) && !path_equal(m->destination, "/"))
548✔
1056
                        continue;
274✔
1057

1058
                if (FLAGS_SET(mount_settings, MOUNT_NON_ROOT_ONLY) && path_equal(m->destination, "/"))
274✔
1059
                        continue;
×
1060

1061
                switch (m->type) {
274✔
1062

1063
                case CUSTOM_MOUNT_BIND:
264✔
1064
                        r = mount_bind(dest, m, uid_shift, uid_range);
264✔
1065
                        break;
264✔
1066

1067
                case CUSTOM_MOUNT_TMPFS:
4✔
1068
                        r = mount_tmpfs(dest, m, uid_shift, selinux_apifs_context);
4✔
1069
                        break;
4✔
1070

1071
                case CUSTOM_MOUNT_OVERLAY:
2✔
1072
                        r = mount_overlay(dest, m);
2✔
1073
                        break;
2✔
1074

1075
                case CUSTOM_MOUNT_INACCESSIBLE:
4✔
1076
                        r = mount_inaccessible(dest, m);
4✔
1077
                        break;
4✔
1078

1079
                case CUSTOM_MOUNT_ARBITRARY:
×
1080
                        r = mount_arbitrary(dest, m);
×
1081
                        break;
×
1082

1083
                default:
×
1084
                        assert_not_reached();
×
1085
                }
1086

1087
                if (r < 0)
274✔
1088
                        return r;
1089
        }
1090

1091
        return 0;
1092
}
1093

1094
bool has_custom_root_mount(const CustomMount *mounts, size_t n) {
457✔
1095
        FOREACH_ARRAY(m, mounts, n)
934✔
1096
                if (path_equal(m->destination, "/"))
477✔
1097
                        return true;
1098

1099
        return false;
1100
}
1101

1102
static int setup_volatile_state(const char *directory) {
4✔
1103
        int r;
4✔
1104

1105
        assert(directory);
4✔
1106

1107
        /* --volatile=state means we simply overmount /var with a tmpfs, and the rest read-only. */
1108

1109
        /* First, remount the root directory. */
1110
        r = bind_remount_recursive(directory, MS_RDONLY, MS_RDONLY, NULL);
4✔
1111
        if (r < 0)
4✔
1112
                return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
×
1113

1114
        return 0;
1115
}
1116

1117
static int setup_volatile_state_after_remount_idmap(const char *directory, uid_t uid_shift, const char *selinux_apifs_context) {
4✔
1118
        _cleanup_free_ char *buf = NULL;
8✔
1119
        int r;
4✔
1120

1121
        assert(directory);
4✔
1122

1123
        /* Then, after remount_idmap(), overmount /var/ with a tmpfs. */
1124

1125
        _cleanup_free_ char *p = path_join(directory, "/var");
8✔
1126
        if (!p)
4✔
1127
                return log_oom();
×
1128

1129
        r = mkdir(p, 0755);
4✔
1130
        if (r < 0 && errno != EEXIST)
4✔
1131
                return log_error_errno(errno, "Failed to create %s: %m", directory);
×
1132

1133
        const char *options = "mode=0755" TMPFS_LIMITS_VOLATILE_STATE;
4✔
1134
        r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
6✔
1135
        if (r < 0)
4✔
1136
                return log_oom();
×
1137
        if (r > 0)
4✔
1138
                options = buf;
4✔
1139

1140
        return mount_nofollow_verbose(LOG_ERR, "tmpfs", p, "tmpfs", MS_STRICTATIME, options);
4✔
1141
}
1142

1143
static int setup_volatile_yes(const char *directory, uid_t uid_shift, const char *selinux_apifs_context) {
8✔
1144
        bool tmpfs_mounted = false, bind_mounted = false;
8✔
1145
        _cleanup_(rmdir_and_freep) char *template = NULL;
×
1146
        _cleanup_free_ char *buf = NULL, *bindir = NULL, *f = NULL, *t = NULL;
8✔
1147
        struct stat st;
8✔
1148
        int r;
8✔
1149

1150
        assert(directory);
8✔
1151

1152
        /* --volatile=yes means we mount a tmpfs to the root dir, and the original /usr to use inside it, and
1153
         * that read-only. Before we start setting this up let's validate if the image has the /usr merge
1154
         * implemented, and let's output a friendly log message if it hasn't. */
1155

1156
        bindir = path_join(directory, "/bin");
8✔
1157
        if (!bindir)
8✔
1158
                return log_oom();
×
1159
        if (lstat(bindir, &st) < 0) {
8✔
1160
                if (errno != ENOENT)
×
1161
                        return log_error_errno(errno, "Failed to stat /bin directory below image: %m");
×
1162

1163
                /* ENOENT is fine, just means the image is probably just a naked /usr and we can create the
1164
                 * rest. */
1165
        } else if (S_ISDIR(st.st_mode))
8✔
1166
                return log_error_errno(SYNTHETIC_ERRNO(EISDIR),
×
1167
                                       "Sorry, --volatile=yes mode is not supported with OS images that have not merged /bin/, /sbin/, /lib/, /lib64/ into /usr/. "
1168
                                       "Please work with your distribution and help them adopt the merged /usr scheme.");
1169
        else if (!S_ISLNK(st.st_mode))
8✔
1170
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
×
1171
                                       "Error starting image: if --volatile=yes is used /bin must be a symlink (for merged /usr support) or non-existent (in which case a symlink is created automatically).");
1172

1173
        r = mkdtemp_malloc("/tmp/nspawn-volatile-XXXXXX", &template);
8✔
1174
        if (r < 0)
8✔
1175
                return log_error_errno(r, "Failed to create temporary directory: %m");
×
1176

1177
        const char *options = "mode=0755" TMPFS_LIMITS_ROOTFS;
8✔
1178
        r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
12✔
1179
        if (r < 0)
8✔
1180
                goto fail;
×
1181
        if (r > 0)
8✔
1182
                options = buf;
8✔
1183

1184
        r = mount_nofollow_verbose(LOG_ERR, "tmpfs", template, "tmpfs", MS_STRICTATIME, options);
8✔
1185
        if (r < 0)
8✔
1186
                goto fail;
×
1187

1188
        tmpfs_mounted = true;
8✔
1189

1190
        f = path_join(directory, "/usr");
8✔
1191
        if (!f) {
8✔
1192
                r = log_oom();
×
1193
                goto fail;
×
1194
        }
1195

1196
        t = path_join(template, "/usr");
8✔
1197
        if (!t) {
8✔
1198
                r = log_oom();
×
1199
                goto fail;
×
1200
        }
1201

1202
        r = mkdir(t, 0755);
8✔
1203
        if (r < 0 && errno != EEXIST) {
8✔
1204
                r = log_error_errno(errno, "Failed to create %s: %m", t);
×
1205
                goto fail;
×
1206
        }
1207

1208
        r = mount_nofollow_verbose(LOG_ERR, f, t, NULL, MS_BIND|MS_REC, NULL);
8✔
1209
        if (r < 0)
8✔
1210
                goto fail;
×
1211

1212
        bind_mounted = true;
8✔
1213

1214
        r = bind_remount_recursive(t, MS_RDONLY, MS_RDONLY, NULL);
8✔
1215
        if (r < 0) {
8✔
1216
                log_error_errno(r, "Failed to remount %s read-only: %m", t);
×
1217
                goto fail;
×
1218
        }
1219

1220
        r = mount_nofollow_verbose(LOG_ERR, template, directory, NULL, MS_MOVE, NULL);
8✔
1221
        if (r < 0)
8✔
1222
                goto fail;
×
1223

1224
        (void) rmdir(template);
8✔
1225

1226
        return 0;
8✔
1227

1228
fail:
1229
        if (bind_mounted)
×
1230
                (void) umount_verbose(LOG_ERR, t, UMOUNT_NOFOLLOW);
×
1231

1232
        if (tmpfs_mounted)
×
1233
                (void) umount_verbose(LOG_ERR, template, UMOUNT_NOFOLLOW);
×
1234

1235
        return r;
1236
}
1237

1238
static int setup_volatile_overlay(const char *directory, uid_t uid_shift, const char *selinux_apifs_context) {
4✔
1239
        _cleanup_free_ char *buf = NULL, *escaped_directory = NULL, *escaped_upper = NULL, *escaped_work = NULL;
4✔
1240
        _cleanup_(rmdir_and_freep) char *template = NULL;
4✔
1241
        const char *upper, *work, *options;
4✔
1242
        bool tmpfs_mounted = false;
4✔
1243
        int r;
4✔
1244

1245
        assert(directory);
4✔
1246

1247
        /* --volatile=overlay means we mount an overlayfs to the root dir. */
1248

1249
        r = mkdtemp_malloc("/tmp/nspawn-volatile-XXXXXX", &template);
4✔
1250
        if (r < 0)
4✔
1251
                return log_error_errno(r, "Failed to create temporary directory: %m");
×
1252

1253
        options = "mode=0755" TMPFS_LIMITS_ROOTFS;
4✔
1254
        r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
6✔
1255
        if (r < 0)
4✔
1256
                goto finish;
×
1257
        if (r > 0)
4✔
1258
                options = buf;
4✔
1259

1260
        r = mount_nofollow_verbose(LOG_ERR, "tmpfs", template, "tmpfs", MS_STRICTATIME, options);
4✔
1261
        if (r < 0)
4✔
1262
                goto finish;
×
1263

1264
        tmpfs_mounted = true;
4✔
1265

1266
        upper = strjoina(template, "/upper");
20✔
1267
        work = strjoina(template, "/work");
20✔
1268

1269
        if (mkdir(upper, 0755) < 0) {
4✔
1270
                r = log_error_errno(errno, "Failed to create %s: %m", upper);
×
1271
                goto finish;
×
1272
        }
1273
        if (mkdir(work, 0755) < 0) {
4✔
1274
                r = log_error_errno(errno, "Failed to create %s: %m", work);
×
1275
                goto finish;
×
1276
        }
1277

1278
        /* And now, let's overmount the root dir with an overlayfs that uses the root dir as lower dir. It's kinda nice
1279
         * that the kernel allows us to do that without going through some mount point rearrangements. */
1280

1281
        escaped_directory = shell_escape(directory, ",:");
4✔
1282
        escaped_upper = shell_escape(upper, ",:");
4✔
1283
        escaped_work = shell_escape(work, ",:");
4✔
1284
        if (!escaped_directory || !escaped_upper || !escaped_work) {
4✔
1285
                r = -ENOMEM;
×
1286
                goto finish;
×
1287
        }
1288

1289
        options = strjoina("lowerdir=", escaped_directory, ",upperdir=", escaped_upper, ",workdir=", escaped_work);
52✔
1290
        r = mount_nofollow_verbose(LOG_ERR, "overlay", directory, "overlay", 0, options);
4✔
1291

1292
finish:
1293
        if (tmpfs_mounted)
×
1294
                (void) umount_verbose(LOG_ERR, template, UMOUNT_NOFOLLOW);
4✔
1295

1296
        return r;
1297
}
1298

1299
int setup_volatile_mode(
235✔
1300
                const char *directory,
1301
                VolatileMode mode,
1302
                uid_t uid_shift,
1303
                const char *selinux_apifs_context) {
1304

1305
        switch (mode) {
235✔
1306

1307
        case VOLATILE_YES:
8✔
1308
                return setup_volatile_yes(directory, uid_shift, selinux_apifs_context);
8✔
1309

1310
        case VOLATILE_STATE:
4✔
1311
                return setup_volatile_state(directory);
4✔
1312

1313
        case VOLATILE_OVERLAY:
4✔
1314
                return setup_volatile_overlay(directory, uid_shift, selinux_apifs_context);
4✔
1315

1316
        default:
1317
                return 0;
1318
        }
1319
}
1320

1321
int setup_volatile_mode_after_remount_idmap(
233✔
1322
                const char *directory,
1323
                VolatileMode mode,
1324
                uid_t uid_shift,
1325
                const char *selinux_apifs_context) {
1326

1327
        switch (mode) {
233✔
1328

1329
        case VOLATILE_STATE:
4✔
1330
                return setup_volatile_state_after_remount_idmap(directory, uid_shift, selinux_apifs_context);
4✔
1331

1332
        default:
1333
                return 0;
1334
        }
1335
}
1336

1337
/* Expects *pivot_root_new and *pivot_root_old to be initialised to allocated memory or NULL. */
1338
int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s) {
2✔
1339
        _cleanup_free_ char *root_new = NULL, *root_old = NULL;
2✔
1340
        const char *p = s;
2✔
1341
        int r;
2✔
1342

1343
        assert(pivot_root_new);
2✔
1344
        assert(pivot_root_old);
2✔
1345

1346
        r = extract_first_word(&p, &root_new, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
2✔
1347
        if (r < 0)
2✔
1348
                return r;
1349
        if (r == 0)
2✔
1350
                return -EINVAL;
1351

1352
        if (isempty(p))
2✔
1353
                root_old = NULL;
1354
        else {
1355
                root_old = strdup(p);
×
1356
                if (!root_old)
×
1357
                        return -ENOMEM;
1358
        }
1359

1360
        if (!path_is_absolute(root_new))
2✔
1361
                return -EINVAL;
1362
        if (root_old && !path_is_absolute(root_old))
×
1363
                return -EINVAL;
1364

1365
        free_and_replace(*pivot_root_new, root_new);
×
1366
        free_and_replace(*pivot_root_old, root_old);
×
1367

1368
        return 0;
×
1369
}
1370

1371
int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old) {
235✔
1372
        _cleanup_free_ char *directory_pivot_root_new = NULL;
470✔
1373
        _cleanup_free_ char *pivot_tmp_pivot_root_old = NULL;
235✔
1374
        _cleanup_(rmdir_and_freep) char *pivot_tmp = NULL;
235✔
1375
        int r;
235✔
1376

1377
        assert(directory);
235✔
1378

1379
        if (!pivot_root_new)
235✔
1380
                return 0;
1381

1382
        /* Pivot pivot_root_new to / and the existing / to pivot_root_old.
1383
         * If pivot_root_old is NULL, the existing / disappears.
1384
         * This requires a temporary directory, pivot_tmp, which is
1385
         * not a child of either.
1386
         *
1387
         * This is typically used for OSTree-style containers, where the root partition contains several
1388
         * sysroots which could be run. Normally, one would be chosen by the bootloader and pivoted to / by
1389
         * initrd.
1390
         *
1391
         * For example, for an OSTree deployment, pivot_root_new
1392
         * would be: /ostree/deploy/$os/deploy/$checksum. Note that this
1393
         * code doesn’t do the /var mount which OSTree expects: use
1394
         * --bind +/sysroot/ostree/deploy/$os/var:/var for that.
1395
         *
1396
         * So in the OSTree case, we’ll end up with something like:
1397
         *  - directory = /tmp/nspawn-root-123456
1398
         *  - pivot_root_new = /ostree/deploy/os/deploy/123abc
1399
         *  - pivot_root_old = /sysroot
1400
         *  - directory_pivot_root_new =
1401
         *       /tmp/nspawn-root-123456/ostree/deploy/os/deploy/123abc
1402
         *  - pivot_tmp = /tmp/nspawn-pivot-123456
1403
         *  - pivot_tmp_pivot_root_old = /tmp/nspawn-pivot-123456/sysroot
1404
         *
1405
         * Requires all file systems at directory and below to be mounted
1406
         * MS_PRIVATE or MS_SLAVE so they can be moved.
1407
         */
1408
        directory_pivot_root_new = path_join(directory, pivot_root_new);
×
1409
        if (!directory_pivot_root_new)
×
1410
                return log_oom();
×
1411

1412
        /* Remount directory_pivot_root_new to make it movable. */
1413
        r = mount_nofollow_verbose(LOG_ERR, directory_pivot_root_new, directory_pivot_root_new, NULL, MS_BIND, NULL);
×
1414
        if (r < 0)
×
1415
                return r;
1416

1417
        if (pivot_root_old) {
×
1418
                r = mkdtemp_malloc("/tmp/nspawn-pivot-XXXXXX", &pivot_tmp);
×
1419
                if (r < 0)
×
1420
                        return log_error_errno(r, "Failed to create temporary directory: %m");
×
1421

1422
                pivot_tmp_pivot_root_old = path_join(pivot_tmp, pivot_root_old);
×
1423
                if (!pivot_tmp_pivot_root_old)
×
1424
                        return log_oom();
×
1425

1426
                r = mount_nofollow_verbose(LOG_ERR, directory_pivot_root_new, pivot_tmp, NULL, MS_MOVE, NULL);
×
1427
                if (r < 0)
×
1428
                        return r;
1429

1430
                r = mount_nofollow_verbose(LOG_ERR, directory, pivot_tmp_pivot_root_old, NULL, MS_MOVE, NULL);
×
1431
                if (r < 0)
×
1432
                        return r;
1433

1434
                r = mount_nofollow_verbose(LOG_ERR, pivot_tmp, directory, NULL, MS_MOVE, NULL);
×
1435
        } else
1436
                r = mount_nofollow_verbose(LOG_ERR, directory_pivot_root_new, directory, NULL, MS_MOVE, NULL);
×
1437

1438
        if (r < 0)
×
1439
                return r;
×
1440

1441
        return 0;
1442
}
1443

1444
#define NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS "/run/host/proc"
1445
#define NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS "/run/host/sys"
1446

1447
int pin_fully_visible_api_fs(void) {
104✔
1448
        int r;
104✔
1449

1450
        log_debug("Pinning fully visible API FS");
104✔
1451

1452
        (void) mkdir_p(NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS, 0755);
104✔
1453
        (void) mkdir_p(NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS, 0755);
104✔
1454

1455
        r = mount_follow_verbose(LOG_ERR, "proc", NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS, "proc", PROC_DEFAULT_MOUNT_FLAGS, NULL);
104✔
1456
        if (r < 0)
104✔
1457
                return r;
1458

1459
        r = mount_follow_verbose(LOG_ERR, "sysfs", NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS, "sysfs", SYS_DEFAULT_MOUNT_FLAGS, NULL);
104✔
1460
        if (r < 0)
104✔
1461
                return r;
×
1462

1463
        return 0;
1464
}
1465

1466
static int do_wipe_fully_visible_api_fs(void) {
61✔
1467
        if (umount2(NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS, MNT_DETACH) < 0)
61✔
1468
                return log_error_errno(errno, "Failed to unmount temporary proc: %m");
×
1469

1470
        if (rmdir(NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS) < 0)
61✔
1471
                return log_error_errno(errno, "Failed to remove temporary proc mountpoint: %m");
×
1472

1473
        if (umount2(NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS, MNT_DETACH) < 0)
61✔
1474
                return log_error_errno(errno, "Failed to unmount temporary sys: %m");
×
1475

1476
        if (rmdir(NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS) < 0)
61✔
1477
                return log_error_errno(errno, "Failed to remove temporary sys mountpoint: %m");
×
1478

1479
        return 0;
1480
}
1481

1482
int wipe_fully_visible_api_fs(int mntns_fd) {
61✔
1483
        _cleanup_close_ int orig_mntns_fd = -EBADF;
61✔
1484
        int r, rr;
61✔
1485

1486
        log_debug("Wiping fully visible API FS");
61✔
1487

1488
        orig_mntns_fd = namespace_open_by_type(NAMESPACE_MOUNT);
61✔
1489
        if (orig_mntns_fd < 0)
61✔
1490
                return log_error_errno(orig_mntns_fd, "Failed to pin originating mount namespace: %m");
×
1491

1492
        r = namespace_enter(/* pidns_fd = */ -EBADF,
61✔
1493
                            mntns_fd,
1494
                            /* netns_fd = */ -EBADF,
1495
                            /* userns_fd = */ -EBADF,
1496
                            /* root_fd = */ -EBADF);
1497
        if (r < 0)
61✔
1498
                return log_error_errno(r, "Failed to enter mount namespace: %m");
×
1499

1500
        rr = do_wipe_fully_visible_api_fs();
61✔
1501

1502
        r = namespace_enter(/* pidns_fd = */ -EBADF,
61✔
1503
                            orig_mntns_fd,
1504
                            /* netns_fd = */ -EBADF,
1505
                            /* userns_fd = */ -EBADF,
1506
                            /* root_fd = */ -EBADF);
1507
        if (r < 0)
61✔
1508
                return log_error_errno(r, "Failed to enter original mount namespace: %m");
×
1509

1510
        return rr;
1511
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc