• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 15986406979

30 Jun 2025 05:03PM UTC coverage: 72.045% (-0.09%) from 72.13%
15986406979

push

github

bluca
man/systemd-sysext: list ephemeral/ephemeral-import in the list of options

ephemeral/ephemeral-import are described as possible '--mutable' options but
not present in the list. Note, "systemd-sysext --help" lists them correctly.

300514 of 417119 relevant lines covered (72.05%)

708586.28 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

76.45
/src/nspawn/nspawn-mount.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <linux/magic.h>
4
#include <sys/mount.h>
5
#include <unistd.h>
6

7
#include "alloc-util.h"
8
#include "chase.h"
9
#include "escape.h"
10
#include "extract-word.h"
11
#include "fd-util.h"
12
#include "format-util.h"
13
#include "fs-util.h"
14
#include "log.h"
15
#include "mkdir-label.h"
16
#include "mount-util.h"
17
#include "mountpoint-util.h"
18
#include "namespace-util.h"
19
#include "nspawn-mount.h"
20
#include "path-util.h"
21
#include "rm-rf.h"
22
#include "sort-util.h"
23
#include "stat-util.h"
24
#include "string-util.h"
25
#include "strv.h"
26
#include "tmpfile-util.h"
27

28
/* Grows the array *l by one element and returns a pointer to the new entry, whose fields are all zero
 * except for .type, which is set to 't'. *n is incremented on success. Returns NULL if the array
 * could not be enlarged, in which case *l and *n are left as they were. */
CustomMount* custom_mount_add(CustomMount **l, size_t *n, CustomMountType t) {
        assert(l);
        assert(n);
        assert(t >= 0);
        assert(t < _CUSTOM_MOUNT_TYPE_MAX);

        if (!GREEDY_REALLOC(*l, *n + 1))
                return NULL;

        /* The new entry lives right behind the previously last one */
        CustomMount *slot = &(*l)[(*n)++];

        *slot = (CustomMount) { .type = t };
        return slot;
}
48

49
/* Releases an array of 'n' custom mounts: removes the temporary directories recorded in .work_dir and
 * .rm_rf_tmpdir from disk, frees every string owned by each entry, and finally frees the array itself. */
void custom_mount_free_all(CustomMount *l, size_t n) {
        FOREACH_ARRAY(entry, l, n) {
                free(entry->source);
                free(entry->destination);
                free(entry->options);

                /* Clean up on-disk state first, then drop the path string (free(NULL) is a no-op) */
                if (entry->work_dir)
                        (void) rm_rf(entry->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
                free(entry->work_dir);

                if (entry->rm_rf_tmpdir)
                        (void) rm_rf(entry->rm_rf_tmpdir, REMOVE_ROOT|REMOVE_PHYSICAL);
                free(entry->rm_rf_tmpdir);

                strv_free(entry->lower);
                free(entry->type_argument);
        }

        free(l);
}
1,174✔
71

72
static int custom_mount_compare(const CustomMount *a, const CustomMount *b) {
103✔
73
        int r;
103✔
74

75
        r = path_compare(a->destination, b->destination);
103✔
76
        if (r != 0)
103✔
77
                return r;
78

79
        return CMP(a->type, b->type);
×
80
}
81

82
/* Normalizes a user-supplied source path into a newly allocated string stored in *ret.
 *
 * A path starting with "+" must be absolute after the prefix and is copied verbatim (prefix included);
 * anything else is made absolute relative to the current working directory.
 *
 * Returns 0 or the result of path_make_absolute_cwd() on success, -EINVAL for an empty string or a
 * "+" prefix followed by a non-absolute path, -ENOMEM if the copy cannot be allocated. */
static int source_path_parse(const char *p, char **ret) {
        assert(p);
        assert(ret);

        if (isempty(p))
                return -EINVAL;

        if (*p != '+')
                return path_make_absolute_cwd(p, ret);

        if (!path_is_absolute(p + 1))
                return -EINVAL;

        char *copy = strdup(p);
        if (!copy)
                return -ENOMEM;

        *ret = copy;
        return 0;
}
103

104
/* Like source_path_parse(), but an empty string is accepted and reported as *ret == NULL with a
 * return value of 0 instead of being rejected. */
static int source_path_parse_nullable(const char *p, char **ret) {
        assert(p);
        assert(ret);

        if (!isempty(p))
                return source_path_parse(p, ret);

        *ret = NULL;
        return 0;
}
115

116
/* Returns a newly allocated path for 'source': if it carries a "+" prefix, the remainder is joined
 * onto 'dest'; otherwise 'source' is duplicated as is. Returns NULL if 'source' is NULL or if the
 * allocation fails. */
static char *resolve_source_path(const char *dest, const char *source) {
        if (!source)
                return NULL;

        return source[0] == '+' ? path_join(dest, source + 1) : strdup(source);
}
125

126
/* Creates a fresh temporary directory below /var/tmp, records it in m->rm_rf_tmpdir, and creates a
 * "src" subdirectory inside it that becomes m->source. Must only be called while neither field is
 * set. Returns 0 on success, a negative errno-style value (already logged) on failure. */
static int allocate_temporary_source(CustomMount *m) {
        assert(m);
        assert(!m->source);
        assert(!m->rm_rf_tmpdir);

        int r = mkdtemp_malloc("/var/tmp/nspawn-temp-XXXXXX", &m->rm_rf_tmpdir);
        if (r < 0)
                return log_error_errno(r, "Failed to acquire temporary directory: %m");

        m->source = path_join(m->rm_rf_tmpdir, "src");
        if (!m->source)
                return log_oom();

        if (mkdir(m->source, 0755) < 0)
                return log_error_errno(errno, "Failed to create %s: %m", m->source);

        return 0;
}
146

147
/* Replaces *path (which must be non-NULL) by its resolved form, see resolve_source_path(). Returns 0
 * on success; on allocation failure logs OOM, leaves *path untouched and returns the error. */
static int custom_mount_resolve_in_place(const char *dest, char **path) {
        char *resolved;

        assert(path);
        assert(*path);

        resolved = resolve_source_path(dest, *path);
        if (!resolved)
                return log_oom();

        free_and_replace(*path, resolved);
        return 0;
}

/* Makes sure m->source is usable: resolves it if one was specified, otherwise falls back to a
 * throw-away temporary directory in /var/tmp. */
static int custom_mount_prepare_source(const char *dest, CustomMount *m) {
        assert(m);

        if (m->source)
                return custom_mount_resolve_in_place(dest, &m->source);

        return allocate_temporary_source(m);
}

/* Sorts the 'n' custom mounts in 'l' and prepares each of them relative to the container root 'dest'.
 *
 * Returns 0 on success, a negative errno-style value (already logged) on the first failure. Entries
 * processed before the failure keep whatever was set up for them. */
int custom_mount_prepare_all(const char *dest, CustomMount *l, size_t n) {
        int r;

        /* Prepare all custom mounts. This will make sure we know all temporary directories. This is called in the
         * parent process, so that we know the temporary directories to remove on exit before we fork off the
         * children. */

        assert(l || n == 0);

        /* Order the custom mounts, and make sure we have a working directory */
        typesafe_qsort(l, n, custom_mount_compare);

        FOREACH_ARRAY(m, l, n) {
                /* /proc we mount in the inner child, i.e. when we acquired CLONE_NEWPID. All other mounts we mount
                 * already in the outer child, so that the mounts are already established before CLONE_NEWPID and in
                 * particular CLONE_NEWUSER. This also means any custom mounts below /proc also need to be mounted in
                 * the inner child, not the outer one. Determine this here. */
                m->in_userns = path_startswith(m->destination, "/proc");

                if (m->type == CUSTOM_MOUNT_BIND) {
                        r = custom_mount_prepare_source(dest, m);
                        if (r < 0)
                                return r;
                }

                if (m->type == CUSTOM_MOUNT_OVERLAY) {
                        /* Lower directories first, then the upper one (stored in .source) */
                        STRV_FOREACH(j, m->lower) {
                                r = custom_mount_resolve_in_place(dest, j);
                                if (r < 0)
                                        return r;
                        }

                        r = custom_mount_prepare_source(dest, m);
                        if (r < 0)
                                return r;

                        if (m->work_dir) {
                                r = custom_mount_resolve_in_place(dest, &m->work_dir);
                                if (r < 0)
                                        return r;
                        } else {
                                /* No work directory given: derive a random name from the upper directory's path */
                                r = tempfn_random(m->source, NULL, &m->work_dir);
                                if (r < 0)
                                        return log_error_errno(r, "Failed to acquire working directory: %m");
                        }

                        /* Best effort, failures here are deliberately ignored */
                        (void) mkdir_label(m->work_dir, 0700);
                }
        }

        return 0;
}
229

230
/* Parses a bind mount specification of the form SOURCE[:DESTINATION[:OPTIONS]] from 's' and appends a
 * CUSTOM_MOUNT_BIND entry to the array *l / *n.
 *
 * If only one field is given it is used as both source and destination (a leading "+" is stripped
 * for the destination). An empty source field leaves the entry's .source unset.
 *
 * Returns 0 on success, -EINVAL if nothing could be extracted or a path is not acceptable, -ENOMEM on
 * allocation failure, or another negative value propagated from the word extraction/path parsing. */
int bind_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only) {
        _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL, *resolved = NULL;
        CustomMount *entry;
        int r;

        assert(l);
        assert(n);

        r = extract_many_words(&s, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination);
        if (r < 0)
                return r;
        if (r == 0)
                return -EINVAL;

        if (r == 1) {
                /* Only one field: mount the source at the same location inside the container */
                destination = strdup(source[0] == '+' ? source+1 : source);
                if (!destination)
                        return -ENOMEM;
        } else if (r == 2 && !isempty(s)) {
                /* Whatever follows the second field is taken verbatim as mount options */
                opts = strdup(s);
                if (!opts)
                        return -ENOMEM;
        }

        r = source_path_parse_nullable(source, &resolved);
        if (r < 0)
                return r;

        if (!path_is_absolute(destination))
                return -EINVAL;

        entry = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
        if (!entry)
                return -ENOMEM;

        entry->source = TAKE_PTR(resolved);
        entry->destination = TAKE_PTR(destination);
        entry->options = TAKE_PTR(opts);
        entry->read_only = read_only;

        return 0;
}
272

273
/* Parses a tmpfs mount specification of the form PATH[:OPTIONS] from 's' and appends a
 * CUSTOM_MOUNT_TMPFS entry to the array *l / *n. If no options are given, "mode=0755" is used.
 *
 * Returns 0 on success, -EINVAL if no path could be extracted or it is not absolute, -ENOMEM on
 * allocation failure, or another negative value propagated from the word extraction. */
int tmpfs_mount_parse(CustomMount **l, size_t *n, const char *s) {
        _cleanup_free_ char *where = NULL, *opts = NULL;
        const char *rest = ASSERT_PTR(s);
        CustomMount *entry;
        int r;

        assert(l);
        assert(n);

        r = extract_first_word(&rest, &where, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
        if (r < 0)
                return r;
        if (r == 0)
                return -EINVAL;

        /* Everything after the first separator is the option string; fall back to a default if absent */
        opts = strdup(isempty(rest) ? "mode=0755" : rest);
        if (!opts)
                return -ENOMEM;

        if (!path_is_absolute(where))
                return -EINVAL;

        entry = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
        if (!entry)
                return -ENOMEM;

        entry->destination = TAKE_PTR(where);
        entry->options = TAKE_PTR(opts);

        return 0;
}
307

308
/* Parses an overlay mount specification, a colon-separated list of at least two paths, from 's' and
 * appends a CUSTOM_MOUNT_OVERLAY entry to the array *l / *n.
 *
 * With exactly two fields: LOWER:UPPER, and the destination equals the upper directory (without a
 * "+" prefix). With more fields: LOWER...:UPPER:DESTINATION, where an empty UPPER leaves the entry's
 * .source unset.
 *
 * Returns 0 on success, -EADDRNOTAVAIL if fewer than two fields were given, -EINVAL for unacceptable
 * paths, -ENOMEM on allocation failure, or another negative value propagated from splitting/parsing. */
int overlay_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only) {
        _cleanup_free_ char *upper = NULL, *destination = NULL;
        _cleanup_strv_free_ char **lower = NULL;
        CustomMount *m;
        int r, k;

        k = strv_split_full(&lower, s, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
        if (k < 0)
                return k;
        if (k < 2)
                return -EADDRNOTAVAIL;
        if (k == 2) {
                _cleanup_free_ char *p = NULL;

                /* If two parameters are specified, the first one is the lower, the second one the upper directory. And
                 * we'll also define the destination mount point the same as the upper. */

                r = source_path_parse(lower[0], &p);
                if (r < 0)
                        return r;

                free_and_replace(lower[0], p);

                r = source_path_parse(lower[1], &p);
                if (r < 0)
                        return r;

                free_and_replace(lower[1], p);

                upper = TAKE_PTR(lower[1]);

                destination = strdup(upper[0] == '+' ? upper+1 : upper); /* take the destination without "+" prefix */
                if (!destination)
                        return -ENOMEM;
        } else {
                _cleanup_free_ char *p = NULL;

                /* If more than two parameters are specified, the last one is the destination, the second to last one
                 * the "upper", and all before that the "lower" directories. */

                /* Move both trailing elements out of the string list explicitly, so that each string has exactly
                 * one owner and the list is left terminated right after the last lower directory. */
                destination = TAKE_PTR(lower[k - 1]);
                upper = TAKE_PTR(lower[k - 2]);

                STRV_FOREACH(i, lower) {
                        r = source_path_parse(*i, &p);
                        if (r < 0)
                                return r;

                        free_and_replace(*i, p);
                }

                /* If the upper directory is unspecified, then let's create it automatically as a throw-away directory
                 * in /var/tmp */
                r = source_path_parse_nullable(upper, &p);
                if (r < 0)
                        return r;

                free_and_replace(upper, p);

                if (!path_is_absolute(destination))
                        return -EINVAL;
        }

        m = custom_mount_add(l, n, CUSTOM_MOUNT_OVERLAY);
        if (!m)
                return -ENOMEM;

        m->destination = TAKE_PTR(destination);
        m->source = TAKE_PTR(upper);
        m->lower = TAKE_PTR(lower);
        m->read_only = read_only;

        return 0;
}
382

383
/* Appends a CUSTOM_MOUNT_INACCESSIBLE entry for the absolute path 's' to the array *l / *n.
 *
 * Returns 0 on success, -EINVAL if 's' is not an absolute path, -ENOMEM on allocation failure. */
int inaccessible_mount_parse(CustomMount **l, size_t *n, const char *s) {
        _cleanup_free_ char *where = NULL;
        CustomMount *entry;

        assert(l);
        assert(n);
        assert(s);

        if (!path_is_absolute(s))
                return -EINVAL;

        where = strdup(s);
        if (!where)
                return -ENOMEM;

        entry = custom_mount_add(l, n, CUSTOM_MOUNT_INACCESSIBLE);
        if (!entry)
                return -ENOMEM;

        entry->destination = TAKE_PTR(where);

        return 0;
}
405

406
int tmpfs_patch_options(
1,056✔
407
                const char *options,
408
                uid_t uid_shift,
409
                const char *selinux_apifs_context,
410
                char **ret) {
411

412
        _cleanup_free_ char *buf = NULL;
1,056✔
413

414
        assert(ret);
1,056✔
415

416
        if (options) {
1,056✔
417
                buf = strdup(options);
1,056✔
418
                if (!buf)
1,056✔
419
                        return -ENOMEM;
420
        }
421

422
        if (uid_shift != UID_INVALID)
1,056✔
423
                if (strextendf_with_separator(&buf, ",", "uid=" UID_FMT ",gid=" UID_FMT, uid_shift, uid_shift) < 0)
1,044✔
424
                        return -ENOMEM;
425

426
#if HAVE_SELINUX
427
        if (selinux_apifs_context)
428
                if (strextendf_with_separator(&buf, ",", "context=\"%s\"", selinux_apifs_context) < 0)
429
                        return -ENOMEM;
430
#endif
431

432
        *ret = TAKE_PTR(buf);
1,056✔
433
        return !!*ret;
1,056✔
434
}
435

436
/* Sets up <dest>/sys.
 *
 * If <dest>/sys is already a mount point of type sysfs, nothing is done. Otherwise a tmpfs is mounted
 * there if it is not a mount point yet, a full sysfs instance is temporarily mounted on a "full"
 * subdirectory, a fixed set of its subdirectories is bind mounted into <dest>/sys, and the temporary
 * full instance is removed again. Finally <dest>/sys itself is remounted with restrictive flags,
 * which include MS_RDONLY if MOUNT_APPLY_APIVFS_RO is set in 'mount_settings'.
 *
 * Returns 0 or the result of the final remount on success, a negative errno-style value (already
 * logged) on failure. */
int mount_sysfs(const char *dest, MountSettingsMask mount_settings) {
        _cleanup_free_ char *top = NULL, *full = NULL;
        unsigned long extra_flags = FLAGS_SET(mount_settings, MOUNT_APPLY_APIVFS_RO) ? MS_RDONLY : 0;
        int r;

        top = path_join(dest, "/sys");
        if (!top)
                return log_oom();

        r = path_is_mount_point(top);
        if (r < 0)
                return log_error_errno(r, "Failed to determine if '%s' is a mountpoint: %m", top);
        if (r > 0) {
                r = path_is_fs_type(top, SYSFS_MAGIC);
                if (r < 0)
                        return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top);

                /* /sys/ might already be mounted as sysfs by the outer child in the !netns case. In this case, it's
                 * all good. Don't touch it because we don't have the right to do so, see
                 * https://github.com/systemd/systemd/issues/1555.
                 */
                if (r > 0)
                        return 0;
        } else {
                /* If this is not a mount point yet, then mount a tmpfs there */
                r = mount_nofollow_verbose(LOG_ERR, "tmpfs", top, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV, "mode=0555" TMPFS_LIMITS_SYS);
                if (r < 0)
                        return r;
        }

        /* Flags used for every remount below, both for the individual subdirectories and for the top-level */
        const unsigned long remount_flags = MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags;

        full = path_join(top, "/full");
        if (!full)
                return log_oom();

        if (mkdir(full, 0755) < 0 && errno != EEXIST)
                return log_error_errno(errno, "Failed to create directory '%s': %m", full);

        r = mount_nofollow_verbose(LOG_ERR, "sysfs", full, "sysfs",
                                   MS_NOSUID|MS_NOEXEC|MS_NODEV|extra_flags, NULL);
        if (r < 0)
                return r;

        FOREACH_STRING(name, "block", "bus", "class", "dev", "devices", "kernel") {
                _cleanup_free_ char *from = NULL, *to = NULL;

                from = path_join(full, name);
                if (!from)
                        return log_oom();

                to = path_join(top, name);
                if (!to)
                        return log_oom();

                (void) mkdir(to, 0755);

                /* Bind mount first, then adjust the flags with a remount */
                r = mount_nofollow_verbose(LOG_ERR, from, to, NULL, MS_BIND, NULL);
                if (r < 0)
                        return r;

                r = mount_nofollow_verbose(LOG_ERR, NULL, to, NULL, remount_flags, NULL);
                if (r < 0)
                        return r;
        }

        /* The full instance has served its purpose, get rid of it and its mount point again */
        r = umount_verbose(LOG_ERR, full, UMOUNT_NOFOLLOW);
        if (r < 0)
                return r;

        if (rmdir(full) < 0)
                return log_error_errno(errno, "Failed to remove %s: %m", full);

        /* Create mountpoint for cgroups. Otherwise we are not allowed since we remount /sys/ read-only. */
        _cleanup_free_ char *cgroup_dir = path_join(top, "/fs/cgroup");
        if (!cgroup_dir)
                return log_oom();

        (void) mkdir_p(cgroup_dir, 0755);

        return mount_nofollow_verbose(LOG_ERR, NULL, top, NULL, remount_flags, NULL);
}
521

522
/* Mount flags used for the "/proc" entry of the mount table in mount_all() */
#define PROC_DEFAULT_MOUNT_FLAGS (MS_NOSUID|MS_NOEXEC|MS_NODEV)
/* Mount flags used for the read-only sysfs "/sys" entry of the mount table in mount_all() */
#define SYS_DEFAULT_MOUNT_FLAGS  (MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV)
524

525
int mount_all(const char *dest,
348✔
526
              MountSettingsMask mount_settings,
527
              uid_t uid_shift,
528
              const char *selinux_apifs_context) {
529

530
#define PROC_INACCESSIBLE_REG(path)                                     \
531
        { "/run/systemd/inaccessible/reg", (path), NULL, NULL, MS_BIND, \
532
          MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ \
533
        { NULL, (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \
534
          MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */
535

536
#define PROC_READ_ONLY(path)                                            \
537
        { (path), (path), NULL, NULL, MS_BIND,                          \
538
          MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ \
539
        { NULL,   (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \
540
          MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */
541

542
        typedef struct MountPoint {
348✔
543
                const char *what;
544
                const char *where;
545
                const char *type;
546
                const char *options;
547
                unsigned long flags;
548
                MountSettingsMask mount_settings;
549
        } MountPoint;
550

551
        static const MountPoint mount_table[] = {
348✔
552
                /* First we list inner child mounts (i.e. mounts applied *after* entering user namespacing when we are privileged) */
553
                { "proc",            "/proc",           "proc",  NULL,        PROC_DEFAULT_MOUNT_FLAGS,
554
                  MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_MKDIR|MOUNT_FOLLOW_SYMLINKS }, /* we follow symlinks here since not following them requires /proc/ already being mounted, which we don't have here. */
555

556
                { "/proc/sys",       "/proc/sys",       NULL,    NULL,        MS_BIND,
557
                  MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO },                          /* Bind mount first ... */
558

559
                { "/proc/sys/net",   "/proc/sys/net",   NULL,    NULL,        MS_BIND,
560
                  MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO|MOUNT_APPLY_APIVFS_NETNS }, /* (except for this) */
561

562
                { NULL,              "/proc/sys",       NULL,    NULL,        MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
563
                  MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO },                          /* ... then, make it r/o */
564

565
                /* Make these files inaccessible to container payloads: they potentially leak information about kernel
566
                 * internals or the host's execution environment to the container */
567
                PROC_INACCESSIBLE_REG("/proc/kallsyms"),
568
                PROC_INACCESSIBLE_REG("/proc/kcore"),
569
                PROC_INACCESSIBLE_REG("/proc/keys"),
570
                PROC_INACCESSIBLE_REG("/proc/sysrq-trigger"),
571
                PROC_INACCESSIBLE_REG("/proc/timer_list"),
572

573
                /* Make these directories read-only to container payloads: they show hardware information, and in some
574
                 * cases contain tunables the container really shouldn't have access to. */
575
                PROC_READ_ONLY("/proc/acpi"),
576
                PROC_READ_ONLY("/proc/apm"),
577
                PROC_READ_ONLY("/proc/asound"),
578
                PROC_READ_ONLY("/proc/bus"),
579
                PROC_READ_ONLY("/proc/fs"),
580
                PROC_READ_ONLY("/proc/irq"),
581
                PROC_READ_ONLY("/proc/scsi"),
582

583
                { "mqueue",                 "/dev/mqueue",                  "mqueue", NULL,                            MS_NOSUID|MS_NOEXEC|MS_NODEV,
584
                  MOUNT_IN_USERNS|MOUNT_MKDIR },
585

586
                /* Then we list outer child mounts (i.e. mounts applied *before* entering user namespacing when we are privileged) */
587
                { "tmpfs",                  "/tmp",                         "tmpfs", "mode=01777" NESTED_TMPFS_LIMITS, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
588
                  MOUNT_FATAL|MOUNT_APPLY_TMPFS_TMP|MOUNT_MKDIR|MOUNT_USRQUOTA_GRACEFUL },
589
                { "tmpfs",                  "/sys",                         "tmpfs", "mode=0555" TMPFS_LIMITS_SYS,     MS_NOSUID|MS_NOEXEC|MS_NODEV,
590
                  MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS|MOUNT_MKDIR|MOUNT_UNMANAGED },
591
                { "sysfs",                  "/sys",                         "sysfs", NULL,                             SYS_DEFAULT_MOUNT_FLAGS,
592
                  MOUNT_FATAL|MOUNT_APPLY_APIVFS_RO|MOUNT_MKDIR|MOUNT_UNMANAGED },    /* skipped if above was mounted */
593
                { "sysfs",                  "/sys",                         "sysfs", NULL,                             MS_NOSUID|MS_NOEXEC|MS_NODEV,
594
                  MOUNT_FATAL|MOUNT_MKDIR|MOUNT_UNMANAGED },                          /* skipped if above was mounted */
595
                { "tmpfs",                  "/dev",                         "tmpfs", "mode=0755" TMPFS_LIMITS_PRIVATE_DEV, MS_NOSUID|MS_STRICTATIME,
596
                  MOUNT_FATAL|MOUNT_MKDIR },
597
                { "tmpfs",                  "/dev/shm",                     "tmpfs", "mode=01777" NESTED_TMPFS_LIMITS, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
598
                  MOUNT_FATAL|MOUNT_MKDIR|MOUNT_USRQUOTA_GRACEFUL },
599
                { "tmpfs",                  "/run",                         "tmpfs", "mode=0755" TMPFS_LIMITS_RUN,     MS_NOSUID|MS_NODEV|MS_STRICTATIME,
600
                  MOUNT_FATAL|MOUNT_MKDIR },
601
                { "/run/host",              "/run/host",                    NULL,    NULL,                             MS_BIND,
602
                  MOUNT_FATAL|MOUNT_MKDIR|MOUNT_PREFIX_ROOT }, /* Prepare this so that we can make it read-only when we are done */
603
                { "/etc/os-release",        "/run/host/os-release",         NULL,    NULL,                             MS_BIND,
604
                  MOUNT_TOUCH }, /* As per kernel interface requirements, bind mount first (creating mount points) and make read-only later */
605
                { "/usr/lib/os-release",    "/run/host/os-release",         NULL,    NULL,                             MS_BIND,
606
                  MOUNT_FATAL }, /* If /etc/os-release doesn't exist use the version in /usr/lib as fallback */
607
                { NULL,                     "/run/host/os-release",         NULL,    NULL,                             MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
608
                  MOUNT_FATAL },
609
                { NULL,                     "/run/host/os-release",         NULL,    NULL,                             MS_PRIVATE,
610
                  MOUNT_FATAL },  /* Turn off propagation (we only want that for the mount propagation tunnel dir) */
611
                { NULL,                     "/run/host",                    NULL,    NULL,                             MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
612
                  MOUNT_FATAL|MOUNT_IN_USERNS },
613
#if HAVE_SELINUX
614
                { "/sys/fs/selinux",        "/sys/fs/selinux",              NULL,    NULL,                             MS_BIND,
615
                  MOUNT_MKDIR|MOUNT_PRIVILEGED },  /* Bind mount first (mkdir/chown the mount point in case /sys/ is mounted as minimal skeleton tmpfs) */
616
                { NULL,                     "/sys/fs/selinux",              NULL,    NULL,                             MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
617
                  MOUNT_UNMANAGED|MOUNT_PRIVILEGED },  /* Then, make it r/o (don't mkdir/chown the mount point here, the previous entry already did that) */
618
                { NULL,                     "/sys/fs/selinux",              NULL,    NULL,                             MS_PRIVATE,
619
                  MOUNT_UNMANAGED|MOUNT_PRIVILEGED },  /* Turn off propagation (we only want that for the mount propagation tunnel dir) */
620
#endif
621
        };
622

623
        bool use_userns = FLAGS_SET(mount_settings, MOUNT_USE_USERNS);
348✔
624
        bool netns = FLAGS_SET(mount_settings, MOUNT_APPLY_APIVFS_NETNS);
348✔
625
        bool ro = FLAGS_SET(mount_settings, MOUNT_APPLY_APIVFS_RO);
348✔
626
        bool in_userns = FLAGS_SET(mount_settings, MOUNT_IN_USERNS);
348✔
627
        bool tmpfs_tmp = FLAGS_SET(mount_settings, MOUNT_APPLY_TMPFS_TMP);
348✔
628
        bool unmanaged = FLAGS_SET(mount_settings, MOUNT_UNMANAGED);
348✔
629
        bool privileged = FLAGS_SET(mount_settings, MOUNT_PRIVILEGED);
348✔
630
        int r;
348✔
631

632
        FOREACH_ELEMENT(m, mount_table) {
14,964✔
633
                _cleanup_free_ char *where = NULL, *options = NULL, *prefixed = NULL;
14,616✔
634
                bool fatal = FLAGS_SET(m->mount_settings, MOUNT_FATAL);
14,616✔
635
                const char *o;
14,616✔
636

637
                /* If we are in managed user namespace mode but the entry is marked for mount outside of
638
                 * managed user namespace mode, and to be mounted outside the user namespace, then skip it */
639
                if (!unmanaged && FLAGS_SET(m->mount_settings, MOUNT_UNMANAGED) && !FLAGS_SET(m->mount_settings, MOUNT_IN_USERNS))
14,616✔
640
                        continue;
27✔
641

642
                if (in_userns != FLAGS_SET(m->mount_settings, MOUNT_IN_USERNS))
14,589✔
643
                        continue;
8,385✔
644

645
                if (!netns && FLAGS_SET(m->mount_settings, MOUNT_APPLY_APIVFS_NETNS))
6,204✔
646
                        continue;
189✔
647

648
                if (!ro && FLAGS_SET(m->mount_settings, MOUNT_APPLY_APIVFS_RO))
6,015✔
649
                        continue;
228✔
650

651
                if (!tmpfs_tmp && FLAGS_SET(m->mount_settings, MOUNT_APPLY_TMPFS_TMP))
5,787✔
652
                        continue;
×
653

654
                if (!privileged && FLAGS_SET(m->mount_settings, MOUNT_PRIVILEGED))
5,787✔
655
                        continue;
×
656

657
                r = chase(m->where, dest, CHASE_NONEXISTENT|CHASE_PREFIX_ROOT, &where, NULL);
5,787✔
658
                if (r < 0)
5,787✔
659
                        return log_error_errno(r, "Failed to resolve %s%s: %m", strempty(dest), m->where);
×
660

661
                /* Skip this entry if it is not a remount. */
662
                if (m->what) {
5,787✔
663
                        r = path_is_mount_point(where);
3,827✔
664
                        if (r < 0 && r != -ENOENT)
3,827✔
665
                                return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
×
666
                        if (r > 0)
3,827✔
667
                                continue;
545✔
668
                }
669

670
                if ((m->mount_settings & (MOUNT_MKDIR|MOUNT_TOUCH)) != 0) {
5,242✔
671
                        uid_t u = (use_userns && !in_userns) ? uid_shift : UID_INVALID;
1,857✔
672

673
                        if (FLAGS_SET(m->mount_settings, MOUNT_TOUCH))
1,857✔
674
                                r = mkdir_parents_safe(dest, where, 0755, u, u, 0);
234✔
675
                        else
676
                                r = mkdir_p_safe(dest, where, 0755, u, u, 0);
1,623✔
677
                        if (r < 0 && r != -EEXIST) {
1,857✔
678
                                if (fatal && r != -EROFS)
×
679
                                        return log_error_errno(r, "Failed to create directory %s: %m", where);
×
680

681
                                log_debug_errno(r, "Failed to create directory %s: %m", where);
×
682

683
                                /* If we failed mkdir() or chown() due to the root directory being read only,
684
                                 * attempt to mount this fs anyway and let mount_verbose log any errors */
685
                                if (r != -EROFS)
×
686
                                        continue;
×
687
                        }
688
                }
689

690
                if (FLAGS_SET(m->mount_settings, MOUNT_TOUCH)) {
5,242✔
691
                        r = touch(where);
234✔
692
                        if (r < 0 && r != -EEXIST) {
234✔
693
                                if (fatal && r != -EROFS)
×
694
                                        return log_error_errno(r, "Failed to create file %s: %m", where);
×
695

696
                                log_debug_errno(r, "Failed to create file %s: %m", where);
×
697
                                if (r != -EROFS)
×
698
                                        continue;
×
699
                        }
700
                }
701

702
                o = m->options;
5,242✔
703
                if (streq_ptr(m->type, "tmpfs")) {
5,242✔
704
                        r = tmpfs_patch_options(o, in_userns ? 0 : uid_shift, selinux_apifs_context, &options);
2,072✔
705
                        if (r < 0)
1,036✔
706
                                return log_oom();
×
707
                        if (r > 0)
1,036✔
708
                                o = options;
1,036✔
709
                }
710

711
                if (FLAGS_SET(m->mount_settings, MOUNT_USRQUOTA_GRACEFUL)) {
5,242✔
712
                        r = mount_option_supported(m->type, /* key= */ "usrquota", /* value= */ NULL);
468✔
713
                        if (r < 0)
468✔
714
                                log_warning_errno(r, "Failed to determine if '%s' supports 'usrquota', assuming it doesn't: %m", m->type);
×
715
                        else if (r == 0)
468✔
716
                                log_debug("Kernel doesn't support 'usrquota' on '%s', not including in mount options for '%s'.", m->type, m->where);
16✔
717
                        else {
718
                                _cleanup_free_ char *joined = NULL;
×
719

720
                                if (!strextend_with_separator(&joined, ",", o ?: POINTER_MAX, "usrquota"))
452✔
721
                                        return log_oom();
×
722

723
                                free_and_replace(options, joined);
452✔
724
                                o = options;
452✔
725
                        }
726
                }
727

728
                if (FLAGS_SET(m->mount_settings, MOUNT_PREFIX_ROOT)) {
5,242✔
729
                        /* Optionally prefix the mount source with the root dir. This is useful in bind
730
                         * mounts to be created within the container image before we transition into it. Note
731
                         * that MOUNT_IN_USERNS is run after we transitioned hence prefixing is not necessary
732
                         * for those. */
733
                        r = chase(m->what, dest, CHASE_PREFIX_ROOT, &prefixed, NULL);
234✔
734
                        if (r < 0)
234✔
735
                                return log_error_errno(r, "Failed to resolve %s%s: %m", strempty(dest), m->what);
×
736
                }
737

738
                r = mount_verbose_full(
8,134✔
739
                                fatal ? LOG_ERR : LOG_DEBUG,
740
                                prefixed ?: m->what,
5,242✔
741
                                where,
742
                                m->type,
5,242✔
743
                                m->flags,
5,242✔
744
                                o,
745
                                FLAGS_SET(m->mount_settings, MOUNT_FOLLOW_SYMLINKS));
5,242✔
746
                if (r < 0 && fatal)
5,242✔
747
                        return r;
748
        }
749

750
        return 0;
751
}
752

753
/* Parses the comma-separated option string of a bind mount.
 *
 * "rbind"/"norbind" set/clear MS_REC in *mount_flags; "idmap", "noidmap", "rootidmap" and "owneridmap"
 * select the corresponding RemountIdmapping value for *idmapping. Any other word is rejected with -EINVAL.
 * The output parameters are only written once the whole string was parsed successfully; *mount_opts is
 * currently always set to NULL. Returns 0 on success, a negative errno-style value on failure. */
static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts, RemountIdmapping *idmapping) {
        unsigned long parsed_flags = *mount_flags;
        char *string_opts = NULL;
        RemountIdmapping parsed_idmapping = *idmapping;
        int r;

        assert(options);

        for (;;) {
                _cleanup_free_ char *token = NULL;

                r = extract_first_word(&options, &token, ",", 0);
                if (r < 0)
                        return log_error_errno(r, "Failed to extract mount option: %m");
                if (r == 0) /* End of the option string reached */
                        break;

                if (streq(token, "rbind")) {
                        parsed_flags |= MS_REC;
                        continue;
                }

                if (streq(token, "norbind")) {
                        parsed_flags &= ~MS_REC;
                        continue;
                }

                if (streq(token, "idmap")) {
                        parsed_idmapping = REMOUNT_IDMAPPING_HOST_ROOT;
                        continue;
                }

                if (streq(token, "noidmap")) {
                        parsed_idmapping = REMOUNT_IDMAPPING_NONE;
                        continue;
                }

                if (streq(token, "rootidmap")) {
                        parsed_idmapping = REMOUNT_IDMAPPING_HOST_OWNER;
                        continue;
                }

                if (streq(token, "owneridmap")) {
                        parsed_idmapping = REMOUNT_IDMAPPING_HOST_OWNER_TO_TARGET_OWNER;
                        continue;
                }

                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
                                       "Invalid bind mount option: %s", token);
        }

        /* Everything parsed fine, now publish the results. */
        *mount_flags = parsed_flags;
        *idmapping = parsed_idmapping;
        /* in the future mount_opts will hold string options for mount(2) */
        *mount_opts = string_opts;

        return 0;
}
794

795
/* Establishes the bind mount described by 'm' below the container root 'dest'.
 *
 * m->options (if set) is parsed into mount flags and an ID-mapping mode. If the destination already exists
 * it must be of the same kind (directory vs. non-directory) as the source; otherwise the mount point and
 * its parents are created and chown()ed to uid_shift. After the bind mount itself, the mount is optionally
 * remounted read-only (m->read_only) and/or ID-mapped. Returns 0 on success, a negative errno-style value
 * on failure. */
static int mount_bind(const char *dest, CustomMount *m, uid_t uid_shift, uid_t uid_range) {
        _cleanup_free_ char *mount_opts = NULL, *where = NULL;
        RemountIdmapping idmapping = REMOUNT_IDMAPPING_NONE;
        unsigned long mount_flags = MS_BIND | MS_REC;
        uid_t target_owner = UID_INVALID;
        struct stat src_st, dst_st;
        bool src_is_dir;
        int r;

        assert(dest);
        assert(m);

        if (m->options) {
                r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts, &idmapping);
                if (r < 0)
                        return r;
        }

        /* If this is a bind mount from a temporary sources change ownership of the source to the container's
         * root UID. Otherwise it would always show up as "nobody" if user namespacing is used. */
        if (m->rm_rf_tmpdir) {
                if (chown(m->source, uid_shift, uid_shift) < 0)
                        return log_error_errno(errno, "Failed to chown %s: %m", m->source);
        }

        if (stat(m->source, &src_st) < 0)
                return log_error_errno(errno, "Failed to stat %s: %m", m->source);

        src_is_dir = S_ISDIR(src_st.st_mode);

        r = chase(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);

        if (r == 0) {
                /* The mount point does not exist yet: create its parents first, then the node itself. */
                r = mkdir_parents_safe_label(dest, where, 0755, uid_shift, uid_shift, MKDIR_IGNORE_EXISTING);
                if (r < 0)
                        return log_error_errno(r, "Failed to make parents of %s: %m", where);

                /* Create the mount point. Any non-directory file can be
                 * mounted on any non-directory file (regular, fifo, socket,
                 * char, block).
                 */
                r = src_is_dir ? mkdir_label(where, 0755) : touch(where);
                if (r < 0)
                        return log_error_errno(r, "Failed to create mount point %s: %m", where);

                if (chown(where, uid_shift, uid_shift) < 0)
                        return log_error_errno(errno, "Failed to chown %s: %m", where);

                target_owner = uid_shift;
        } else {
                /* The mount point exists already: remember its owner and verify the node types match. */
                bool dst_is_dir;

                if (stat(where, &dst_st) < 0)
                        return log_error_errno(errno, "Failed to stat %s: %m", where);

                target_owner = dst_st.st_uid;
                dst_is_dir = S_ISDIR(dst_st.st_mode);

                if (src_is_dir && !dst_is_dir)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
                                               "Cannot bind mount directory %s on file %s.",
                                               m->source, where);

                if (!src_is_dir && dst_is_dir)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
                                               "Cannot bind mount file %s on directory %s.",
                                               m->source, where);
        }

        r = mount_nofollow_verbose(LOG_ERR, m->source, where, NULL, mount_flags, mount_opts);
        if (r < 0)
                return r;

        if (m->read_only) {
                r = bind_remount_recursive(where, MS_RDONLY, MS_RDONLY, NULL);
                if (r < 0)
                        return log_error_errno(r, "Read-only bind mount failed: %m");
        }

        if (idmapping == REMOUNT_IDMAPPING_NONE)
                return 0;

        r = remount_idmap(STRV_MAKE(where), uid_shift, uid_range, src_st.st_uid, target_owner, idmapping);
        if (r < 0)
                return log_error_errno(r, "Failed to map ids for bind mount %s: %m", where);

        return 0;
}
880

881
/* Mounts a tmpfs for the custom mount 'm' at m->destination below 'dest'.
 *
 * The mount point is created (including parents) if it does not exist yet. The option string from
 * m->options is passed through tmpfs_patch_options() together with uid_shift (UID_INVALID if uid_shift is
 * 0) and the SELinux context; the patched string is used whenever that call returns > 0. Returns the result
 * of the mount call, or a negative errno-style value on earlier failure. */
static int mount_tmpfs(const char *dest, CustomMount *m, uid_t uid_shift, const char *selinux_apifs_context) {
        _cleanup_free_ char *buf = NULL, *where = NULL;
        const char *effective_options;
        uid_t owner;
        int r;

        assert(dest);
        assert(m);

        r = chase(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
        if (r == 0) {
                /* The mount point is missing, create it. */
                r = mkdir_p_label(where, 0755);
                if (r < 0)
                        return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
        }

        owner = uid_shift != 0 ? uid_shift : UID_INVALID;

        r = tmpfs_patch_options(m->options, owner, selinux_apifs_context, &buf);
        if (r < 0)
                return log_oom();

        if (r > 0)
                effective_options = buf;
        else
                effective_options = m->options;

        return mount_nofollow_verbose(LOG_ERR, "tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, effective_options);
}
905

906
static char *joined_and_escaped_lower_dirs(char **lower) {
2✔
907
        _cleanup_strv_free_ char **sv = NULL;
×
908

909
        sv = strv_copy(lower);
2✔
910
        if (!sv)
2✔
911
                return NULL;
912

913
        strv_reverse(sv);
2✔
914

915
        if (!strv_shell_escape(sv, ",:"))
2✔
916
                return NULL;
917

918
        return strv_join(sv, ":");
2✔
919
}
920

921
/* Mounts an overlay file system for the custom mount 'm' at m->destination below 'dest'.
 *
 * If m->read_only is set, m->source and the entries of m->lower are all passed as lower directories and
 * the mount is made with MS_RDONLY. Otherwise m->source is used as upper directory and m->work_dir as work
 * directory. All paths are escaped for "," and ":" before they are put into the option string. Returns the
 * result of the mount call, or a negative errno-style value on earlier failure. */
static int mount_overlay(const char *dest, CustomMount *m) {
        _cleanup_free_ char *lower = NULL, *where = NULL, *escaped_source = NULL, *escaped_work_dir = NULL;
        const char *options;
        int r;

        assert(dest);
        assert(m);

        r = chase(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
        if (r == 0) {
                /* The mount point is missing, create it. */
                r = mkdir_label(where, 0755);
                if (r < 0)
                        return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
        }

        /* Best effort only, the result is deliberately ignored. */
        (void) mkdir_p_label(m->source, 0755);

        lower = joined_and_escaped_lower_dirs(m->lower);
        if (!lower)
                return log_oom();

        escaped_source = shell_escape(m->source, ",:");
        if (!escaped_source)
                return log_oom();

        if (m->read_only) {
                options = strjoina("lowerdir=", escaped_source, ":", lower);

                return mount_nofollow_verbose(LOG_ERR, "overlay", where, "overlay", MS_RDONLY, options);
        }

        escaped_work_dir = shell_escape(m->work_dir, ",:");
        if (!escaped_work_dir)
                return log_oom();

        options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);

        return mount_nofollow_verbose(LOG_ERR, "overlay", where, "overlay", 0, options);
}
962

963
/* Makes m->destination below 'dest' inaccessible by bind mounting the node returned by
 * mode_to_inaccessible_node() for the destination's file mode over it, and then remounting that bind mount
 * read-only. If the read-only remount fails, the bind mount is removed again.
 *
 * If m->graceful is set, failures are logged at debug level and reported as success (0); otherwise they
 * are logged at error level and the negative errno-style value is returned. */
static int mount_inaccessible(const char *dest, CustomMount *m) {
        _cleanup_free_ char *where = NULL, *source = NULL;
        struct stat st;
        int r;

        assert(dest);
        assert(m);

        r = chase_and_stat(m->destination, dest, CHASE_PREFIX_ROOT, &where, &st);
        if (r < 0) {
                log_full_errno(m->graceful ? LOG_DEBUG : LOG_ERR, r, "Failed to resolve %s/%s: %m", dest, m->destination);
                goto failed;
        }

        r = mode_to_inaccessible_node(NULL, st.st_mode, &source);
        if (r < 0)
                goto failed;

        r = mount_nofollow_verbose(m->graceful ? LOG_DEBUG : LOG_ERR, source, where, NULL, MS_BIND, NULL);
        if (r < 0)
                goto failed;

        r = mount_nofollow_verbose(m->graceful ? LOG_DEBUG : LOG_ERR, NULL, where, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, NULL);
        if (r < 0) {
                /* Undo the bind mount established above, so that no writable mount stays behind. */
                (void) umount_verbose(m->graceful ? LOG_DEBUG : LOG_ERR, where, UMOUNT_NOFOLLOW);
                goto failed;
        }

        return 0;

failed:
        /* In graceful mode errors are not propagated to the caller. */
        if (m->graceful)
                return 0;

        return r;
}
993

994
/* Mounts m->source with file system type m->type_argument and options m->options at m->destination below
 * 'dest', without any additional mount flags. A missing mount point is created first (including parents).
 * Returns the result of the mount call, or a negative errno-style value on earlier failure. */
static int mount_arbitrary(const char *dest, CustomMount *m) {
        _cleanup_free_ char *where = NULL;
        int r;

        assert(dest);
        assert(m);

        r = chase(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);

        if (r > 0) /* Mount point exists already, go ahead right away. */
                return mount_nofollow_verbose(LOG_ERR, m->source, where, m->type_argument, 0, m->options);

        /* Mount point is missing, create it first. */
        r = mkdir_p_label(where, 0755);
        if (r < 0)
                return log_error_errno(r, "Creating mount point for mount %s failed: %m", where);

        return mount_nofollow_verbose(LOG_ERR, m->source, where, m->type_argument, 0, m->options);
}
1012

1013
int mount_custom(
582✔
1014
                const char *dest,
1015
                CustomMount *mounts, size_t n,
1016
                uid_t uid_shift,
1017
                uid_t uid_range,
1018
                const char *selinux_apifs_context,
1019
                MountSettingsMask mount_settings) {
1020
        int r;
582✔
1021

1022
        assert(dest);
582✔
1023

1024
        FOREACH_ARRAY(m, mounts, n) {
1,272✔
1025
                if (FLAGS_SET(mount_settings, MOUNT_IN_USERNS) != m->in_userns)
690✔
1026
                        continue;
138✔
1027

1028
                if (FLAGS_SET(mount_settings, MOUNT_ROOT_ONLY) && !path_equal(m->destination, "/"))
552✔
1029
                        continue;
276✔
1030

1031
                if (FLAGS_SET(mount_settings, MOUNT_NON_ROOT_ONLY) && path_equal(m->destination, "/"))
276✔
1032
                        continue;
×
1033

1034
                switch (m->type) {
276✔
1035

1036
                case CUSTOM_MOUNT_BIND:
266✔
1037
                        r = mount_bind(dest, m, uid_shift, uid_range);
266✔
1038
                        break;
266✔
1039

1040
                case CUSTOM_MOUNT_TMPFS:
4✔
1041
                        r = mount_tmpfs(dest, m, uid_shift, selinux_apifs_context);
4✔
1042
                        break;
4✔
1043

1044
                case CUSTOM_MOUNT_OVERLAY:
2✔
1045
                        r = mount_overlay(dest, m);
2✔
1046
                        break;
2✔
1047

1048
                case CUSTOM_MOUNT_INACCESSIBLE:
4✔
1049
                        r = mount_inaccessible(dest, m);
4✔
1050
                        break;
4✔
1051

1052
                case CUSTOM_MOUNT_ARBITRARY:
×
1053
                        r = mount_arbitrary(dest, m);
×
1054
                        break;
×
1055

1056
                default:
×
1057
                        assert_not_reached();
×
1058
                }
1059

1060
                if (r < 0)
276✔
1061
                        return r;
1062
        }
1063

1064
        return 0;
1065
}
1066

1067
bool has_custom_root_mount(const CustomMount *mounts, size_t n) {
455✔
1068
        FOREACH_ARRAY(m, mounts, n)
931✔
1069
                if (path_equal(m->destination, "/"))
476✔
1070
                        return true;
1071

1072
        return false;
1073
}
1074

1075
static int setup_volatile_state(const char *directory) {
4✔
1076
        int r;
4✔
1077

1078
        assert(directory);
4✔
1079

1080
        /* --volatile=state means we simply overmount /var with a tmpfs, and the rest read-only. */
1081

1082
        /* First, remount the root directory. */
1083
        r = bind_remount_recursive(directory, MS_RDONLY, MS_RDONLY, NULL);
4✔
1084
        if (r < 0)
4✔
1085
                return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
×
1086

1087
        return 0;
1088
}
1089

1090
static int setup_volatile_state_after_remount_idmap(const char *directory, uid_t uid_shift, const char *selinux_apifs_context) {
4✔
1091
        _cleanup_free_ char *buf = NULL;
8✔
1092
        int r;
4✔
1093

1094
        assert(directory);
4✔
1095

1096
        /* Then, after remount_idmap(), overmount /var/ with a tmpfs. */
1097

1098
        _cleanup_free_ char *p = path_join(directory, "/var");
8✔
1099
        if (!p)
4✔
1100
                return log_oom();
×
1101

1102
        r = mkdir(p, 0755);
4✔
1103
        if (r < 0 && errno != EEXIST)
4✔
1104
                return log_error_errno(errno, "Failed to create %s: %m", directory);
×
1105

1106
        const char *options = "mode=0755" TMPFS_LIMITS_VOLATILE_STATE;
4✔
1107
        r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
6✔
1108
        if (r < 0)
4✔
1109
                return log_oom();
×
1110
        if (r > 0)
4✔
1111
                options = buf;
4✔
1112

1113
        return mount_nofollow_verbose(LOG_ERR, "tmpfs", p, "tmpfs", MS_STRICTATIME, options);
4✔
1114
}
1115

1116
static int setup_volatile_yes(const char *directory, uid_t uid_shift, const char *selinux_apifs_context) {
8✔
1117
        bool tmpfs_mounted = false, bind_mounted = false;
8✔
1118
        _cleanup_(rmdir_and_freep) char *template = NULL;
×
1119
        _cleanup_free_ char *buf = NULL, *bindir = NULL, *f = NULL, *t = NULL;
8✔
1120
        struct stat st;
8✔
1121
        int r;
8✔
1122

1123
        assert(directory);
8✔
1124

1125
        /* --volatile=yes means we mount a tmpfs to the root dir, and the original /usr to use inside it, and
1126
         * that read-only. Before we start setting this up let's validate if the image has the /usr merge
1127
         * implemented, and let's output a friendly log message if it hasn't. */
1128

1129
        bindir = path_join(directory, "/bin");
8✔
1130
        if (!bindir)
8✔
1131
                return log_oom();
×
1132
        if (lstat(bindir, &st) < 0) {
8✔
1133
                if (errno != ENOENT)
×
1134
                        return log_error_errno(errno, "Failed to stat /bin directory below image: %m");
×
1135

1136
                /* ENOENT is fine, just means the image is probably just a naked /usr and we can create the
1137
                 * rest. */
1138
        } else if (S_ISDIR(st.st_mode))
8✔
1139
                return log_error_errno(SYNTHETIC_ERRNO(EISDIR),
×
1140
                                       "Sorry, --volatile=yes mode is not supported with OS images that have not merged /bin/, /sbin/, /lib/, /lib64/ into /usr/. "
1141
                                       "Please work with your distribution and help them adopt the merged /usr scheme.");
1142
        else if (!S_ISLNK(st.st_mode))
8✔
1143
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
×
1144
                                       "Error starting image: if --volatile=yes is used /bin must be a symlink (for merged /usr support) or non-existent (in which case a symlink is created automatically).");
1145

1146
        r = mkdtemp_malloc("/tmp/nspawn-volatile-XXXXXX", &template);
8✔
1147
        if (r < 0)
8✔
1148
                return log_error_errno(r, "Failed to create temporary directory: %m");
×
1149

1150
        const char *options = "mode=0755" TMPFS_LIMITS_ROOTFS;
8✔
1151
        r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
12✔
1152
        if (r < 0)
8✔
1153
                goto fail;
×
1154
        if (r > 0)
8✔
1155
                options = buf;
8✔
1156

1157
        r = mount_nofollow_verbose(LOG_ERR, "tmpfs", template, "tmpfs", MS_STRICTATIME, options);
8✔
1158
        if (r < 0)
8✔
1159
                goto fail;
×
1160

1161
        tmpfs_mounted = true;
8✔
1162

1163
        f = path_join(directory, "/usr");
8✔
1164
        if (!f) {
8✔
1165
                r = log_oom();
×
1166
                goto fail;
×
1167
        }
1168

1169
        t = path_join(template, "/usr");
8✔
1170
        if (!t) {
8✔
1171
                r = log_oom();
×
1172
                goto fail;
×
1173
        }
1174

1175
        r = mkdir(t, 0755);
8✔
1176
        if (r < 0 && errno != EEXIST) {
8✔
1177
                r = log_error_errno(errno, "Failed to create %s: %m", t);
×
1178
                goto fail;
×
1179
        }
1180

1181
        r = mount_nofollow_verbose(LOG_ERR, f, t, NULL, MS_BIND|MS_REC, NULL);
8✔
1182
        if (r < 0)
8✔
1183
                goto fail;
×
1184

1185
        bind_mounted = true;
8✔
1186

1187
        r = bind_remount_recursive(t, MS_RDONLY, MS_RDONLY, NULL);
8✔
1188
        if (r < 0) {
8✔
1189
                log_error_errno(r, "Failed to remount %s read-only: %m", t);
×
1190
                goto fail;
×
1191
        }
1192

1193
        r = mount_nofollow_verbose(LOG_ERR, template, directory, NULL, MS_MOVE, NULL);
8✔
1194
        if (r < 0)
8✔
1195
                goto fail;
×
1196

1197
        (void) rmdir(template);
8✔
1198

1199
        return 0;
8✔
1200

1201
fail:
1202
        if (bind_mounted)
×
1203
                (void) umount_verbose(LOG_ERR, t, UMOUNT_NOFOLLOW);
×
1204

1205
        if (tmpfs_mounted)
×
1206
                (void) umount_verbose(LOG_ERR, template, UMOUNT_NOFOLLOW);
×
1207

1208
        return r;
1209
}
1210

1211
static int setup_volatile_overlay(const char *directory, uid_t uid_shift, const char *selinux_apifs_context) {
4✔
1212
        _cleanup_free_ char *buf = NULL, *escaped_directory = NULL, *escaped_upper = NULL, *escaped_work = NULL;
4✔
1213
        _cleanup_(rmdir_and_freep) char *template = NULL;
4✔
1214
        const char *upper, *work, *options;
4✔
1215
        bool tmpfs_mounted = false;
4✔
1216
        int r;
4✔
1217

1218
        assert(directory);
4✔
1219

1220
        /* --volatile=overlay means we mount an overlayfs to the root dir. */
1221

1222
        r = mkdtemp_malloc("/tmp/nspawn-volatile-XXXXXX", &template);
4✔
1223
        if (r < 0)
4✔
1224
                return log_error_errno(r, "Failed to create temporary directory: %m");
×
1225

1226
        options = "mode=0755" TMPFS_LIMITS_ROOTFS;
4✔
1227
        r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
6✔
1228
        if (r < 0)
4✔
1229
                goto finish;
×
1230
        if (r > 0)
4✔
1231
                options = buf;
4✔
1232

1233
        r = mount_nofollow_verbose(LOG_ERR, "tmpfs", template, "tmpfs", MS_STRICTATIME, options);
4✔
1234
        if (r < 0)
4✔
1235
                goto finish;
×
1236

1237
        tmpfs_mounted = true;
4✔
1238

1239
        upper = strjoina(template, "/upper");
20✔
1240
        work = strjoina(template, "/work");
20✔
1241

1242
        if (mkdir(upper, 0755) < 0) {
4✔
1243
                r = log_error_errno(errno, "Failed to create %s: %m", upper);
×
1244
                goto finish;
×
1245
        }
1246
        if (mkdir(work, 0755) < 0) {
4✔
1247
                r = log_error_errno(errno, "Failed to create %s: %m", work);
×
1248
                goto finish;
×
1249
        }
1250

1251
        /* And now, let's overmount the root dir with an overlayfs that uses the root dir as lower dir. It's kinda nice
1252
         * that the kernel allows us to do that without going through some mount point rearrangements. */
1253

1254
        escaped_directory = shell_escape(directory, ",:");
4✔
1255
        escaped_upper = shell_escape(upper, ",:");
4✔
1256
        escaped_work = shell_escape(work, ",:");
4✔
1257
        if (!escaped_directory || !escaped_upper || !escaped_work) {
4✔
1258
                r = -ENOMEM;
×
1259
                goto finish;
×
1260
        }
1261

1262
        options = strjoina("lowerdir=", escaped_directory, ",upperdir=", escaped_upper, ",workdir=", escaped_work);
52✔
1263
        r = mount_nofollow_verbose(LOG_ERR, "overlay", directory, "overlay", 0, options);
4✔
1264

1265
finish:
1266
        if (tmpfs_mounted)
×
1267
                (void) umount_verbose(LOG_ERR, template, UMOUNT_NOFOLLOW);
4✔
1268

1269
        return r;
1270
}
1271

1272
int setup_volatile_mode(
236✔
1273
                const char *directory,
1274
                VolatileMode mode,
1275
                uid_t uid_shift,
1276
                const char *selinux_apifs_context) {
1277

1278
        switch (mode) {
236✔
1279

1280
        case VOLATILE_YES:
8✔
1281
                return setup_volatile_yes(directory, uid_shift, selinux_apifs_context);
8✔
1282

1283
        case VOLATILE_STATE:
4✔
1284
                return setup_volatile_state(directory);
4✔
1285

1286
        case VOLATILE_OVERLAY:
4✔
1287
                return setup_volatile_overlay(directory, uid_shift, selinux_apifs_context);
4✔
1288

1289
        default:
1290
                return 0;
1291
        }
1292
}
1293

1294
int setup_volatile_mode_after_remount_idmap(
234✔
1295
                const char *directory,
1296
                VolatileMode mode,
1297
                uid_t uid_shift,
1298
                const char *selinux_apifs_context) {
1299

1300
        switch (mode) {
234✔
1301

1302
        case VOLATILE_STATE:
4✔
1303
                return setup_volatile_state_after_remount_idmap(directory, uid_shift, selinux_apifs_context);
4✔
1304

1305
        default:
1306
                return 0;
1307
        }
1308
}
1309

1310
/* Expects *pivot_root_new and *pivot_root_old to be initialised to allocated memory or NULL. */
1311
int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s) {
2✔
1312
        _cleanup_free_ char *root_new = NULL, *root_old = NULL;
2✔
1313
        const char *p = s;
2✔
1314
        int r;
2✔
1315

1316
        assert(pivot_root_new);
2✔
1317
        assert(pivot_root_old);
2✔
1318

1319
        r = extract_first_word(&p, &root_new, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
2✔
1320
        if (r < 0)
2✔
1321
                return r;
1322
        if (r == 0)
2✔
1323
                return -EINVAL;
1324

1325
        if (isempty(p))
2✔
1326
                root_old = NULL;
1327
        else {
1328
                root_old = strdup(p);
×
1329
                if (!root_old)
×
1330
                        return -ENOMEM;
1331
        }
1332

1333
        if (!path_is_absolute(root_new))
2✔
1334
                return -EINVAL;
1335
        if (root_old && !path_is_absolute(root_old))
×
1336
                return -EINVAL;
1337

1338
        free_and_replace(*pivot_root_new, root_new);
×
1339
        free_and_replace(*pivot_root_old, root_old);
×
1340

1341
        return 0;
×
1342
}
1343

1344
int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old) {
236✔
1345
        _cleanup_free_ char *directory_pivot_root_new = NULL;
472✔
1346
        _cleanup_free_ char *pivot_tmp_pivot_root_old = NULL;
236✔
1347
        _cleanup_(rmdir_and_freep) char *pivot_tmp = NULL;
236✔
1348
        int r;
236✔
1349

1350
        assert(directory);
236✔
1351

1352
        if (!pivot_root_new)
236✔
1353
                return 0;
1354

1355
        /* Pivot pivot_root_new to / and the existing / to pivot_root_old.
1356
         * If pivot_root_old is NULL, the existing / disappears.
1357
         * This requires a temporary directory, pivot_tmp, which is
1358
         * not a child of either.
1359
         *
1360
         * This is typically used for OSTree-style containers, where the root partition contains several
1361
         * sysroots which could be run. Normally, one would be chosen by the bootloader and pivoted to / by
1362
         * initrd.
1363
         *
1364
         * For example, for an OSTree deployment, pivot_root_new
1365
         * would be: /ostree/deploy/$os/deploy/$checksum. Note that this
1366
         * code doesn’t do the /var mount which OSTree expects: use
1367
         * --bind +/sysroot/ostree/deploy/$os/var:/var for that.
1368
         *
1369
         * So in the OSTree case, we’ll end up with something like:
1370
         *  - directory = /tmp/nspawn-root-123456
1371
         *  - pivot_root_new = /ostree/deploy/os/deploy/123abc
1372
         *  - pivot_root_old = /sysroot
1373
         *  - directory_pivot_root_new =
1374
         *       /tmp/nspawn-root-123456/ostree/deploy/os/deploy/123abc
1375
         *  - pivot_tmp = /tmp/nspawn-pivot-123456
1376
         *  - pivot_tmp_pivot_root_old = /tmp/nspawn-pivot-123456/sysroot
1377
         *
1378
         * Requires all file systems at directory and below to be mounted
1379
         * MS_PRIVATE or MS_SLAVE so they can be moved.
1380
         */
1381
        directory_pivot_root_new = path_join(directory, pivot_root_new);
×
1382
        if (!directory_pivot_root_new)
×
1383
                return log_oom();
×
1384

1385
        /* Remount directory_pivot_root_new to make it movable. */
1386
        r = mount_nofollow_verbose(LOG_ERR, directory_pivot_root_new, directory_pivot_root_new, NULL, MS_BIND, NULL);
×
1387
        if (r < 0)
×
1388
                return r;
1389

1390
        if (pivot_root_old) {
×
1391
                r = mkdtemp_malloc("/tmp/nspawn-pivot-XXXXXX", &pivot_tmp);
×
1392
                if (r < 0)
×
1393
                        return log_error_errno(r, "Failed to create temporary directory: %m");
×
1394

1395
                pivot_tmp_pivot_root_old = path_join(pivot_tmp, pivot_root_old);
×
1396
                if (!pivot_tmp_pivot_root_old)
×
1397
                        return log_oom();
×
1398

1399
                r = mount_nofollow_verbose(LOG_ERR, directory_pivot_root_new, pivot_tmp, NULL, MS_MOVE, NULL);
×
1400
                if (r < 0)
×
1401
                        return r;
1402

1403
                r = mount_nofollow_verbose(LOG_ERR, directory, pivot_tmp_pivot_root_old, NULL, MS_MOVE, NULL);
×
1404
                if (r < 0)
×
1405
                        return r;
1406

1407
                r = mount_nofollow_verbose(LOG_ERR, pivot_tmp, directory, NULL, MS_MOVE, NULL);
×
1408
        } else
1409
                r = mount_nofollow_verbose(LOG_ERR, directory_pivot_root_new, directory, NULL, MS_MOVE, NULL);
×
1410

1411
        if (r < 0)
×
1412
                return r;
×
1413

1414
        return 0;
1415
}
1416

1417
#define NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS "/run/host/proc"
1418
#define NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS "/run/host/sys"
1419

1420
int pin_fully_visible_api_fs(void) {
104✔
1421
        int r;
104✔
1422

1423
        log_debug("Pinning fully visible API FS");
104✔
1424

1425
        (void) mkdir_p(NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS, 0755);
104✔
1426
        (void) mkdir_p(NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS, 0755);
104✔
1427

1428
        r = mount_follow_verbose(LOG_ERR, "proc", NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS, "proc", PROC_DEFAULT_MOUNT_FLAGS, NULL);
104✔
1429
        if (r < 0)
104✔
1430
                return r;
1431

1432
        r = mount_follow_verbose(LOG_ERR, "sysfs", NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS, "sysfs", SYS_DEFAULT_MOUNT_FLAGS, NULL);
104✔
1433
        if (r < 0)
104✔
1434
                return r;
×
1435

1436
        return 0;
1437
}
1438

1439
static int do_wipe_fully_visible_api_fs(void) {
57✔
1440
        if (umount2(NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS, MNT_DETACH) < 0)
57✔
1441
                return log_error_errno(errno, "Failed to unmount temporary proc: %m");
×
1442

1443
        if (rmdir(NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS) < 0)
57✔
1444
                return log_error_errno(errno, "Failed to remove temporary proc mountpoint: %m");
×
1445

1446
        if (umount2(NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS, MNT_DETACH) < 0)
57✔
1447
                return log_error_errno(errno, "Failed to unmount temporary sys: %m");
×
1448

1449
        if (rmdir(NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS) < 0)
57✔
1450
                return log_error_errno(errno, "Failed to remove temporary sys mountpoint: %m");
×
1451

1452
        return 0;
1453
}
1454

1455
int wipe_fully_visible_api_fs(int mntns_fd) {
57✔
1456
        _cleanup_close_ int orig_mntns_fd = -EBADF;
57✔
1457
        int r, rr;
57✔
1458

1459
        log_debug("Wiping fully visible API FS");
57✔
1460

1461
        orig_mntns_fd = namespace_open_by_type(NAMESPACE_MOUNT);
57✔
1462
        if (orig_mntns_fd < 0)
57✔
1463
                return log_error_errno(orig_mntns_fd, "Failed to pin originating mount namespace: %m");
×
1464

1465
        r = namespace_enter(/* pidns_fd = */ -EBADF,
57✔
1466
                            mntns_fd,
1467
                            /* netns_fd = */ -EBADF,
1468
                            /* userns_fd = */ -EBADF,
1469
                            /* root_fd = */ -EBADF);
1470
        if (r < 0)
57✔
1471
                return log_error_errno(r, "Failed to enter mount namespace: %m");
×
1472

1473
        rr = do_wipe_fully_visible_api_fs();
57✔
1474

1475
        r = namespace_enter(/* pidns_fd = */ -EBADF,
57✔
1476
                            orig_mntns_fd,
1477
                            /* netns_fd = */ -EBADF,
1478
                            /* userns_fd = */ -EBADF,
1479
                            /* root_fd = */ -EBADF);
1480
        if (r < 0)
57✔
1481
                return log_error_errno(r, "Failed to enter original mount namespace: %m");
×
1482

1483
        return rr;
1484
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc