• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 14630481637

23 Apr 2025 07:04PM UTC coverage: 72.178% (-0.002%) from 72.18%
14630481637

push

github

DaanDeMeyer
mkosi: Run clangd within the tools tree instead of the build container

Running within the build sandbox has a number of disadvantages:
- We have a separate clangd cache for each distribution/release combo
- It requires building the full image before clangd can be used
- It breaks every time the image becomes out of date and requires a
  rebuild
- We can't look at system headers as we don't have the knowledge to map
  them from inside the build sandbox to the corresponding path on the host

Instead, let's have mkosi.clangd run clangd within the tools tree. We
already require building systemd for both the host and the target anyway,
and all the dependencies to build systemd are installed in the tools tree
already for that, as well as clangd since it's installed together with the
other clang tooling we install in the tools tree. Unlike the previous approach,
this approach only requires the mkosi tools tree to be built upfront, which has
a much higher chance of not invalidating its cache. We can also trivially map
system header lookups from within the sandbox to the path within mkosi.tools
on the host, so that looking up system headers starts working as well.

297054 of 411557 relevant lines covered (72.18%)

686269.58 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

76.93
/src/nspawn/nspawn-mount.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <sys/mount.h>
4
#include <linux/magic.h>
5

6
#include "alloc-util.h"
7
#include "chase.h"
8
#include "escape.h"
9
#include "fd-util.h"
10
#include "format-util.h"
11
#include "fs-util.h"
12
#include "label-util.h"
13
#include "log.h"
14
#include "mkdir-label.h"
15
#include "mount-util.h"
16
#include "mountpoint-util.h"
17
#include "namespace-util.h"
18
#include "nspawn-mount.h"
19
#include "parse-util.h"
20
#include "path-util.h"
21
#include "rm-rf.h"
22
#include "set.h"
23
#include "sort-util.h"
24
#include "stat-util.h"
25
#include "string-util.h"
26
#include "strv.h"
27
#include "tmpfile-util.h"
28
#include "user-util.h"
29

30
/* Grows the array '*l' (currently holding '*n' entries) by one entry of type 't' and increments '*n'.
 * All other fields of the new entry start out zeroed. Returns a pointer to the new entry, which points into
 * the array, or NULL if the array could not be enlarged. */
CustomMount* custom_mount_add(CustomMount **l, size_t *n, CustomMountType t) {
        assert(l);
        assert(n);
        assert(t >= 0);
        assert(t < _CUSTOM_MOUNT_TYPE_MAX);

        if (!GREEDY_REALLOC(*l, *n + 1))
                return NULL;

        /* The new entry occupies the slot right behind the last one in use. */
        CustomMount *slot = &(*l)[(*n)++];

        *slot = (CustomMount) {
                .type = t,
        };

        return slot;
}
50

51
/* Releases all 'n' entries of the array 'l' and then the array itself. Directories recorded in the
 * 'work_dir' and 'rm_rf_tmpdir' fields are removed from disk first; the result of that removal is
 * deliberately ignored. */
void custom_mount_free_all(CustomMount *l, size_t n) {
        FOREACH_ARRAY(entry, l, n) {
                /* Clean up on-disk state before dropping the path strings that name it. */
                if (entry->work_dir)
                        (void) rm_rf(entry->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);

                if (entry->rm_rf_tmpdir)
                        (void) rm_rf(entry->rm_rf_tmpdir, REMOVE_ROOT|REMOVE_PHYSICAL);

                /* free(NULL) is a no-op, hence no need to check the individual fields. */
                free(entry->source);
                free(entry->destination);
                free(entry->options);
                free(entry->work_dir);
                free(entry->rm_rf_tmpdir);
                free(entry->type_argument);

                strv_free(entry->lower);
        }

        free(l);
}
1,101✔
73

74
static int custom_mount_compare(const CustomMount *a, const CustomMount *b) {
103✔
75
        int r;
103✔
76

77
        r = path_compare(a->destination, b->destination);
103✔
78
        if (r != 0)
103✔
79
                return r;
80

81
        return CMP(a->type, b->type);
×
82
}
83

84
/* Validates the source path 'p' and stores a newly allocated, normalized copy in '*ret'.
 *
 * A path without a leading "+" is passed through path_make_absolute_cwd(). A path with a leading "+" is
 * copied verbatim (prefix included), provided that the part after the "+" is absolute.
 *
 * Returns 0 on success, -EINVAL for an empty string or a "+" path whose remainder is not absolute, -ENOMEM
 * if the copy cannot be allocated, or whatever path_make_absolute_cwd() returns. */
static int source_path_parse(const char *p, char **ret) {
        assert(p);
        assert(ret);

        if (isempty(p))
                return -EINVAL;

        if (*p != '+')
                return path_make_absolute_cwd(p, ret);

        if (!path_is_absolute(p + 1))
                return -EINVAL;

        char *copy = strdup(p);
        if (!copy)
                return -ENOMEM;

        *ret = copy;
        return 0;
}
105

106
/* Like source_path_parse(), but an empty string is not an error: in that case '*ret' is set to NULL and 0
 * is returned. */
static int source_path_parse_nullable(const char *p, char **ret) {
        assert(p);
        assert(ret);

        if (!isempty(p))
                return source_path_parse(p, ret);

        *ret = NULL;
        return 0;
}
117

118
/* Returns a newly allocated version of 'source': if it starts with "+", the remainder is joined onto
 * 'dest', otherwise the string is duplicated as is. Returns NULL if 'source' is NULL or if the allocation
 * fails. */
static char *resolve_source_path(const char *dest, const char *source) {
        if (!source)
                return NULL;

        return source[0] == '+' ? path_join(dest, source + 1) : strdup(source);
}
127

128
/* Creates a fresh temporary directory below /var/tmp, remembers it in m->rm_rf_tmpdir, and creates a
 * "src" subdirectory inside it, whose path becomes m->source. Both fields must be unset on entry.
 * Returns 0 on success, or a negative errno-style value after logging the error. */
static int allocate_temporary_source(CustomMount *m) {
        assert(m);
        assert(!m->source);
        assert(!m->rm_rf_tmpdir);

        int r = mkdtemp_malloc("/var/tmp/nspawn-temp-XXXXXX", &m->rm_rf_tmpdir);
        if (r < 0)
                return log_error_errno(r, "Failed to acquire temporary directory: %m");

        m->source = path_join(m->rm_rf_tmpdir, "src");
        if (!m->source)
                return log_oom();

        if (mkdir(m->source, 0755) >= 0)
                return 0;

        return log_error_errno(errno, "Failed to create %s: %m", m->source);
}
148

149
/* Sorts the 'n' custom mounts in 'l' and prepares each of them: "+"-prefixed paths are resolved against
 * 'dest', and temporary source/work directories are allocated where none were configured.
 *
 * This runs in the parent process, before the children are forked off, so that the parent knows about every
 * temporary directory it has to remove on exit.
 *
 * Returns 0 on success, or a negative errno-style value after logging the error. */
int custom_mount_prepare_all(const char *dest, CustomMount *l, size_t n) {
        int r;

        assert(l || n == 0);

        /* Bring the mounts into a defined order first. */
        typesafe_qsort(l, n, custom_mount_compare);

        FOREACH_ARRAY(m, l, n) {
                /* /proc is mounted in the inner child, i.e. once CLONE_NEWPID has been acquired. Everything
                 * else is mounted in the outer child already, so that it is in place before CLONE_NEWPID and
                 * in particular CLONE_NEWUSER. Consequently, custom mounts below /proc have to be established
                 * in the inner child, too. Record that here. */
                m->in_userns = path_startswith(m->destination, "/proc");

                if (m->type == CUSTOM_MOUNT_BIND) {
                        if (!m->source) {
                                /* No source configured: back the mount with a throw-away directory in
                                 * /var/tmp. */
                                r = allocate_temporary_source(m);
                                if (r < 0)
                                        return r;
                        } else {
                                char *resolved;

                                resolved = resolve_source_path(dest, m->source);
                                if (!resolved)
                                        return log_oom();

                                free_and_replace(m->source, resolved);
                        }

                } else if (m->type == CUSTOM_MOUNT_OVERLAY) {
                        /* Lower directories first ... */
                        STRV_FOREACH(layer, m->lower) {
                                char *resolved;

                                resolved = resolve_source_path(dest, *layer);
                                if (!resolved)
                                        return log_oom();

                                free_and_replace(*layer, resolved);
                        }

                        /* ... then the upper directory, which is kept in 'source' ... */
                        if (!m->source) {
                                r = allocate_temporary_source(m);
                                if (r < 0)
                                        return r;
                        } else {
                                char *resolved;

                                resolved = resolve_source_path(dest, m->source);
                                if (!resolved)
                                        return log_oom();

                                free_and_replace(m->source, resolved);
                        }

                        /* ... and finally the working directory. If none is set, pick a random name derived
                         * from the upper directory's path. */
                        if (!m->work_dir) {
                                r = tempfn_random(m->source, NULL, &m->work_dir);
                                if (r < 0)
                                        return log_error_errno(r, "Failed to acquire working directory: %m");
                        } else {
                                char *resolved;

                                resolved = resolve_source_path(dest, m->work_dir);
                                if (!resolved)
                                        return log_oom();

                                free_and_replace(m->work_dir, resolved);
                        }

                        /* Best effort: a failure to create the directory is not treated as an error here. */
                        (void) mkdir_label(m->work_dir, 0700);
                }
        }

        return 0;
}
231

232
/* Parses a bind mount specification of the form "SOURCE[:DESTINATION[:OPTIONS]]" from 's' and appends a
 * matching CUSTOM_MOUNT_BIND entry to the array '*l' / '*n'.
 *
 * If only one field is given, it is used as destination as well, with a leading "+" stripped. An empty
 * source field results in an entry without source. The destination must be an absolute path.
 *
 * Returns 0 on success, -EINVAL on malformed input, -ENOMEM on allocation failure, or the error returned by
 * the word extraction / source path parsing. */
int bind_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only) {
        _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL, *p = NULL;
        CustomMount *m;
        int r;

        assert(l);
        assert(n);

        r = extract_many_words(&s, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination);
        if (r < 0)
                return r;
        if (r == 0)
                return -EINVAL;

        if (r == 1) {
                /* Only a source was specified: mount it to the same path, minus the "+" prefix. */
                destination = strdup(source[0] == '+' ? source+1 : source);
                if (!destination)
                        return -ENOMEM;
        } else if (r == 2 && !isempty(s)) {
                /* Whatever is left behind the second field are the mount options. */
                opts = strdup(s);
                if (!opts)
                        return -ENOMEM;
        }

        r = source_path_parse_nullable(source, &p);
        if (r < 0)
                return r;

        if (!path_is_absolute(destination))
                return -EINVAL;

        m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
        if (!m)
                return -ENOMEM;

        /* Hand ownership of the strings over to the new entry. */
        m->read_only = read_only;
        m->source = TAKE_PTR(p);
        m->destination = TAKE_PTR(destination);
        m->options = TAKE_PTR(opts);

        return 0;
}
274

275
/* Parses a tmpfs mount specification of the form "PATH[:OPTIONS]" from 's' and appends a matching
 * CUSTOM_MOUNT_TMPFS entry to the array '*l' / '*n'. If no options are given, "mode=0755" is used. The path
 * must be absolute.
 *
 * Returns 0 on success, -EINVAL on malformed input, -ENOMEM on allocation failure, or the error returned by
 * the word extraction. */
int tmpfs_mount_parse(CustomMount **l, size_t *n, const char *s) {
        _cleanup_free_ char *path = NULL, *opts = NULL;
        const char *p = ASSERT_PTR(s);
        CustomMount *m;
        int r;

        assert(l);
        assert(n);

        r = extract_first_word(&p, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
        if (r < 0)
                return r;
        if (r == 0)
                return -EINVAL;

        /* Everything behind the first field is taken as mount options. */
        opts = strdup(isempty(p) ? "mode=0755" : p);
        if (!opts)
                return -ENOMEM;

        if (!path_is_absolute(path))
                return -EINVAL;

        m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
        if (!m)
                return -ENOMEM;

        m->options = TAKE_PTR(opts);
        m->destination = TAKE_PTR(path);

        return 0;
}
309

310
/* Parses an overlay mount specification, i.e. a colon-separated list of at least two paths, from 's' and
 * appends a matching CUSTOM_MOUNT_OVERLAY entry to the array '*l' / '*n'.
 *
 *   - Two fields:        "LOWER:UPPER", where the upper directory (minus a "+" prefix) is also the destination.
 *   - More than two:     "LOWER...:UPPER:DESTINATION", where an empty UPPER leaves the entry without an upper
 *                        directory.
 *
 * The upper directory is stored in the entry's 'source' field.
 *
 * Returns 0 on success, -EADDRNOTAVAIL if fewer than two fields were given, -EINVAL if the destination is
 * not absolute, -ENOMEM on allocation failure, or the error returned by splitting / source path parsing. */
int overlay_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only) {
        _cleanup_free_ char *upper = NULL, *destination = NULL;
        _cleanup_strv_free_ char **lower = NULL;
        CustomMount *m;
        int r, k;

        k = strv_split_full(&lower, s, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
        if (k < 0)
                return k;
        if (k < 2)
                return -EADDRNOTAVAIL;

        if (k > 2) {
                _cleanup_free_ char *parsed = NULL;

                /* More than two fields: the last one is the destination, the one before it the upper
                 * directory, and everything in front of that are lower directories.
                 *
                 * Taking the upper entry out of the vector is what ends the list of lower directories, so
                 * the destination string in the slot behind it is owned by 'destination' from here on. This
                 * relies on TAKE_PTR() leaving NULL behind in the slot it takes from. */
                destination = lower[k - 1];
                upper = TAKE_PTR(lower[k - 2]);

                STRV_FOREACH(dir, lower) {
                        r = source_path_parse(*dir, &parsed);
                        if (r < 0)
                                return r;

                        free_and_replace(*dir, parsed);
                }

                /* An unspecified upper directory is fine here: it is created automatically as a throw-away
                 * directory in /var/tmp later. */
                r = source_path_parse_nullable(upper, &parsed);
                if (r < 0)
                        return r;

                free_and_replace(upper, parsed);

                if (!path_is_absolute(destination))
                        return -EINVAL;
        } else {
                _cleanup_free_ char *parsed = NULL;

                /* Exactly two fields: the first is the lower, the second the upper directory, and the
                 * destination mount point is the same as the upper directory. */
                for (size_t i = 0; i < 2; i++) {
                        r = source_path_parse(lower[i], &parsed);
                        if (r < 0)
                                return r;

                        free_and_replace(lower[i], parsed);
                }

                upper = TAKE_PTR(lower[1]);

                /* The destination is the upper path without its "+" prefix. */
                destination = strdup(upper[0] == '+' ? upper+1 : upper);
                if (!destination)
                        return -ENOMEM;
        }

        m = custom_mount_add(l, n, CUSTOM_MOUNT_OVERLAY);
        if (!m)
                return -ENOMEM;

        m->read_only = read_only;
        m->lower = TAKE_PTR(lower);
        m->source = TAKE_PTR(upper);
        m->destination = TAKE_PTR(destination);

        return 0;
}
384

385
/* Appends a CUSTOM_MOUNT_INACCESSIBLE entry for the path 's' to the array '*l' / '*n'. The path must be
 * absolute. Returns 0 on success, -EINVAL if the path is not absolute, -ENOMEM on allocation failure. */
int inaccessible_mount_parse(CustomMount **l, size_t *n, const char *s) {
        _cleanup_free_ char *copy = NULL;
        CustomMount *m;

        assert(l);
        assert(n);
        assert(s);

        if (!path_is_absolute(s))
                return -EINVAL;

        copy = strdup(s);
        if (!copy)
                return -ENOMEM;

        m = custom_mount_add(l, n, CUSTOM_MOUNT_INACCESSIBLE);
        if (!m)
                return -ENOMEM;

        /* The entry takes over ownership of the copied path. */
        m->destination = TAKE_PTR(copy);

        return 0;
}
407

408
int tmpfs_patch_options(
992✔
409
                const char *options,
410
                uid_t uid_shift,
411
                const char *selinux_apifs_context,
412
                char **ret) {
413

414
        _cleanup_free_ char *buf = NULL;
992✔
415

416
        assert(ret);
992✔
417

418
        if (options) {
992✔
419
                buf = strdup(options);
992✔
420
                if (!buf)
992✔
421
                        return -ENOMEM;
422
        }
423

424
        if (uid_shift != UID_INVALID)
992✔
425
                if (strextendf_with_separator(&buf, ",", "uid=" UID_FMT ",gid=" UID_FMT, uid_shift, uid_shift) < 0)
980✔
426
                        return -ENOMEM;
427

428
#if HAVE_SELINUX
429
        if (selinux_apifs_context)
430
                if (strextendf_with_separator(&buf, ",", "context=\"%s\"", selinux_apifs_context) < 0)
431
                        return -ENOMEM;
432
#endif
433

434
        *ret = TAKE_PTR(buf);
992✔
435
        return !!*ret;
992✔
436
}
437

438
/* Sets up <dest>/sys.
 *
 * If <dest>/sys is already a sysfs mount, nothing is done. Otherwise a full sysfs instance is mounted
 * temporarily at <dest>/sys/full, a fixed set of its subdirectories is bind mounted into <dest>/sys, and
 * the temporary instance is unmounted and removed again. If <dest>/sys was not a mount point at all, a
 * tmpfs is mounted there first. Finally <dest>/sys itself is bind-remounted with the restricted flags,
 * which include MS_RDONLY if MOUNT_APPLY_APIVFS_RO is set in 'mount_settings'.
 *
 * Returns 0 if sysfs was already in place, otherwise the result of the final remount; on failure a negative
 * errno-style value is returned after logging. */
int mount_sysfs(const char *dest, MountSettingsMask mount_settings) {
        _cleanup_free_ char *top = NULL, *full = NULL;
        unsigned long extra_flags = FLAGS_SET(mount_settings, MOUNT_APPLY_APIVFS_RO) ? MS_RDONLY : 0;
        int r;

        top = path_join(dest, "/sys");
        if (!top)
                return log_oom();

        r = path_is_mount_point(top);
        if (r < 0)
                return log_error_errno(r, "Failed to determine if '%s' is a mountpoint: %m", top);
        if (r > 0) {
                r = path_is_fs_type(top, SYSFS_MAGIC);
                if (r < 0)
                        return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top);

                /* /sys/ might already have been mounted as sysfs by the outer child in the !netns case.
                 * Then all is good, and we must not touch it, because we lack the right to do so, see
                 * https://github.com/systemd/systemd/issues/1555. */
                if (r > 0)
                        return 0;
        } else {
                /* Not a mount point yet, hence put a tmpfs there. */
                r = mount_nofollow_verbose(
                                LOG_ERR,
                                "tmpfs",
                                top,
                                "tmpfs",
                                MS_NOSUID|MS_NOEXEC|MS_NODEV,
                                "mode=0555" TMPFS_LIMITS_SYS);
                if (r < 0)
                        return r;
        }

        /* Temporary location for the complete sysfs instance. */
        full = path_join(top, "/full");
        if (!full)
                return log_oom();

        if (mkdir(full, 0755) < 0 && errno != EEXIST)
                return log_error_errno(errno, "Failed to create directory '%s': %m", full);

        r = mount_nofollow_verbose(
                        LOG_ERR,
                        "sysfs",
                        full,
                        "sysfs",
                        MS_NOSUID|MS_NOEXEC|MS_NODEV|extra_flags,
                        NULL);
        if (r < 0)
                return r;

        /* Expose only selected subtrees of the full instance. */
        FOREACH_STRING(subdir, "block", "bus", "class", "dev", "devices", "kernel") {
                _cleanup_free_ char *from = NULL, *to = NULL;

                from = path_join(full, subdir);
                if (!from)
                        return log_oom();

                to = path_join(top, subdir);
                if (!to)
                        return log_oom();

                (void) mkdir(to, 0755);

                /* Bind mount first, then apply the restricted flags in a separate remount. */
                r = mount_nofollow_verbose(LOG_ERR, from, to, NULL, MS_BIND, NULL);
                if (r < 0)
                        return r;

                r = mount_nofollow_verbose(
                                LOG_ERR,
                                NULL,
                                to,
                                NULL,
                                MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags,
                                NULL);
                if (r < 0)
                        return r;
        }

        /* The full instance has served its purpose, get rid of it again. */
        r = umount_verbose(LOG_ERR, full, UMOUNT_NOFOLLOW);
        if (r < 0)
                return r;

        if (rmdir(full) < 0)
                return log_error_errno(errno, "Failed to remove %s: %m", full);

        /* Create the mount point for cgroups now: once /sys/ has been remounted read-only below we would no
         * longer be allowed to. */
        _cleanup_free_ char *cgroup_dir = path_join(top, "/fs/cgroup");
        if (!cgroup_dir)
                return log_oom();

        (void) mkdir_p(cgroup_dir, 0755);

        return mount_nofollow_verbose(
                        LOG_ERR,
                        NULL,
                        top,
                        NULL,
                        MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags,
                        NULL);
}
523

524
#define PROC_DEFAULT_MOUNT_FLAGS (MS_NOSUID|MS_NOEXEC|MS_NODEV)
525
#define SYS_DEFAULT_MOUNT_FLAGS  (MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV)
526

527
int mount_all(const char *dest,
324✔
528
              MountSettingsMask mount_settings,
529
              uid_t uid_shift,
530
              const char *selinux_apifs_context) {
531

532
#define PROC_INACCESSIBLE_REG(path)                                     \
533
        { "/run/systemd/inaccessible/reg", (path), NULL, NULL, MS_BIND, \
534
          MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ \
535
        { NULL, (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \
536
          MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */
537

538
#define PROC_READ_ONLY(path)                                            \
539
        { (path), (path), NULL, NULL, MS_BIND,                          \
540
          MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ \
541
        { NULL,   (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \
542
          MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */
543

544
        typedef struct MountPoint {
324✔
545
                const char *what;
546
                const char *where;
547
                const char *type;
548
                const char *options;
549
                unsigned long flags;
550
                MountSettingsMask mount_settings;
551
        } MountPoint;
552

553
        static const MountPoint mount_table[] = {
324✔
554
                /* First we list inner child mounts (i.e. mounts applied *after* entering user namespacing when we are privileged) */
555
                { "proc",            "/proc",           "proc",  NULL,        PROC_DEFAULT_MOUNT_FLAGS,
556
                  MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_MKDIR|MOUNT_FOLLOW_SYMLINKS }, /* we follow symlinks here since not following them requires /proc/ already being mounted, which we don't have here. */
557

558
                { "/proc/sys",       "/proc/sys",       NULL,    NULL,        MS_BIND,
559
                  MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO },                          /* Bind mount first ... */
560

561
                { "/proc/sys/net",   "/proc/sys/net",   NULL,    NULL,        MS_BIND,
562
                  MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO|MOUNT_APPLY_APIVFS_NETNS }, /* (except for this) */
563

564
                { NULL,              "/proc/sys",       NULL,    NULL,        MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
565
                  MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO },                          /* ... then, make it r/o */
566

567
                /* Make these files inaccessible to container payloads: they potentially leak information about kernel
568
                 * internals or the host's execution environment to the container */
569
                PROC_INACCESSIBLE_REG("/proc/kallsyms"),
570
                PROC_INACCESSIBLE_REG("/proc/kcore"),
571
                PROC_INACCESSIBLE_REG("/proc/keys"),
572
                PROC_INACCESSIBLE_REG("/proc/sysrq-trigger"),
573
                PROC_INACCESSIBLE_REG("/proc/timer_list"),
574

575
                /* Make these directories read-only to container payloads: they show hardware information, and in some
576
                 * cases contain tunables the container really shouldn't have access to. */
577
                PROC_READ_ONLY("/proc/acpi"),
578
                PROC_READ_ONLY("/proc/apm"),
579
                PROC_READ_ONLY("/proc/asound"),
580
                PROC_READ_ONLY("/proc/bus"),
581
                PROC_READ_ONLY("/proc/fs"),
582
                PROC_READ_ONLY("/proc/irq"),
583
                PROC_READ_ONLY("/proc/scsi"),
584

585
                { "mqueue",                 "/dev/mqueue",                  "mqueue", NULL,                            MS_NOSUID|MS_NOEXEC|MS_NODEV,
586
                  MOUNT_IN_USERNS|MOUNT_MKDIR },
587

588
                /* Then we list outer child mounts (i.e. mounts applied *before* entering user namespacing when we are privileged) */
589
                { "tmpfs",                  "/tmp",                         "tmpfs", "mode=01777" NESTED_TMPFS_LIMITS, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
590
                  MOUNT_FATAL|MOUNT_APPLY_TMPFS_TMP|MOUNT_MKDIR|MOUNT_USRQUOTA_GRACEFUL },
591
                { "tmpfs",                  "/sys",                         "tmpfs", "mode=0555" TMPFS_LIMITS_SYS,     MS_NOSUID|MS_NOEXEC|MS_NODEV,
592
                  MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS|MOUNT_MKDIR|MOUNT_UNMANAGED },
593
                { "sysfs",                  "/sys",                         "sysfs", NULL,                             SYS_DEFAULT_MOUNT_FLAGS,
594
                  MOUNT_FATAL|MOUNT_APPLY_APIVFS_RO|MOUNT_MKDIR|MOUNT_UNMANAGED },    /* skipped if above was mounted */
595
                { "sysfs",                  "/sys",                         "sysfs", NULL,                             MS_NOSUID|MS_NOEXEC|MS_NODEV,
596
                  MOUNT_FATAL|MOUNT_MKDIR|MOUNT_UNMANAGED },                          /* skipped if above was mounted */
597
                { "tmpfs",                  "/dev",                         "tmpfs", "mode=0755" TMPFS_LIMITS_PRIVATE_DEV, MS_NOSUID|MS_STRICTATIME,
598
                  MOUNT_FATAL|MOUNT_MKDIR },
599
                { "tmpfs",                  "/dev/shm",                     "tmpfs", "mode=01777" NESTED_TMPFS_LIMITS, MS_NOSUID|MS_NODEV|MS_STRICTATIME,
600
                  MOUNT_FATAL|MOUNT_MKDIR|MOUNT_USRQUOTA_GRACEFUL },
601
                { "tmpfs",                  "/run",                         "tmpfs", "mode=0755" TMPFS_LIMITS_RUN,     MS_NOSUID|MS_NODEV|MS_STRICTATIME,
602
                  MOUNT_FATAL|MOUNT_MKDIR },
603
                { "/run/host",              "/run/host",                    NULL,    NULL,                             MS_BIND,
604
                  MOUNT_FATAL|MOUNT_MKDIR|MOUNT_PREFIX_ROOT }, /* Prepare this so that we can make it read-only when we are done */
605
                { "/etc/os-release",        "/run/host/os-release",         NULL,    NULL,                             MS_BIND,
606
                  MOUNT_TOUCH }, /* As per kernel interface requirements, bind mount first (creating mount points) and make read-only later */
607
                { "/usr/lib/os-release",    "/run/host/os-release",         NULL,    NULL,                             MS_BIND,
608
                  MOUNT_FATAL }, /* If /etc/os-release doesn't exist use the version in /usr/lib as fallback */
609
                { NULL,                     "/run/host/os-release",         NULL,    NULL,                             MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
610
                  MOUNT_FATAL },
611
                { NULL,                     "/run/host/os-release",         NULL,    NULL,                             MS_PRIVATE,
612
                  MOUNT_FATAL },  /* Turn off propagation (we only want that for the mount propagation tunnel dir) */
613
                { NULL,                     "/run/host",                    NULL,    NULL,                             MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
614
                  MOUNT_FATAL|MOUNT_IN_USERNS },
615
#if HAVE_SELINUX
616
                { "/sys/fs/selinux",        "/sys/fs/selinux",              NULL,    NULL,                             MS_BIND,
617
                  MOUNT_MKDIR|MOUNT_PRIVILEGED },  /* Bind mount first (mkdir/chown the mount point in case /sys/ is mounted as minimal skeleton tmpfs) */
618
                { NULL,                     "/sys/fs/selinux",              NULL,    NULL,                             MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
619
                  MOUNT_UNMANAGED|MOUNT_PRIVILEGED },  /* Then, make it r/o (don't mkdir/chown the mount point here, the previous entry already did that) */
620
                { NULL,                     "/sys/fs/selinux",              NULL,    NULL,                             MS_PRIVATE,
621
                  MOUNT_UNMANAGED|MOUNT_PRIVILEGED },  /* Turn off propagation (we only want that for the mount propagation tunnel dir) */
622
#endif
623
        };
624

625
        bool use_userns = FLAGS_SET(mount_settings, MOUNT_USE_USERNS);
324✔
626
        bool netns = FLAGS_SET(mount_settings, MOUNT_APPLY_APIVFS_NETNS);
324✔
627
        bool ro = FLAGS_SET(mount_settings, MOUNT_APPLY_APIVFS_RO);
324✔
628
        bool in_userns = FLAGS_SET(mount_settings, MOUNT_IN_USERNS);
324✔
629
        bool tmpfs_tmp = FLAGS_SET(mount_settings, MOUNT_APPLY_TMPFS_TMP);
324✔
630
        bool unmanaged = FLAGS_SET(mount_settings, MOUNT_UNMANAGED);
324✔
631
        bool privileged = FLAGS_SET(mount_settings, MOUNT_PRIVILEGED);
324✔
632
        int r;
324✔
633

634
        FOREACH_ELEMENT(m, mount_table) {
13,932✔
635
                _cleanup_free_ char *where = NULL, *options = NULL, *prefixed = NULL;
13,608✔
636
                bool fatal = FLAGS_SET(m->mount_settings, MOUNT_FATAL);
13,608✔
637
                const char *o;
13,608✔
638

639
                /* If we are in managed user namespace mode but the entry is marked for mount outside of
640
                 * managed user namespace mode, and to be mounted outside the user namespace, then skip it */
641
                if (!unmanaged && FLAGS_SET(m->mount_settings, MOUNT_UNMANAGED) && !FLAGS_SET(m->mount_settings, MOUNT_IN_USERNS))
13,608✔
642
                        continue;
27✔
643

644
                if (in_userns != FLAGS_SET(m->mount_settings, MOUNT_IN_USERNS))
13,581✔
645
                        continue;
7,809✔
646

647
                if (!netns && FLAGS_SET(m->mount_settings, MOUNT_APPLY_APIVFS_NETNS))
5,772✔
648
                        continue;
165✔
649

650
                if (!ro && FLAGS_SET(m->mount_settings, MOUNT_APPLY_APIVFS_RO))
5,607✔
651
                        continue;
228✔
652

653
                if (!tmpfs_tmp && FLAGS_SET(m->mount_settings, MOUNT_APPLY_TMPFS_TMP))
5,379✔
654
                        continue;
×
655

656
                if (!privileged && FLAGS_SET(m->mount_settings, MOUNT_PRIVILEGED))
5,379✔
657
                        continue;
×
658

659
                r = chase(m->where, dest, CHASE_NONEXISTENT|CHASE_PREFIX_ROOT, &where, NULL);
5,379✔
660
                if (r < 0)
5,379✔
661
                        return log_error_errno(r, "Failed to resolve %s%s: %m", strempty(dest), m->where);
×
662

663
                /* Skip this entry if it is not a remount. */
664
                if (m->what) {
5,379✔
665
                        r = path_is_mount_point(where);
3,563✔
666
                        if (r < 0 && r != -ENOENT)
3,563✔
667
                                return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
×
668
                        if (r > 0)
3,563✔
669
                                continue;
513✔
670
                }
671

672
                if ((m->mount_settings & (MOUNT_MKDIR|MOUNT_TOUCH)) != 0) {
4,866✔
673
                        uid_t u = (use_userns && !in_userns) ? uid_shift : UID_INVALID;
1,729✔
674

675
                        if (FLAGS_SET(m->mount_settings, MOUNT_TOUCH))
1,729✔
676
                                r = mkdir_parents_safe(dest, where, 0755, u, u, 0);
218✔
677
                        else
678
                                r = mkdir_p_safe(dest, where, 0755, u, u, 0);
1,511✔
679
                        if (r < 0 && r != -EEXIST) {
1,729✔
680
                                if (fatal && r != -EROFS)
×
681
                                        return log_error_errno(r, "Failed to create directory %s: %m", where);
×
682

683
                                log_debug_errno(r, "Failed to create directory %s: %m", where);
×
684

685
                                /* If we failed mkdir() or chown() due to the root directory being read only,
686
                                 * attempt to mount this fs anyway and let mount_verbose log any errors */
687
                                if (r != -EROFS)
×
688
                                        continue;
×
689
                        }
690
                }
691

692
                if (FLAGS_SET(m->mount_settings, MOUNT_TOUCH)) {
4,866✔
693
                        r = touch(where);
218✔
694
                        if (r < 0 && r != -EEXIST) {
218✔
695
                                if (fatal && r != -EROFS)
×
696
                                        return log_error_errno(r, "Failed to create file %s: %m", where);
×
697

698
                                log_debug_errno(r, "Failed to create file %s: %m", where);
×
699
                                if (r != -EROFS)
×
700
                                        continue;
×
701
                        }
702
                }
703

704
                o = m->options;
4,866✔
705
                if (streq_ptr(m->type, "tmpfs")) {
4,866✔
706
                        r = tmpfs_patch_options(o, in_userns ? 0 : uid_shift, selinux_apifs_context, &options);
1,944✔
707
                        if (r < 0)
972✔
708
                                return log_oom();
×
709
                        if (r > 0)
972✔
710
                                o = options;
972✔
711
                }
712

713
                if (FLAGS_SET(m->mount_settings, MOUNT_USRQUOTA_GRACEFUL)) {
4,866✔
714
                        r = mount_option_supported(m->type, /* key= */ "usrquota", /* value= */ NULL);
436✔
715
                        if (r < 0)
436✔
716
                                log_warning_errno(r, "Failed to determine if '%s' supports 'usrquota', assuming it doesn't: %m", m->type);
×
717
                        else if (r == 0)
436✔
718
                                log_debug("Kernel doesn't support 'usrquota' on '%s', not including in mount options for '%s'.", m->type, m->where);
16✔
719
                        else {
720
                                _cleanup_free_ char *joined = NULL;
×
721

722
                                if (!strextend_with_separator(&joined, ",", o ?: POINTER_MAX, "usrquota"))
420✔
723
                                        return log_oom();
×
724

725
                                free_and_replace(options, joined);
420✔
726
                                o = options;
420✔
727
                        }
728
                }
729

730
                if (FLAGS_SET(m->mount_settings, MOUNT_PREFIX_ROOT)) {
4,866✔
731
                        /* Optionally prefix the mount source with the root dir. This is useful in bind
732
                         * mounts to be created within the container image before we transition into it. Note
733
                         * that MOUNT_IN_USERNS is run after we transitioned hence prefixing is not necessary
734
                         * for those. */
735
                        r = chase(m->what, dest, CHASE_PREFIX_ROOT, &prefixed, NULL);
218✔
736
                        if (r < 0)
218✔
737
                                return log_error_errno(r, "Failed to resolve %s%s: %m", strempty(dest), m->what);
×
738
                }
739

740
                r = mount_verbose_full(
7,542✔
741
                                fatal ? LOG_ERR : LOG_DEBUG,
742
                                prefixed ?: m->what,
4,866✔
743
                                where,
744
                                m->type,
4,866✔
745
                                m->flags,
4,866✔
746
                                o,
747
                                FLAGS_SET(m->mount_settings, MOUNT_FOLLOW_SYMLINKS));
4,866✔
748
                if (r < 0 && fatal)
4,866✔
749
                        return r;
750
        }
751

752
        return 0;
753
}
754

755
/* Parses a comma-separated list of bind mount options.
 *
 * "rbind"/"norbind" set/clear MS_REC in the flags, while "idmap", "noidmap", "rootidmap" and "owneridmap"
 * select the ID mapping mode. The output parameters are only written after the whole string was parsed
 * successfully; *mount_opts is always reset to NULL since no string options are generated yet.
 *
 * Returns 0 on success, the error from extract_first_word() if tokenizing fails, or an EINVAL error if an
 * unknown option is encountered. */
static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts, RemountIdmapping *idmapping) {
        unsigned long parsed_flags = *mount_flags;
        RemountIdmapping parsed_idmapping = *idmapping;
        int r;

        assert(options);

        while (true) {
                _cleanup_free_ char *token = NULL;

                r = extract_first_word(&options, &token, ",", 0);
                if (r < 0)
                        return log_error_errno(r, "Failed to extract mount option: %m");
                if (r == 0) /* End of the option string */
                        break;

                if (streq(token, "rbind")) {
                        parsed_flags |= MS_REC;
                        continue;
                }

                if (streq(token, "norbind")) {
                        parsed_flags &= ~MS_REC;
                        continue;
                }

                if (streq(token, "idmap")) {
                        parsed_idmapping = REMOUNT_IDMAPPING_HOST_ROOT;
                        continue;
                }

                if (streq(token, "noidmap")) {
                        parsed_idmapping = REMOUNT_IDMAPPING_NONE;
                        continue;
                }

                if (streq(token, "rootidmap")) {
                        parsed_idmapping = REMOUNT_IDMAPPING_HOST_OWNER;
                        continue;
                }

                if (streq(token, "owneridmap")) {
                        parsed_idmapping = REMOUNT_IDMAPPING_HOST_OWNER_TO_TARGET_OWNER;
                        continue;
                }

                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
                                       "Invalid bind mount option: %s", token);
        }

        /* Everything parsed fine, now commit the results to the caller's variables. */
        *mount_flags = parsed_flags;
        *idmapping = parsed_idmapping;
        /* in the future mount_opts will hold string options for mount(2) */
        *mount_opts = NULL;

        return 0;
}
796

797
/* Establishes a single custom bind mount of m->source onto m->destination below 'dest'.
 *
 * The mount point is created if it does not exist yet (a directory for directory sources, a plain file
 * otherwise). Depending on the parsed options and the mount entry the result is additionally remounted
 * read-only and/or ID-mapped. Returns 0 on success, a negative errno-style value on failure. */
static int mount_bind(const char *dest, CustomMount *m, uid_t uid_shift, uid_t uid_range) {
        _cleanup_free_ char *mount_opts = NULL, *where = NULL;
        RemountIdmapping idmapping = REMOUNT_IDMAPPING_NONE;
        unsigned long mount_flags = MS_BIND | MS_REC;
        uid_t dest_uid = UID_INVALID;
        struct stat source_st, dest_st;
        bool source_is_dir;
        int r;

        assert(dest);
        assert(m);

        if (m->options) {
                r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts, &idmapping);
                if (r < 0)
                        return r;
        }

        /* If this is a bind mount from a temporary sources change ownership of the source to the container's
         * root UID. Otherwise it would always show up as "nobody" if user namespacing is used. */
        if (m->rm_rf_tmpdir) {
                if (chown(m->source, uid_shift, uid_shift) < 0)
                        return log_error_errno(errno, "Failed to chown %s: %m", m->source);
        }

        if (stat(m->source, &source_st) < 0)
                return log_error_errno(errno, "Failed to stat %s: %m", m->source);

        source_is_dir = S_ISDIR(source_st.st_mode);

        r = chase(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);

        if (r == 0) { /* Path doesn't exist yet? */
                r = mkdir_parents_safe_label(dest, where, 0755, uid_shift, uid_shift, MKDIR_IGNORE_EXISTING);
                if (r < 0)
                        return log_error_errno(r, "Failed to make parents of %s: %m", where);

                /* Create the mount point. Any non-directory file can be mounted on any non-directory file
                 * (regular, fifo, socket, char, block). */
                r = source_is_dir ? mkdir_label(where, 0755) : touch(where);
                if (r < 0)
                        return log_error_errno(r, "Failed to create mount point %s: %m", where);

                if (chown(where, uid_shift, uid_shift) < 0)
                        return log_error_errno(errno, "Failed to chown %s: %m", where);

                dest_uid = uid_shift;

        } else { /* Path exists already? */
                bool dest_is_dir;

                if (stat(where, &dest_st) < 0)
                        return log_error_errno(errno, "Failed to stat %s: %m", where);

                dest_uid = dest_st.st_uid;
                dest_is_dir = S_ISDIR(dest_st.st_mode);

                /* Directories may only be mounted on directories, and non-directories only on
                 * non-directories. */
                if (source_is_dir && !dest_is_dir)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
                                               "Cannot bind mount directory %s on file %s.",
                                               m->source, where);

                if (!source_is_dir && dest_is_dir)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
                                               "Cannot bind mount file %s on directory %s.",
                                               m->source, where);
        }

        r = mount_nofollow_verbose(LOG_ERR, m->source, where, NULL, mount_flags, mount_opts);
        if (r < 0)
                return r;

        if (m->read_only) {
                r = bind_remount_recursive(where, MS_RDONLY, MS_RDONLY, NULL);
                if (r < 0)
                        return log_error_errno(r, "Read-only bind mount failed: %m");
        }

        if (idmapping == REMOUNT_IDMAPPING_NONE)
                return 0;

        r = remount_idmap(STRV_MAKE(where), uid_shift, uid_range, source_st.st_uid, dest_uid, idmapping);
        if (r < 0)
                return log_error_errno(r, "Failed to map ids for bind mount %s: %m", where);

        return 0;
}
882

883
/* Mounts a fresh tmpfs on m->destination below 'dest', creating the mount point if it is missing. The
 * options configured on the mount entry are passed through tmpfs_patch_options() first; a UID shift of 0
 * is passed to it as UID_INVALID. Returns the result of the mount call, or a negative errno-style value
 * if preparing the mount fails. */
static int mount_tmpfs(const char *dest, CustomMount *m, uid_t uid_shift, const char *selinux_apifs_context) {
        _cleanup_free_ char *patched = NULL, *where = NULL;
        uid_t owner;
        int r;

        assert(dest);
        assert(m);

        r = chase(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
        if (r == 0) { /* Doesn't exist yet? */
                r = mkdir_p_label(where, 0755);
                if (r < 0)
                        return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
        }

        owner = uid_shift == 0 ? UID_INVALID : uid_shift;

        r = tmpfs_patch_options(m->options, owner, selinux_apifs_context, &patched);
        if (r < 0)
                return log_oom();

        /* A positive return value means a patched option string was generated, otherwise use the
         * configured options unmodified. */
        return mount_nofollow_verbose(
                        LOG_ERR,
                        "tmpfs",
                        where,
                        "tmpfs",
                        MS_NODEV|MS_STRICTATIME,
                        r > 0 ? patched : m->options);
}
907

908
static char *joined_and_escaped_lower_dirs(char **lower) {
2✔
909
        _cleanup_strv_free_ char **sv = NULL;
×
910

911
        sv = strv_copy(lower);
2✔
912
        if (!sv)
2✔
913
                return NULL;
914

915
        strv_reverse(sv);
2✔
916

917
        if (!strv_shell_escape(sv, ",:"))
2✔
918
                return NULL;
919

920
        return strv_join(sv, ":");
2✔
921
}
922

923
/* Mounts an overlay file system on m->destination below 'dest'.
 *
 * For read-only mounts m->source becomes just another (topmost) lower directory. For writable mounts
 * m->source is used as upper directory and m->work_dir as work directory. Returns the result of the mount
 * call, or a negative errno-style value if preparing the mount fails. */
static int mount_overlay(const char *dest, CustomMount *m) {
        _cleanup_free_ char *lower = NULL, *where = NULL, *escaped_source = NULL, *escaped_work_dir = NULL;
        unsigned long flags;
        const char *options;
        int r;

        assert(dest);
        assert(m);

        r = chase(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
        if (r == 0) { /* Doesn't exist yet? */
                r = mkdir_label(where, 0755);
                if (r < 0)
                        return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
        }

        /* Best effort only, if this fails the mount call below will report the problem */
        (void) mkdir_p_label(m->source, 0755);

        lower = joined_and_escaped_lower_dirs(m->lower);
        if (!lower)
                return log_oom();

        escaped_source = shell_escape(m->source, ",:");
        if (!escaped_source)
                return log_oom();

        if (!m->read_only) {
                escaped_work_dir = shell_escape(m->work_dir, ",:");
                if (!escaped_work_dir)
                        return log_oom();

                options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);
                flags = 0;
        } else {
                options = strjoina("lowerdir=", escaped_source, ":", lower);
                flags = MS_RDONLY;
        }

        return mount_nofollow_verbose(LOG_ERR, "overlay", where, "overlay", flags, options);
}
964

965
/* Makes m->destination below 'dest' inaccessible by bind mounting an inaccessible node of a matching
 * file type over it and then remounting that bind mount read-only.
 *
 * If the mount entry is marked graceful, all failures are logged at debug level only and 0 is returned
 * nonetheless. Otherwise failures are logged at error level and returned as negative errno-style value. */
static int mount_inaccessible(const char *dest, CustomMount *m) {
        _cleanup_free_ char *where = NULL, *source = NULL;
        struct stat st;
        int level, r;

        assert(dest);
        assert(m);

        level = m->graceful ? LOG_DEBUG : LOG_ERR;

        r = chase_and_stat(m->destination, dest, CHASE_PREFIX_ROOT, &where, &st);
        if (r < 0) {
                log_full_errno(level, r, "Failed to resolve %s/%s: %m", dest, m->destination);
                return m->graceful ? 0 : r;
        }

        r = mode_to_inaccessible_node(NULL, st.st_mode, &source);
        if (r < 0) {
                /* Don't fail silently here: in non-graceful mode this error is propagated to the caller,
                 * hence say what went wrong, like we do for all other failures in this function. */
                log_full_errno(level, r, "Failed to determine inaccessible node to mount over %s: %m", where);
                return m->graceful ? 0 : r;
        }

        r = mount_nofollow_verbose(level, source, where, NULL, MS_BIND, NULL);
        if (r < 0)
                return m->graceful ? 0 : r;

        r = mount_nofollow_verbose(level, NULL, where, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, NULL);
        if (r < 0) {
                /* Don't leave a writable bind mount around if we couldn't make it read-only */
                (void) umount_verbose(level, where, UMOUNT_NOFOLLOW);
                return m->graceful ? 0 : r;
        }

        return 0;
}
995

996
/* Mounts m->source with file system type m->type_argument and options m->options on m->destination
 * below 'dest', creating the mount point directory first if it is missing. Returns the result of the
 * mount call, or a negative errno-style value if preparing the mount point fails. */
static int mount_arbitrary(const char *dest, CustomMount *m) {
        _cleanup_free_ char *where = NULL;
        int r;

        assert(dest);
        assert(m);

        r = chase(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);

        if (r == 0) {
                /* The mount point doesn't exist yet, let's create it */
                r = mkdir_p_label(where, 0755);
                if (r < 0)
                        return log_error_errno(r, "Creating mount point for mount %s failed: %m", where);
        }

        return mount_nofollow_verbose(
                        LOG_ERR,
                        m->source,
                        where,
                        m->type_argument,
                        /* flags= */ 0,
                        m->options);
}
1014

1015
int mount_custom(
542✔
1016
                const char *dest,
1017
                CustomMount *mounts, size_t n,
1018
                uid_t uid_shift,
1019
                uid_t uid_range,
1020
                const char *selinux_apifs_context,
1021
                MountSettingsMask mount_settings) {
1022
        int r;
542✔
1023

1024
        assert(dest);
542✔
1025

1026
        FOREACH_ARRAY(m, mounts, n) {
1,142✔
1027
                if (FLAGS_SET(mount_settings, MOUNT_IN_USERNS) != m->in_userns)
600✔
1028
                        continue;
120✔
1029

1030
                if (FLAGS_SET(mount_settings, MOUNT_ROOT_ONLY) && !path_equal(m->destination, "/"))
480✔
1031
                        continue;
240✔
1032

1033
                if (FLAGS_SET(mount_settings, MOUNT_NON_ROOT_ONLY) && path_equal(m->destination, "/"))
240✔
1034
                        continue;
×
1035

1036
                switch (m->type) {
240✔
1037

1038
                case CUSTOM_MOUNT_BIND:
230✔
1039
                        r = mount_bind(dest, m, uid_shift, uid_range);
230✔
1040
                        break;
230✔
1041

1042
                case CUSTOM_MOUNT_TMPFS:
4✔
1043
                        r = mount_tmpfs(dest, m, uid_shift, selinux_apifs_context);
4✔
1044
                        break;
4✔
1045

1046
                case CUSTOM_MOUNT_OVERLAY:
2✔
1047
                        r = mount_overlay(dest, m);
2✔
1048
                        break;
2✔
1049

1050
                case CUSTOM_MOUNT_INACCESSIBLE:
4✔
1051
                        r = mount_inaccessible(dest, m);
4✔
1052
                        break;
4✔
1053

1054
                case CUSTOM_MOUNT_ARBITRARY:
×
1055
                        r = mount_arbitrary(dest, m);
×
1056
                        break;
×
1057

1058
                default:
×
1059
                        assert_not_reached();
×
1060
                }
1061

1062
                if (r < 0)
240✔
1063
                        return r;
1064
        }
1065

1066
        return 0;
1067
}
1068

1069
bool has_custom_root_mount(const CustomMount *mounts, size_t n) {
423✔
1070
        FOREACH_ARRAY(m, mounts, n)
875✔
1071
                if (path_equal(m->destination, "/"))
452✔
1072
                        return true;
1073

1074
        return false;
1075
}
1076

1077
static int setup_volatile_state(const char *directory) {
4✔
1078
        int r;
4✔
1079

1080
        assert(directory);
4✔
1081

1082
        /* --volatile=state means we simply overmount /var with a tmpfs, and the rest read-only. */
1083

1084
        /* First, remount the root directory. */
1085
        r = bind_remount_recursive(directory, MS_RDONLY, MS_RDONLY, NULL);
4✔
1086
        if (r < 0)
4✔
1087
                return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
×
1088

1089
        return 0;
1090
}
1091

1092
static int setup_volatile_state_after_remount_idmap(const char *directory, uid_t uid_shift, const char *selinux_apifs_context) {
4✔
1093
        _cleanup_free_ char *buf = NULL;
4✔
1094
        const char *p, *options;
4✔
1095
        int r;
4✔
1096

1097
        assert(directory);
4✔
1098

1099
        /* Then, after remount_idmap(), overmount /var/ with a tmpfs. */
1100

1101
        p = prefix_roota(directory, "/var");
4✔
1102
        r = mkdir(p, 0755);
4✔
1103
        if (r < 0 && errno != EEXIST)
4✔
1104
                return log_error_errno(errno, "Failed to create %s: %m", directory);
×
1105

1106
        options = "mode=0755" TMPFS_LIMITS_VOLATILE_STATE;
4✔
1107
        r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
6✔
1108
        if (r < 0)
4✔
1109
                return log_oom();
×
1110
        if (r > 0)
4✔
1111
                options = buf;
4✔
1112

1113
        return mount_nofollow_verbose(LOG_ERR, "tmpfs", p, "tmpfs", MS_STRICTATIME, options);
4✔
1114
}
1115

1116
static int setup_volatile_yes(const char *directory, uid_t uid_shift, const char *selinux_apifs_context) {
8✔
1117
        bool tmpfs_mounted = false, bind_mounted = false;
8✔
1118
        _cleanup_(rmdir_and_freep) char *template = NULL;
×
1119
        _cleanup_free_ char *buf = NULL, *bindir = NULL;
8✔
1120
        const char *f, *t, *options;
8✔
1121
        struct stat st;
8✔
1122
        int r;
8✔
1123

1124
        assert(directory);
8✔
1125

1126
        /* --volatile=yes means we mount a tmpfs to the root dir, and the original /usr to use inside it, and
1127
         * that read-only. Before we start setting this up let's validate if the image has the /usr merge
1128
         * implemented, and let's output a friendly log message if it hasn't. */
1129

1130
        bindir = path_join(directory, "/bin");
8✔
1131
        if (!bindir)
8✔
1132
                return log_oom();
×
1133
        if (lstat(bindir, &st) < 0) {
8✔
1134
                if (errno != ENOENT)
×
1135
                        return log_error_errno(errno, "Failed to stat /bin directory below image: %m");
×
1136

1137
                /* ENOENT is fine, just means the image is probably just a naked /usr and we can create the
1138
                 * rest. */
1139
        } else if (S_ISDIR(st.st_mode))
8✔
1140
                return log_error_errno(SYNTHETIC_ERRNO(EISDIR),
×
1141
                                       "Sorry, --volatile=yes mode is not supported with OS images that have not merged /bin/, /sbin/, /lib/, /lib64/ into /usr/. "
1142
                                       "Please work with your distribution and help them adopt the merged /usr scheme.");
1143
        else if (!S_ISLNK(st.st_mode))
8✔
1144
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
×
1145
                                       "Error starting image: if --volatile=yes is used /bin must be a symlink (for merged /usr support) or non-existent (in which case a symlink is created automatically).");
1146

1147
        r = mkdtemp_malloc("/tmp/nspawn-volatile-XXXXXX", &template);
8✔
1148
        if (r < 0)
8✔
1149
                return log_error_errno(r, "Failed to create temporary directory: %m");
×
1150

1151
        options = "mode=0755" TMPFS_LIMITS_ROOTFS;
8✔
1152
        r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
12✔
1153
        if (r < 0)
8✔
1154
                goto fail;
×
1155
        if (r > 0)
8✔
1156
                options = buf;
8✔
1157

1158
        r = mount_nofollow_verbose(LOG_ERR, "tmpfs", template, "tmpfs", MS_STRICTATIME, options);
8✔
1159
        if (r < 0)
8✔
1160
                goto fail;
×
1161

1162
        tmpfs_mounted = true;
8✔
1163

1164
        f = prefix_roota(directory, "/usr");
16✔
1165
        t = prefix_roota(template, "/usr");
16✔
1166

1167
        r = mkdir(t, 0755);
8✔
1168
        if (r < 0 && errno != EEXIST) {
8✔
1169
                r = log_error_errno(errno, "Failed to create %s: %m", t);
×
1170
                goto fail;
×
1171
        }
1172

1173
        r = mount_nofollow_verbose(LOG_ERR, f, t, NULL, MS_BIND|MS_REC, NULL);
8✔
1174
        if (r < 0)
8✔
1175
                goto fail;
×
1176

1177
        bind_mounted = true;
8✔
1178

1179
        r = bind_remount_recursive(t, MS_RDONLY, MS_RDONLY, NULL);
8✔
1180
        if (r < 0) {
8✔
1181
                log_error_errno(r, "Failed to remount %s read-only: %m", t);
×
1182
                goto fail;
×
1183
        }
1184

1185
        r = mount_nofollow_verbose(LOG_ERR, template, directory, NULL, MS_MOVE, NULL);
8✔
1186
        if (r < 0)
8✔
1187
                goto fail;
×
1188

1189
        (void) rmdir(template);
8✔
1190

1191
        return 0;
8✔
1192

1193
fail:
1194
        if (bind_mounted)
×
1195
                (void) umount_verbose(LOG_ERR, t, UMOUNT_NOFOLLOW);
×
1196

1197
        if (tmpfs_mounted)
×
1198
                (void) umount_verbose(LOG_ERR, template, UMOUNT_NOFOLLOW);
×
1199

1200
        return r;
1201
}
1202

1203
static int setup_volatile_overlay(const char *directory, uid_t uid_shift, const char *selinux_apifs_context) {
4✔
1204
        _cleanup_free_ char *buf = NULL, *escaped_directory = NULL, *escaped_upper = NULL, *escaped_work = NULL;
4✔
1205
        _cleanup_(rmdir_and_freep) char *template = NULL;
4✔
1206
        const char *upper, *work, *options;
4✔
1207
        bool tmpfs_mounted = false;
4✔
1208
        int r;
4✔
1209

1210
        assert(directory);
4✔
1211

1212
        /* --volatile=overlay means we mount an overlayfs to the root dir. */
1213

1214
        r = mkdtemp_malloc("/tmp/nspawn-volatile-XXXXXX", &template);
4✔
1215
        if (r < 0)
4✔
1216
                return log_error_errno(r, "Failed to create temporary directory: %m");
×
1217

1218
        options = "mode=0755" TMPFS_LIMITS_ROOTFS;
4✔
1219
        r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
6✔
1220
        if (r < 0)
4✔
1221
                goto finish;
×
1222
        if (r > 0)
4✔
1223
                options = buf;
4✔
1224

1225
        r = mount_nofollow_verbose(LOG_ERR, "tmpfs", template, "tmpfs", MS_STRICTATIME, options);
4✔
1226
        if (r < 0)
4✔
1227
                goto finish;
×
1228

1229
        tmpfs_mounted = true;
4✔
1230

1231
        upper = strjoina(template, "/upper");
20✔
1232
        work = strjoina(template, "/work");
20✔
1233

1234
        if (mkdir(upper, 0755) < 0) {
4✔
1235
                r = log_error_errno(errno, "Failed to create %s: %m", upper);
×
1236
                goto finish;
×
1237
        }
1238
        if (mkdir(work, 0755) < 0) {
4✔
1239
                r = log_error_errno(errno, "Failed to create %s: %m", work);
×
1240
                goto finish;
×
1241
        }
1242

1243
        /* And now, let's overmount the root dir with an overlayfs that uses the root dir as lower dir. It's kinda nice
1244
         * that the kernel allows us to do that without going through some mount point rearrangements. */
1245

1246
        escaped_directory = shell_escape(directory, ",:");
4✔
1247
        escaped_upper = shell_escape(upper, ",:");
4✔
1248
        escaped_work = shell_escape(work, ",:");
4✔
1249
        if (!escaped_directory || !escaped_upper || !escaped_work) {
4✔
1250
                r = -ENOMEM;
×
1251
                goto finish;
×
1252
        }
1253

1254
        options = strjoina("lowerdir=", escaped_directory, ",upperdir=", escaped_upper, ",workdir=", escaped_work);
52✔
1255
        r = mount_nofollow_verbose(LOG_ERR, "overlay", directory, "overlay", 0, options);
4✔
1256

1257
finish:
1258
        if (tmpfs_mounted)
×
1259
                (void) umount_verbose(LOG_ERR, template, UMOUNT_NOFOLLOW);
4✔
1260

1261
        return r;
1262
}
1263

1264
int setup_volatile_mode(
220✔
1265
                const char *directory,
1266
                VolatileMode mode,
1267
                uid_t uid_shift,
1268
                const char *selinux_apifs_context) {
1269

1270
        switch (mode) {
220✔
1271

1272
        case VOLATILE_YES:
8✔
1273
                return setup_volatile_yes(directory, uid_shift, selinux_apifs_context);
8✔
1274

1275
        case VOLATILE_STATE:
4✔
1276
                return setup_volatile_state(directory);
4✔
1277

1278
        case VOLATILE_OVERLAY:
4✔
1279
                return setup_volatile_overlay(directory, uid_shift, selinux_apifs_context);
4✔
1280

1281
        default:
1282
                return 0;
1283
        }
1284
}
1285

1286
int setup_volatile_mode_after_remount_idmap(
218✔
1287
                const char *directory,
1288
                VolatileMode mode,
1289
                uid_t uid_shift,
1290
                const char *selinux_apifs_context) {
1291

1292
        switch (mode) {
218✔
1293

1294
        case VOLATILE_STATE:
4✔
1295
                return setup_volatile_state_after_remount_idmap(directory, uid_shift, selinux_apifs_context);
4✔
1296

1297
        default:
1298
                return 0;
1299
        }
1300
}
1301

1302
/* Expects *pivot_root_new and *pivot_root_old to be initialised to allocated memory or NULL. */
1303
int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s) {
2✔
1304
        _cleanup_free_ char *root_new = NULL, *root_old = NULL;
2✔
1305
        const char *p = s;
2✔
1306
        int r;
2✔
1307

1308
        assert(pivot_root_new);
2✔
1309
        assert(pivot_root_old);
2✔
1310

1311
        r = extract_first_word(&p, &root_new, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
2✔
1312
        if (r < 0)
2✔
1313
                return r;
1314
        if (r == 0)
2✔
1315
                return -EINVAL;
1316

1317
        if (isempty(p))
2✔
1318
                root_old = NULL;
1319
        else {
1320
                root_old = strdup(p);
×
1321
                if (!root_old)
×
1322
                        return -ENOMEM;
1323
        }
1324

1325
        if (!path_is_absolute(root_new))
2✔
1326
                return -EINVAL;
1327
        if (root_old && !path_is_absolute(root_old))
×
1328
                return -EINVAL;
1329

1330
        free_and_replace(*pivot_root_new, root_new);
×
1331
        free_and_replace(*pivot_root_old, root_old);
×
1332

1333
        return 0;
×
1334
}
1335

1336
int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old) {
220✔
1337
        _cleanup_free_ char *directory_pivot_root_new = NULL;
440✔
1338
        _cleanup_free_ char *pivot_tmp_pivot_root_old = NULL;
220✔
1339
        _cleanup_(rmdir_and_freep) char *pivot_tmp = NULL;
220✔
1340
        int r;
220✔
1341

1342
        assert(directory);
220✔
1343

1344
        if (!pivot_root_new)
220✔
1345
                return 0;
1346

1347
        /* Pivot pivot_root_new to / and the existing / to pivot_root_old.
1348
         * If pivot_root_old is NULL, the existing / disappears.
1349
         * This requires a temporary directory, pivot_tmp, which is
1350
         * not a child of either.
1351
         *
1352
         * This is typically used for OSTree-style containers, where the root partition contains several
1353
         * sysroots which could be run. Normally, one would be chosen by the bootloader and pivoted to / by
1354
         * initrd.
1355
         *
1356
         * For example, for an OSTree deployment, pivot_root_new
1357
         * would be: /ostree/deploy/$os/deploy/$checksum. Note that this
1358
         * code doesn’t do the /var mount which OSTree expects: use
1359
         * --bind +/sysroot/ostree/deploy/$os/var:/var for that.
1360
         *
1361
         * So in the OSTree case, we’ll end up with something like:
1362
         *  - directory = /tmp/nspawn-root-123456
1363
         *  - pivot_root_new = /ostree/deploy/os/deploy/123abc
1364
         *  - pivot_root_old = /sysroot
1365
         *  - directory_pivot_root_new =
1366
         *       /tmp/nspawn-root-123456/ostree/deploy/os/deploy/123abc
1367
         *  - pivot_tmp = /tmp/nspawn-pivot-123456
1368
         *  - pivot_tmp_pivot_root_old = /tmp/nspawn-pivot-123456/sysroot
1369
         *
1370
         * Requires all file systems at directory and below to be mounted
1371
         * MS_PRIVATE or MS_SLAVE so they can be moved.
1372
         */
1373
        directory_pivot_root_new = path_join(directory, pivot_root_new);
×
1374
        if (!directory_pivot_root_new)
×
1375
                return log_oom();
×
1376

1377
        /* Remount directory_pivot_root_new to make it movable. */
1378
        r = mount_nofollow_verbose(LOG_ERR, directory_pivot_root_new, directory_pivot_root_new, NULL, MS_BIND, NULL);
×
1379
        if (r < 0)
×
1380
                return r;
1381

1382
        if (pivot_root_old) {
×
1383
                r = mkdtemp_malloc("/tmp/nspawn-pivot-XXXXXX", &pivot_tmp);
×
1384
                if (r < 0)
×
1385
                        return log_error_errno(r, "Failed to create temporary directory: %m");
×
1386

1387
                pivot_tmp_pivot_root_old = path_join(pivot_tmp, pivot_root_old);
×
1388
                if (!pivot_tmp_pivot_root_old)
×
1389
                        return log_oom();
×
1390

1391
                r = mount_nofollow_verbose(LOG_ERR, directory_pivot_root_new, pivot_tmp, NULL, MS_MOVE, NULL);
×
1392
                if (r < 0)
×
1393
                        return r;
1394

1395
                r = mount_nofollow_verbose(LOG_ERR, directory, pivot_tmp_pivot_root_old, NULL, MS_MOVE, NULL);
×
1396
                if (r < 0)
×
1397
                        return r;
1398

1399
                r = mount_nofollow_verbose(LOG_ERR, pivot_tmp, directory, NULL, MS_MOVE, NULL);
×
1400
        } else
1401
                r = mount_nofollow_verbose(LOG_ERR, directory_pivot_root_new, directory, NULL, MS_MOVE, NULL);
×
1402

1403
        if (r < 0)
×
1404
                return r;
×
1405

1406
        return 0;
1407
}
1408

1409
/* Paths at which pin_fully_visible_api_fs() mounts a procfs and a sysfs instance, and which
 * do_wipe_fully_visible_api_fs() unmounts and removes again. */
#define NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS "/run/host/proc"
1410
#define NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS "/run/host/sys"
1411

1412
int pin_fully_visible_api_fs(void) {
90✔
1413
        int r;
90✔
1414

1415
        log_debug("Pinning fully visible API FS");
90✔
1416

1417
        (void) mkdir_p(NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS, 0755);
90✔
1418
        (void) mkdir_p(NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS, 0755);
90✔
1419

1420
        r = mount_follow_verbose(LOG_ERR, "proc", NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS, "proc", PROC_DEFAULT_MOUNT_FLAGS, NULL);
90✔
1421
        if (r < 0)
90✔
1422
                return r;
1423

1424
        r = mount_follow_verbose(LOG_ERR, "sysfs", NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS, "sysfs", SYS_DEFAULT_MOUNT_FLAGS, NULL);
90✔
1425
        if (r < 0)
90✔
1426
                return r;
×
1427

1428
        return 0;
1429
}
1430

1431
static int do_wipe_fully_visible_api_fs(void) {
42✔
1432
        if (umount2(NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS, MNT_DETACH) < 0)
42✔
1433
                return log_error_errno(errno, "Failed to unmount temporary proc: %m");
×
1434

1435
        if (rmdir(NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS) < 0)
42✔
1436
                return log_error_errno(errno, "Failed to remove temporary proc mountpoint: %m");
×
1437

1438
        if (umount2(NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS, MNT_DETACH) < 0)
42✔
1439
                return log_error_errno(errno, "Failed to unmount temporary sys: %m");
×
1440

1441
        if (rmdir(NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS) < 0)
42✔
1442
                return log_error_errno(errno, "Failed to remove temporary sys mountpoint: %m");
×
1443

1444
        return 0;
1445
}
1446

1447
int wipe_fully_visible_api_fs(int mntns_fd) {
42✔
1448
        _cleanup_close_ int orig_mntns_fd = -EBADF;
42✔
1449
        int r, rr;
42✔
1450

1451
        log_debug("Wiping fully visible API FS");
42✔
1452

1453
        orig_mntns_fd = namespace_open_by_type(NAMESPACE_MOUNT);
42✔
1454
        if (orig_mntns_fd < 0)
42✔
1455
                return log_error_errno(orig_mntns_fd, "Failed to pin originating mount namespace: %m");
×
1456

1457
        r = namespace_enter(/* pidns_fd = */ -EBADF,
42✔
1458
                            mntns_fd,
1459
                            /* netns_fd = */ -EBADF,
1460
                            /* userns_fd = */ -EBADF,
1461
                            /* root_fd = */ -EBADF);
1462
        if (r < 0)
42✔
1463
                return log_error_errno(r, "Failed to enter mount namespace: %m");
×
1464

1465
        rr = do_wipe_fully_visible_api_fs();
42✔
1466

1467
        r = namespace_enter(/* pidns_fd = */ -EBADF,
42✔
1468
                            orig_mntns_fd,
1469
                            /* netns_fd = */ -EBADF,
1470
                            /* userns_fd = */ -EBADF,
1471
                            /* root_fd = */ -EBADF);
1472
        if (r < 0)
42✔
1473
                return log_error_errno(r, "Failed to enter original mount namespace: %m");
×
1474

1475
        return rr;
1476
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc