• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 23927985597

02 Apr 2026 07:45PM UTC coverage: 72.362% (+0.02%) from 72.343%
23927985597

push

github

daandemeyer
ci: Drop base64 encoding in claude review workflow

Doesn't seem to work nearly as good as the previous solution which
just told claude not to escape stuff.

319121 of 441004 relevant lines covered (72.36%)

1167673.48 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

71.46
/src/shared/mount-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <stdlib.h>
4
#include <sys/mount.h>
5
#include <sys/socket.h>
6
#include <sys/stat.h>
7
#include <unistd.h>
8

9
#include "alloc-util.h"
10
#include "chase.h"
11
#include "creds-util.h"
12
#include "dissect-image.h"
13
#include "errno-util.h"
14
#include "extract-word.h"
15
#include "fd-util.h"
16
#include "fileio.h"
17
#include "format-util.h"
18
#include "fs-util.h"
19
#include "fstab-util.h"
20
#include "glyph-util.h"
21
#include "hashmap.h"
22
#include "libmount-util.h"
23
#include "log.h"
24
#include "mkdir-label.h"
25
#include "mount-util.h"
26
#include "mountpoint-util.h"
27
#include "namespace-util.h"
28
#include "os-util.h"
29
#include "path-util.h"
30
#include "pidref.h"
31
#include "process-util.h"
32
#include "runtime-scope.h"
33
#include "set.h"
34
#include "socket-util.h"
35
#include "sort-util.h"
36
#include "stat-util.h"
37
#include "string-util.h"
38
#include "strv.h"
39
#include "tmpfile-util.h"
40
#include "user-util.h"
41

42
int umount_recursive_full(const char *prefix, int flags, char **keep) {
#if HAVE_LIBMOUNT
        _cleanup_fclose_ FILE *mountinfo = NULL;
        int n_unmounted = 0;
        bool progress;

        /* Walks /proc/self/mountinfo and calls umount2() on every entry whose target passes the
         * path_startswith() check against 'prefix' (all entries if 'prefix' is NULL) and that is not
         * related to any entry of 'keep'. After each successful umount2() the table is parsed again from
         * the start, which also takes care of stacked mounts; we stop once a full pass unmounts nothing.
         *
         * Returns the number of successful umount2() calls, or the result of log_debug_errno() for the
         * step that failed. */

        /* Open the file once and keep it pinned, in case we unmount /proc/ as part of the logic here */
        mountinfo = fopen("/proc/self/mountinfo", "re");
        if (!mountinfo)
                return log_debug_errno(errno, "Failed to open %s: %m", "/proc/self/mountinfo");

        do {
                _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
                _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
                int r;

                progress = false;

                r = libmount_parse_full("/proc/self/mountinfo", mountinfo, MNT_ITER_BACKWARD, &table, &iter);
                if (r < 0)
                        return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");

                /* Scan until we managed to unmount one entry, or until the table is exhausted */
                while (!progress) {
                        struct libmnt_fs *fs;
                        const char *target;
                        bool kept = false;

                        r = sym_mnt_table_next_fs(table, iter, &fs);
                        if (r == 1) /* end of table */
                                break;
                        if (r < 0)
                                return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");

                        target = sym_mnt_fs_get_target(fs);
                        if (!target)
                                continue;

                        /* FIXME: a log_trace("Not unmounting %s, outside of prefix: %s", ...) call used to
                         * live here but is disabled because it is extremely noisy; we're probably doing
                         * something very wrong to trigger this so often, needs more investigation. */
                        if (prefix && !path_startswith(target, prefix))
                                continue;

                        /* Match against anything in the path to the dirs to keep, or below the dirs to keep */
                        STRV_FOREACH(k, keep) {
                                kept = path_startswith(target, *k) || path_startswith(*k, target);
                                if (kept)
                                        break;
                        }

                        if (kept) {
                                log_debug("Not unmounting %s, referenced by keep list.", target);
                                continue;
                        }

                        if (umount2(target, flags | UMOUNT_NOFOLLOW) < 0) {
                                log_debug_errno(errno, "Failed to umount %s, ignoring: %m", target);
                                continue;
                        }

                        log_trace("Successfully unmounted %s", target);

                        n_unmounted++;
                        progress = true;
                }

                /* The mount table changed, read it again from the beginning */
                if (progress)
                        rewind(mountinfo);
        } while (progress);

        return n_unmounted;
#else
        return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "libmount support not compiled in");
#endif
}
120

121
#define MS_CONVERTIBLE_FLAGS (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_NOSYMFOLLOW|MS_RELATIME|MS_NOATIME|MS_STRICTATIME|MS_NODIRATIME)
122

123
static uint64_t ms_flags_to_mount_attr(unsigned long a) {
38,225✔
124
        uint64_t f = 0;
38,225✔
125

126
        if (FLAGS_SET(a, MS_RDONLY))
38,225✔
127
                f |= MOUNT_ATTR_RDONLY;
1,536✔
128

129
        if (FLAGS_SET(a, MS_NOSUID))
38,225✔
130
                f |= MOUNT_ATTR_NOSUID;
19,246✔
131

132
        if (FLAGS_SET(a, MS_NODEV))
38,225✔
133
                f |= MOUNT_ATTR_NODEV;
3,337✔
134

135
        if (FLAGS_SET(a, MS_NOEXEC))
38,225✔
136
                f |= MOUNT_ATTR_NOEXEC;
3,337✔
137

138
        if (FLAGS_SET(a, MS_NOSYMFOLLOW))
38,225✔
139
                f |= MOUNT_ATTR_NOSYMFOLLOW;
2,087✔
140

141
        if (FLAGS_SET(a, MS_RELATIME))
38,225✔
142
                f |= MOUNT_ATTR_RELATIME;
38,225✔
143

144
        if (FLAGS_SET(a, MS_NOATIME))
38,225✔
145
                f |= MOUNT_ATTR_NOATIME;
×
146

147
        if (FLAGS_SET(a, MS_STRICTATIME))
38,225✔
148
                f |= MOUNT_ATTR_STRICTATIME;
×
149

150
        if (FLAGS_SET(a, MS_NODIRATIME))
38,225✔
151
                f |= MOUNT_ATTR_NODIRATIME;
×
152

153
        return f;
38,225✔
154
}
155

156
static uint64_t ms_flags_to_mount_attr_clr(unsigned long a) {
1,250✔
157
        uint64_t f = 0;
1,250✔
158

159
        /* As per documentation, if relatime/noatime/strictatime are set, we need to clear the atime flag
160
         * too, otherwise -EINVAL will be returned by the kernel. */
161
        if (FLAGS_SET(a, MS_RELATIME))
1,250✔
162
                f |= MOUNT_ATTR__ATIME;
×
163

164
        if (FLAGS_SET(a, MS_NOATIME))
1,250✔
165
                f |= MOUNT_ATTR__ATIME;
×
166

167
        if (FLAGS_SET(a, MS_STRICTATIME))
1,250✔
168
                f |= MOUNT_ATTR__ATIME;
×
169

170
        return f;
1,250✔
171
}
172

173
static bool skip_mount_set_attr = false;
174

175
/* Use this function only if you do not have direct access to /proc/self/mountinfo but the caller can open it
176
 * for you. This is the case when /proc is masked or not mounted. Otherwise, use bind_remount_recursive. */
177
int bind_remount_recursive_with_mountinfo(
32,711✔
178
                const char *prefix,
179
                unsigned long new_flags,
180
                unsigned long flags_mask,
181
                char **deny_list,
182
                FILE *proc_self_mountinfo) {
183

184
        assert(prefix);
32,711✔
185

186
        if ((flags_mask & ~MS_CONVERTIBLE_FLAGS) == 0 && strv_isempty(deny_list) && !skip_mount_set_attr) {
47,957✔
187
                /* Let's take a shortcut for all the flags we know how to convert into mount_setattr() flags */
188

189
                if (mount_setattr(AT_FDCWD, prefix, AT_SYMLINK_NOFOLLOW|AT_RECURSIVE,
15,246✔
190
                                  &(struct mount_attr) {
15,246✔
191
                                          .attr_set = ms_flags_to_mount_attr(new_flags & flags_mask),
15,246✔
192
                                          .attr_clr = ms_flags_to_mount_attr(~new_flags & flags_mask),
15,246✔
193
                                  }, MOUNT_ATTR_SIZE_VER0) < 0) {
194

195
                        log_debug_errno(errno, "mount_setattr() failed, falling back to classic remounting: %m");
2✔
196

197
                        /* We fall through to classic behaviour if not supported (i.e. kernel < 5.12). We
198
                         * also do this for all other kinds of errors since they are so many different, and
199
                         * mount_setattr() has no graceful mode where it continues despite seeing errors one
200
                         * some mounts, but we want that. Moreover mount_setattr() only works on the mount
201
                         * point inode itself, not a non-mount point inode, and we want to support arbitrary
202
                         * prefixes here. */
203

204
                        if (ERRNO_IS_NOT_SUPPORTED(errno)) /* if not supported, then don't bother at all anymore */
2✔
205
                                skip_mount_set_attr = true;
×
206
                } else
207
                        return 0; /* Nice, this worked! */
15,244✔
208
        }
209

210
#if HAVE_LIBMOUNT
211
        _cleanup_fclose_ FILE *proc_self_mountinfo_opened = NULL;
32,711✔
212
        _cleanup_set_free_ Set *done = NULL;
17,467✔
213
        unsigned n_tries = 0;
17,467✔
214
        int r;
17,467✔
215

216
        if (!proc_self_mountinfo) {
17,467✔
217
                r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo_opened);
3✔
218
                if (r < 0)
3✔
219
                        return r;
220

221
                proc_self_mountinfo = proc_self_mountinfo_opened;
3✔
222
        }
223

224
        /* Recursively remount a directory (and all its submounts) with desired flags (MS_READONLY,
225
         * MS_NOSUID, MS_NOEXEC). If the directory is already mounted, we reuse the mount and simply mark it
226
         * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write operation), ditto for other flags. If it
227
         * isn't we first make it one. Afterwards we apply (or remove) the flags to all submounts we can
228
         * access, too. When mounts are stacked on the same mount point we only care for each individual
229
         * "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We do
230
         * not have any effect on future submounts that might get propagated, they might be writable
231
         * etc. This includes future submounts that have been triggered via autofs. Also note that we can't
232
         * operate atomically here. Mounts established while we process the tree might or might not get
233
         * noticed and thus might or might not be covered.
234
         *
235
         * If the "deny_list" parameter is specified it may contain a list of subtrees to exclude from the
236
         * remount operation. Note that we'll ignore the deny list for the top-level path. */
237

238
        for (;;) {
34,936✔
239
                _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
34,936✔
240
                _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
34,936✔
241
                _cleanup_hashmap_free_ Hashmap *todo = NULL;
34,934✔
242
                bool top_autofs = false;
34,936✔
243

244
                if (n_tries++ >= 32) /* Let's not retry this loop forever */
34,936✔
245
                        return -EBUSY;
246

247
                rewind(proc_self_mountinfo);
34,936✔
248

249
                r = libmount_parse_mountinfo(proc_self_mountinfo, &table, &iter);
34,936✔
250
                if (r < 0)
34,936✔
251
                        return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");
×
252

253
                for (;;) {
2,587,175✔
254
                        _cleanup_free_ char *d = NULL;
2,552,239✔
255
                        const char *path, *type, *opts;
2,587,175✔
256
                        unsigned long flags = 0;
2,587,175✔
257
                        struct libmnt_fs *fs;
2,587,175✔
258

259
                        r = sym_mnt_table_next_fs(table, iter, &fs);
2,587,175✔
260
                        if (r == 1) /* EOF */
2,587,175✔
261
                                break;
262
                        if (r < 0)
2,552,239✔
263
                                return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
×
264

265
                        path = sym_mnt_fs_get_target(fs);
2,552,239✔
266
                        if (!path)
2,552,239✔
267
                                continue;
×
268

269
                        if (!path_startswith(path, prefix))
2,552,239✔
270
                                continue;
2,485,408✔
271

272
                        type = sym_mnt_fs_get_fstype(fs);
66,831✔
273
                        if (!type)
66,831✔
274
                                continue;
×
275

276
                        /* Let's ignore autofs mounts. If they aren't triggered yet, we want to avoid
277
                         * triggering them, as we don't make any guarantees for future submounts anyway. If
278
                         * they are already triggered, then we will find another entry for this. */
279
                        if (streq(type, "autofs")) {
66,831✔
280
                                top_autofs = top_autofs || path_equal(path, prefix);
×
281
                                continue;
×
282
                        }
283

284
                        if (set_contains(done, path))
66,831✔
285
                                continue;
24,145✔
286

287
                        /* Ignore this mount if it is deny-listed, but only if it isn't the top-level mount
288
                         * we shall operate on. */
289
                        if (!path_equal(path, prefix)) {
42,686✔
290
                                bool deny_listed = false;
338,932✔
291

292
                                STRV_FOREACH(i, deny_list) {
338,932✔
293
                                        if (path_equal(*i, prefix))
332,263✔
294
                                                continue;
25,198✔
295

296
                                        if (!path_startswith(*i, prefix))
307,065✔
297
                                                continue;
177,162✔
298

299
                                        if (path_startswith(path, *i)) {
129,903✔
300
                                                deny_listed = true;
301
                                                log_trace("Not remounting %s deny-listed by %s, called for %s", path, *i, prefix);
302
                                                break;
303
                                        }
304
                                }
305

306
                                if (deny_listed)
25,209✔
307
                                        continue;
18,540✔
308
                        }
309

310
                        opts = sym_mnt_fs_get_vfs_options(fs);
24,146✔
311
                        if (opts) {
24,146✔
312
                                r = sym_mnt_optstr_get_flags(opts, &flags, sym_mnt_get_builtin_optmap(MNT_LINUX_MAP));
24,146✔
313
                                if (r < 0)
24,146✔
314
                                        log_debug_errno(r, "Could not get flags for '%s', ignoring: %m", path);
×
315
                        }
316

317
                        d = strdup(path);
24,146✔
318
                        if (!d)
24,146✔
319
                                return -ENOMEM;
320

321
                        r = hashmap_ensure_put(&todo, &path_hash_ops_free, d, ULONG_TO_PTR(flags));
24,146✔
322
                        if (r == -EEXIST)
24,146✔
323
                                /* If the same path was recorded, but with different mount flags, update it:
324
                                 * it means a mount point is overmounted, and libmount returns the "bottom" (or
325
                                 * older one) first, but we want to reapply the flags from the "top" (or newer
326
                                 * one). See: https://github.com/systemd/systemd/issues/20032
327
                                 * Note that this shouldn't really fail, as we were just told that the key
328
                                 * exists, and it's an update so we want 'd' to be freed immediately. */
329
                                r = hashmap_update(todo, d, ULONG_TO_PTR(flags));
5✔
330
                        if (r < 0)
24,146✔
331
                                return r;
332
                        if (r > 0)
24,146✔
333
                                TAKE_PTR(d);
23,898✔
334
                }
335

336
                /* Check if the top-level directory was among what we have seen so far. For that check both
337
                 * 'done' and 'todo'. Also check 'top_autofs' because if the top-level dir is an autofs we'll
338
                 * not include it in either set but will set this bool. */
339
                if (!set_contains(done, prefix) &&
34,936✔
340
                    !(top_autofs || hashmap_contains(todo, prefix))) {
17,469✔
341

342
                        /* The prefix directory itself is not yet a mount, make it one. */
343
                        r = mount_nofollow(prefix, prefix, NULL, MS_BIND|MS_REC, NULL);
2✔
344
                        if (r < 0)
2✔
345
                                return r;
346

347
                        /* Immediately rescan, so that we pick up the new mount's flags */
348
                        continue;
2✔
349
                }
350

351
                /* If we have no submounts to process anymore, we are done */
352
                if (hashmap_isempty(todo))
34,934✔
353
                        return 0;
354

355
                for (;;) {
41,364✔
356
                        unsigned long flags;
41,364✔
357
                        char *x = NULL;
41,364✔
358

359
                        /* Take the first mount from our list of mounts to still process */
360
                        flags = PTR_TO_ULONG(hashmap_steal_first_key_and_value(todo, (void**) &x));
41,364✔
361
                        if (!x)
41,364✔
362
                                break;
363

364
                        r = set_ensure_consume(&done, &path_hash_ops_free, x);
23,897✔
365
                        if (IN_SET(r, 0, -EEXIST))
23,897✔
366
                                continue; /* Already done */
292✔
367
                        if (r < 0)
23,897✔
368
                                return r;
×
369

370
                        /* Now, remount this with the new flags set, but exclude MS_RELATIME from it. (It's
371
                         * the default anyway, thus redundant, and in userns we'll get an error if we try to
372
                         * explicitly enable it) */
373
                        r = mount_nofollow(NULL, x, NULL, ((flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags) & ~MS_RELATIME, NULL);
23,897✔
374
                        if (r < 0) {
23,897✔
375
                                int q;
292✔
376

377
                                /* OK, so the remount of this entry failed. We'll ultimately ignore this in
378
                                 * almost all cases (there are simply so many reasons why this can fail,
379
                                 * think autofs, NFS, FUSE, …), but let's generate useful debug messages at
380
                                 * the very least. */
381

382
                                q = path_is_mount_point(x);
292✔
383
                                if (IN_SET(q, 0, -ENOENT)) {
292✔
384
                                        /* Hmm, whaaaa? The mount point is not actually a mount point? Then
385
                                         * it is either obstructed by a later mount or somebody has been
386
                                         * racing against us and removed it. Either way the mount point
387
                                         * doesn't matter to us, let's ignore it hence. */
388
                                        log_debug_errno(r, "Mount point '%s' to remount is not a mount point anymore, ignoring remount failure: %m", x);
292✔
389
                                        continue;
292✔
390
                                }
391
                                if (q < 0) /* Any other error on this? Just log and continue */
×
392
                                        log_debug_errno(q, "Failed to determine whether '%s' is a mount point or not, ignoring: %m", x);
×
393

394
                                if (((flags ^ new_flags) & flags_mask & ~MS_RELATIME) == 0) { /* ignore MS_RELATIME while comparing */
×
395
                                        log_debug_errno(r, "Couldn't remount '%s', but the flags already match what we want, hence ignoring: %m", x);
×
396
                                        continue;
×
397
                                }
398

399
                                /* Make this fatal if this is the top-level mount */
400
                                if (path_equal(x, prefix))
×
401
                                        return r;
402

403
                                /* If this is not the top-level mount, then handle this gracefully: log but
404
                                 * otherwise ignore. With NFS, FUSE, autofs there are just too many reasons
405
                                 * this might fail without a chance for us to do anything about it, let's
406
                                 * hence be strict on the top-level mount and lenient on the inner ones. */
407
                                log_debug_errno(r, "Couldn't remount submount '%s' for unexpected reason, ignoring: %m", x);
×
408
                                continue;
×
409
                        }
410

411
                        log_trace("Remounted %s.", x);
23,605✔
412
                }
413
        }
414
#else
415
        return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "libmount support not compiled in");
416
#endif
417
}
418

419
int bind_remount_one_with_mountinfo(
2,198✔
420
                const char *path,
421
                unsigned long new_flags,
422
                unsigned long flags_mask,
423
                FILE *proc_self_mountinfo) {
424

425
        assert(path);
2,198✔
426
        assert(proc_self_mountinfo);
2,198✔
427

428
        if ((flags_mask & ~MS_CONVERTIBLE_FLAGS) == 0 && !skip_mount_set_attr) {
2,198✔
429
                /* Let's take a shortcut for all the flags we know how to convert into mount_setattr() flags */
430

431
                if (mount_setattr(AT_FDCWD, path, AT_SYMLINK_NOFOLLOW,
2,198✔
432
                                  &(struct mount_attr) {
2,198✔
433
                                          .attr_set = ms_flags_to_mount_attr(new_flags & flags_mask),
2,198✔
434
                                          .attr_clr = ms_flags_to_mount_attr(~new_flags & flags_mask),
2,198✔
435
                                  }, MOUNT_ATTR_SIZE_VER0) < 0) {
436

437
                        log_debug_errno(errno, "mount_setattr() didn't work, falling back to classic remounting: %m");
4✔
438

439
                        if (ERRNO_IS_NOT_SUPPORTED(errno)) /* if not supported, then don't bother at all anymore */
4✔
440
                                skip_mount_set_attr = true;
×
441
                } else
442
                        return 0; /* Nice, this worked! */
2,194✔
443
        }
444

445
#if HAVE_LIBMOUNT
446
        _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
×
447
        unsigned long flags = 0;
4✔
448
        struct libmnt_fs *fs;
4✔
449
        const char *opts;
4✔
450
        int r;
4✔
451

452
        rewind(proc_self_mountinfo);
4✔
453

454
        r = dlopen_libmount();
4✔
455
        if (r < 0)
4✔
456
                return r;
457

458
        table = sym_mnt_new_table();
4✔
459
        if (!table)
4✔
460
                return -ENOMEM;
461

462
        r = sym_mnt_table_parse_stream(table, proc_self_mountinfo, "/proc/self/mountinfo");
4✔
463
        if (r < 0)
4✔
464
                return r;
465

466
        fs = sym_mnt_table_find_target(table, path, MNT_ITER_FORWARD);
4✔
467
        if (!fs) {
4✔
468
                r = access_nofollow(path, F_OK); /* Hmm, it's not in the mount table, but does it exist at all? */
4✔
469
                if (r < 0)
4✔
470
                        return r;
471

472
                return -EINVAL; /* Not a mount point we recognize */
2✔
473
        }
474

475
        opts = sym_mnt_fs_get_vfs_options(fs);
×
476
        if (opts) {
×
477
                r = sym_mnt_optstr_get_flags(opts, &flags, sym_mnt_get_builtin_optmap(MNT_LINUX_MAP));
×
478
                if (r < 0)
×
479
                        log_debug_errno(r, "Could not get flags for '%s', ignoring: %m", path);
×
480
        }
481

482
        r = mount_nofollow(NULL, path, NULL, ((flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags) & ~MS_RELATIME, NULL);
×
483
        if (r < 0) {
×
484
                if (((flags ^ new_flags) & flags_mask & ~MS_RELATIME) != 0) /* Ignore MS_RELATIME again,
×
485
                                                                             * since kernel adds it in
486
                                                                             * everywhere, because it's the
487
                                                                             * default. */
488
                        return r;
489

490
                /* Let's handle redundant remounts gracefully */
491
                log_debug_errno(r, "Failed to remount '%s' but flags already match what we want, ignoring: %m", path);
4✔
492
        }
493

494
        return 0;
495
#else
496
        return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "libmount support not compiled in");
497
#endif
498
}
499

500
int bind_remount_one(const char *path, unsigned long new_flags, unsigned long flags_mask) {
56✔
501
        _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
56✔
502

503
        proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
56✔
504
        if (!proc_self_mountinfo)
56✔
505
                return log_debug_errno(errno, "Failed to open %s: %m", "/proc/self/mountinfo");
×
506

507
        return bind_remount_one_with_mountinfo(path, new_flags, flags_mask, proc_self_mountinfo);
56✔
508
}
509

510
/* Switches the root file system with pivot_root(). Operates on the current working directory; 'path' is
 * only used for log messages and 'fd_newroot' is only validated. Returns 0 on success, or the result of
 * log_debug_errno() for the call that failed. */
static int mount_switch_root_pivot(int fd_newroot, const char *path) {
        assert(fd_newroot >= 0);
        assert(path);

        /* Let the kernel tuck the new root under the old one. */
        if (pivot_root(".", ".") >= 0) {
                /* Get rid of the old root and reveal our brand new root. (This will always operate on the
                 * top-most mount on our cwd, regardless what our current directory actually points to.) */
                if (umount2(".", MNT_DETACH) >= 0)
                        return 0;

                return log_debug_errno(errno, "Failed to unmount old rootfs: %m");
        }

        return log_debug_errno(errno, "Failed to pivot root to new rootfs '%s': %m", path);
}
525

526
/* Switches the root file system by moving the mount at the current working directory onto "/" with
 * MS_MOVE and then calling chroot(). 'path' is only used for log messages and 'fd_newroot' is only
 * validated. Returns 0 on success, or the result of log_debug_errno() for the call that failed. */
static int mount_switch_root_move(int fd_newroot, const char *path) {
        assert(fd_newroot >= 0);
        assert(path);

        /* Move the new root fs */
        if (mount(".", "/", NULL, MS_MOVE, NULL) >= 0) {
                /* Also change root dir */
                if (chroot(".") >= 0)
                        return 0;

                return log_debug_errno(errno, "Failed to chroot to new rootfs '%s': %m", path);
        }

        return log_debug_errno(errno, "Failed to move new rootfs '%s': %m", path);
}
540

541
int mount_switch_root_full(const char *path, unsigned long mount_propagation_flag, bool force_ms_move) {
2,295✔
542
        _cleanup_close_ int fd_newroot = -EBADF;
2,295✔
543
        int r, is_current_root;
2,295✔
544

545
        assert(path);
2,295✔
546
        assert(mount_propagation_flag_is_valid(mount_propagation_flag));
2,295✔
547

548
        fd_newroot = open(path, O_PATH|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
2,295✔
549
        if (fd_newroot < 0)
2,295✔
550
                return log_debug_errno(errno, "Failed to open new rootfs '%s': %m", path);
×
551

552
        is_current_root = path_is_root_at(fd_newroot, NULL);
2,295✔
553
        if (is_current_root < 0)
2,295✔
554
                return log_debug_errno(is_current_root, "Failed to determine if target dir is our root already: %m");
×
555

556
        /* Change into the new rootfs. */
557
        if (fchdir(fd_newroot) < 0)
2,295✔
558
                return log_debug_errno(errno, "Failed to chdir into new rootfs '%s': %m", path);
×
559

560
        /* Make this a NOP if we are supposed to switch to our current root fs. After all, both pivot_root()
561
         * and MS_MOVE don't like that. */
562
        if (!is_current_root) {
2,295✔
563
                if (!force_ms_move) {
2,293✔
564
                        r = mount_switch_root_pivot(fd_newroot, path);
2,293✔
565
                        if (r < 0) {
2,293✔
566
                                log_debug_errno(r, "Failed to pivot into new rootfs '%s', will try to use MS_MOVE instead: %m", path);
×
567
                                force_ms_move = true;
568
                        }
569
                }
570
                if (force_ms_move) {
571
                        /* Failed to pivot_root() fallback to MS_MOVE. For example, this may happen if the rootfs is
572
                         * an initramfs in which case pivot_root() isn't supported. */
573
                        r = mount_switch_root_move(fd_newroot, path);
×
574
                        if (r < 0)
×
575
                                return log_debug_errno(r, "Failed to switch to new rootfs '%s' with MS_MOVE: %m", path);
×
576
                }
577
        }
578

579
        log_debug("Successfully switched root to '%s'.", path);
2,295✔
580

581
        /* Finally, let's establish the requested propagation flags. */
582
        if (mount_propagation_flag == 0)
2,295✔
583
                return 0;
584

585
        if (mount(NULL, ".", NULL, mount_propagation_flag | MS_REC, NULL) < 0)
237✔
586
                return log_debug_errno(errno, "Failed to turn new rootfs '%s' into %s mount: %m",
×
587
                                       mount_propagation_flag_to_string(mount_propagation_flag), path);
588

589
        return 0;
590
}
591

592
int repeat_unmount(const char *path, int flags) {
17✔
593
        bool done = false;
17✔
594

595
        assert(path);
17✔
596

597
        /* If there are multiple mounts on a mount point, this
598
         * removes them all */
599

600
        for (;;) {
48✔
601
                if (umount2(path, flags) < 0) {
48✔
602

603
                        if (errno == EINVAL)
17✔
604
                                return done;
17✔
605

606
                        return -errno;
×
607
                }
608

609
                done = true;
610
        }
611
}
612

613
int mode_to_inaccessible_node(
4,795✔
614
                const char *runtime_dir,
615
                mode_t mode,
616
                char **ret) {
617

618
        /* This function maps a node type to a corresponding inaccessible file node. These nodes are created
619
         * during early boot by PID 1. In some cases we lacked the privs to create the character and block
620
         * devices (maybe because we run in an userns environment, or miss CAP_SYS_MKNOD, or run with a
621
         * devices policy that excludes device nodes with major and minor of 0), but that's fine, in that
622
         * case we use an AF_UNIX file node instead, which is not the same, but close enough for most
623
         * uses. And most importantly, the kernel allows bind mounts from socket nodes to any non-directory
624
         * file nodes, and that's the most important thing that matters.
625
         *
626
         * Note that the runtime directory argument shall be the top-level runtime directory, i.e. /run/ if
627
         * we operate in system context and $XDG_RUNTIME_DIR if we operate in user context. */
628

629
        _cleanup_free_ char *d = NULL;
4,795✔
630
        const char *node;
4,795✔
631

632
        assert(ret);
4,795✔
633

634
        if (!runtime_dir)
4,795✔
635
                runtime_dir = "/run";
4✔
636

637
        if (S_ISLNK(mode))
4,795✔
638
                return -EINVAL;
639

640
        node = inode_type_to_string(mode);
4,795✔
641
        if (!node)
4,795✔
642
                return -EINVAL;
643

644
        d = path_join(runtime_dir, "systemd/inaccessible", node);
4,795✔
645
        if (!d)
4,795✔
646
                return -ENOMEM;
647

648
        /* On new kernels unprivileged users are permitted to create 0:0 char device nodes (because they also
649
         * act as whiteout inode for overlayfs), but no other char or block device nodes. On old kernels no
650
         * device node whatsoever may be created by unprivileged processes. Hence, if the caller asks for the
651
         * inaccessible block device node let's see if the block device node actually exists, and if not,
652
         * fall back to the character device node. From there fall back to the socket device node. This means
653
         * in the best case we'll get the right device node type — but if not we'll hopefully at least get a
654
         * device node at all. */
655

656
        if (S_ISBLK(mode) &&
4,795✔
657
            access(d, F_OK) < 0 && errno == ENOENT) {
×
658
                free(d);
×
659
                d = path_join(runtime_dir, "/systemd/inaccessible/chr");
×
660
                if (!d)
×
661
                        return -ENOMEM;
662
        }
663

664
        if (IN_SET(mode & S_IFMT, S_IFBLK, S_IFCHR) &&
5,344✔
665
            access(d, F_OK) < 0 && errno == ENOENT) {
549✔
666
                free(d);
×
667
                d = path_join(runtime_dir, "/systemd/inaccessible/sock");
×
668
                if (!d)
×
669
                        return -ENOMEM;
670
        }
671

672
        *ret = TAKE_PTR(d);
4,795✔
673
        return 0;
4,795✔
674
}
675

676
int mount_flags_to_string(unsigned long flags, char **ret) {
35,922✔
677
        static const struct {
35,922✔
678
                unsigned long flag;
679
                const char *name;
680
        } map[] = {
681
                { .flag = MS_RDONLY,      .name = "MS_RDONLY",      },
682
                { .flag = MS_NOSUID,      .name = "MS_NOSUID",      },
683
                { .flag = MS_NODEV,       .name = "MS_NODEV",       },
684
                { .flag = MS_NOEXEC,      .name = "MS_NOEXEC",      },
685
                { .flag = MS_SYNCHRONOUS, .name = "MS_SYNCHRONOUS", },
686
                { .flag = MS_REMOUNT,     .name = "MS_REMOUNT",     },
687
                { .flag = MS_MANDLOCK,    .name = "MS_MANDLOCK",    },
688
                { .flag = MS_DIRSYNC,     .name = "MS_DIRSYNC",     },
689
                { .flag = MS_NOSYMFOLLOW, .name = "MS_NOSYMFOLLOW", },
690
                { .flag = MS_NOATIME,     .name = "MS_NOATIME",     },
691
                { .flag = MS_NODIRATIME,  .name = "MS_NODIRATIME",  },
692
                { .flag = MS_BIND,        .name = "MS_BIND",        },
693
                { .flag = MS_MOVE,        .name = "MS_MOVE",        },
694
                { .flag = MS_REC,         .name = "MS_REC",         },
695
                { .flag = MS_SILENT,      .name = "MS_SILENT",      },
696
                { .flag = MS_POSIXACL,    .name = "MS_POSIXACL",    },
697
                { .flag = MS_UNBINDABLE,  .name = "MS_UNBINDABLE",  },
698
                { .flag = MS_PRIVATE,     .name = "MS_PRIVATE",     },
699
                { .flag = MS_SLAVE,       .name = "MS_SLAVE",       },
700
                { .flag = MS_SHARED,      .name = "MS_SHARED",      },
701
                { .flag = MS_RELATIME,    .name = "MS_RELATIME",    },
702
                { .flag = MS_KERNMOUNT,   .name = "MS_KERNMOUNT",   },
703
                { .flag = MS_I_VERSION,   .name = "MS_I_VERSION",   },
704
                { .flag = MS_STRICTATIME, .name = "MS_STRICTATIME", },
705
                { .flag = MS_LAZYTIME,    .name = "MS_LAZYTIME",    },
706
        };
707
        _cleanup_free_ char *str = NULL;
35,922✔
708

709
        assert(ret);
35,922✔
710

711
        FOREACH_ELEMENT(entry, map)
933,972✔
712
                if (flags & entry->flag) {
898,050✔
713
                        if (!strextend_with_separator(&str, "|", entry->name))
82,556✔
714
                                return -ENOMEM;
715
                        flags &= ~entry->flag;
82,556✔
716
                }
717

718
        if (!str || flags != 0)
35,922✔
719
                if (strextendf_with_separator(&str, "|", "%lx", flags) < 0)
193✔
720
                        return -ENOMEM;
721

722
        *ret = TAKE_PTR(str);
35,922✔
723
        return 0;
35,922✔
724
}
725

726
int mount_verbose_full(
34,631✔
727
                int error_log_level,
728
                const char *what,
729
                const char *where,
730
                const char *type,
731
                unsigned long flags,
732
                const char *options,
733
                bool follow_symlink) {
734

735
        _cleanup_free_ char *fl = NULL, *o = NULL;
34,631✔
736
        unsigned long f;
34,631✔
737
        int r;
34,631✔
738

739
        r = mount_option_mangle(options, flags, &f, &o);
34,631✔
740
        if (r < 0)
34,631✔
741
                return log_full_errno(error_log_level, r,
×
742
                                      "Failed to mangle mount options %s: %m",
743
                                      strempty(options));
744

745
        (void) mount_flags_to_string(f, &fl);
34,631✔
746

747
        if (FLAGS_SET(f, MS_REMOUNT|MS_BIND))
34,631✔
748
                log_debug("Changing mount flags %s (%s \"%s\")...",
7,027✔
749
                          where, strnull(fl), strempty(o));
750
        else if (f & MS_REMOUNT)
31,117✔
751
                log_debug("Remounting superblock %s (%s \"%s\")...",
4✔
752
                          where, strnull(fl), strempty(o));
753
        else if (f & (MS_SHARED|MS_PRIVATE|MS_SLAVE|MS_UNBINDABLE))
31,113✔
754
                log_debug("Changing mount propagation %s (%s \"%s\")",
6,995✔
755
                          where, strnull(fl), strempty(o));
756
        else if (f & MS_BIND)
27,613✔
757
                log_debug("Bind-mounting %s on %s (%s \"%s\")...",
43,408✔
758
                          what, where, strnull(fl), strempty(o));
759
        else if (f & MS_MOVE)
5,899✔
760
                log_debug("Moving mount %s %s %s (%s \"%s\")...",
1,008✔
761
                          what, glyph(GLYPH_ARROW_RIGHT), where, strnull(fl), strempty(o));
762
        else
763
                log_debug("Mounting %s (%s) on %s (%s \"%s\")...",
6,424✔
764
                          strna(what), strna(type), where, strnull(fl), strempty(o));
765

766
        if (follow_symlink)
34,631✔
767
                r = RET_NERRNO(mount(what, where, type, f, o));
34,631✔
768
        else
769
                r = mount_nofollow(what, where, type, f, o);
33,396✔
770
        if (r < 0)
33,396✔
771
                return log_full_errno(error_log_level, r,
3,870✔
772
                                      "Failed to mount %s (type %s) on %s (%s \"%s\"): %m",
773
                                      strna(what), strna(type), where, strnull(fl), strempty(o));
774
        return 0;
775
}
776

777
/* Wrapper around umount2() that logs the attempt at debug level and a failure at 'error_log_level'.
 *
 * Returns 0 on success, a negative errno-style value on failure. */
int umount_verbose(
                int error_log_level,
                const char *where,
                int flags) {

        assert(where);

        log_debug("Unmounting '%s'...", where);

        if (umount2(where, flags) >= 0)
                return 0;

        return log_full_errno(error_log_level, errno, "Failed to unmount '%s': %m", where);
}
791

792
int umountat_detach_verbose(
242✔
793
                int error_log_level,
794
                int fd,
795
                const char *where) {
796

797
        /* Similar to umountat_verbose(), but goes by fd + path. This implies MNT_DETACH, since to do this we
798
         * must pin the inode in question via an fd. */
799

800
        assert(fd >= 0 || fd == AT_FDCWD);
242✔
801

802
        /* If neither fd nor path are specified take this as reference to the cwd */
803
        if (fd == AT_FDCWD && isempty(where))
242✔
804
                return umount_verbose(error_log_level, ".", MNT_DETACH|UMOUNT_NOFOLLOW);
242✔
805

806
        /* If we don't actually take the fd into consideration for this operation shortcut things, so that we
807
         * don't have to open the inode */
808
        if (fd == AT_FDCWD || path_is_absolute(where))
242✔
809
                return umount_verbose(error_log_level, where, MNT_DETACH|UMOUNT_NOFOLLOW);
×
810

811
        _cleanup_free_ char *prefix = NULL;
484✔
812
        const char *p;
242✔
813
        if (fd_get_path(fd, &prefix) < 0)
242✔
814
                p = "<fd>"; /* if we can't get the path, return something vaguely useful */
815
        else
816
                p = prefix;
242✔
817
        _cleanup_free_ char *joined = isempty(where) ? strdup(p) : path_join(p, where);
620✔
818

819
        log_debug("Unmounting '%s'...", strna(joined));
242✔
820

821
        _cleanup_close_ int inode_fd = -EBADF;
242✔
822
        int mnt_fd;
242✔
823
        if (isempty(where))
242✔
824
                mnt_fd = fd;
825
        else {
826
                inode_fd = openat(fd, where, O_PATH|O_CLOEXEC|O_NOFOLLOW);
136✔
827
                if (inode_fd < 0)
136✔
828
                        return log_full_errno(error_log_level, errno, "Failed to pin '%s': %m", strna(joined));
×
829

830
                mnt_fd = inode_fd;
831
        }
832

833
        if (umount2(FORMAT_PROC_FD_PATH(mnt_fd), MNT_DETACH) < 0)
242✔
834
                return log_full_errno(error_log_level, errno, "Failed to unmount '%s': %m", strna(joined));
15✔
835

836
        return 0;
227✔
837
}
838

839
int mount_exchange_graceful(int fsmount_fd, const char *dest, bool mount_beneath) {
90✔
840
        int r;
90✔
841

842
        assert(fsmount_fd >= 0);
90✔
843
        assert(dest);
90✔
844

845
        /* First, try to mount beneath an existing mount point, and if that works, umount the old mount,
846
         * which is now at the top. This will ensure we can atomically replace a mount. Note that this works
847
         * also in the case where there are submounts down the tree. Mount propagation is allowed but
848
         * restricted to layouts that don't end up propagation the new mount on top of the mount stack.  If
849
         * this is not supported (minimum kernel v6.5), or if there is no mount on the mountpoint, we get
850
         * -EINVAL and then we fallback to normal mounting. */
851

852
        r = RET_NERRNO(move_mount(fsmount_fd, /* from_path= */ "",
161✔
853
                                  /* to_fd= */ -EBADF, dest,
854
                                  MOVE_MOUNT_F_EMPTY_PATH | (mount_beneath ? MOVE_MOUNT_BENEATH : 0)));
855
        if (mount_beneath) {
90✔
856
                if (r >= 0) /* Mounting beneath worked! Now unmount the upper mount. */
19✔
857
                        return umount_verbose(LOG_DEBUG, dest, UMOUNT_NOFOLLOW|MNT_DETACH);
15✔
858

859
                if (r == -EINVAL) { /* Fallback if mount_beneath is not supported */
4✔
860
                        log_debug_errno(r,
4✔
861
                                        "Cannot mount beneath '%s', falling back to overmount: %m",
862
                                        dest);
863
                        return mount_exchange_graceful(fsmount_fd, dest, /* mount_beneath= */ false);
4✔
864
                }
865
        }
866

867
        return r;
868
}
869

870
int mount_option_mangle(
35,964✔
871
                const char *options,
872
                unsigned long mount_flags,
873
                unsigned long *ret_mount_flags,
874
                char **ret_remaining_options) {
875

876
        assert(ret_mount_flags);
35,964✔
877
        assert(ret_remaining_options);
35,964✔
878

879
        if (!options) {
35,964✔
880
                *ret_mount_flags = mount_flags;
30,805✔
881
                *ret_remaining_options = NULL;
30,805✔
882
                return 0;
35,964✔
883
        }
884

885
#if HAVE_LIBMOUNT
886
        const struct libmnt_optmap *map;
5,159✔
887
        _cleanup_free_ char *ret = NULL;
5,159✔
888
        int r;
5,159✔
889

890
        /* This extracts mount flags from the mount options, and stores
891
         * non-mount-flag options to '*ret_remaining_options'.
892
         * E.g.,
893
         * "rw,nosuid,nodev,relatime,size=1630748k,mode=0700,uid=1000,gid=1000"
894
         * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
895
         * "size=1630748k,mode=0700,uid=1000,gid=1000".
896
         * See more examples in test-mount-util.c.
897
         *
898
         * If 'options' does not contain any non-mount-flag options,
899
         * then '*ret_remaining_options' is set to NULL instead of empty string.
900
         * The validity of options stored in '*ret_remaining_options' is not checked.
901
         * If 'options' is NULL, this just copies 'mount_flags' to *ret_mount_flags. */
902

903
        r = dlopen_libmount();
5,159✔
904
        if (r < 0)
5,159✔
905
                return r;
906

907
        map = sym_mnt_get_builtin_optmap(MNT_LINUX_MAP);
5,159✔
908
        if (!map)
5,159✔
909
                return -EINVAL;
910

911
        for (const char *p = options;;) {
5,159✔
912
                _cleanup_free_ char *word = NULL;
16,634✔
913
                const struct libmnt_optmap *ent;
21,792✔
914

915
                r = extract_first_word(&p, &word, ",", EXTRACT_KEEP_QUOTE);
21,792✔
916
                if (r < 0)
21,792✔
917
                        return r;
918
                if (r == 0)
21,791✔
919
                        break;
920

921
                for (ent = map; ent->name; ent++) {
695,825✔
922
                        /* All entries in MNT_LINUX_MAP do not take any argument.
923
                         * Thus, ent->name does not contain "=" or "[=]". */
924
                        if (!streq(word, ent->name))
679,292✔
925
                                continue;
679,192✔
926

927
                        if (!(ent->mask & MNT_INVERT))
100✔
928
                                mount_flags |= ent->id;
91✔
929
                        else
930
                                mount_flags &= ~ent->id;
9✔
931

932
                        break;
933
                }
934

935
                /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */
936
                if (!ent->name &&
33,166✔
937
                    !startswith_no_case(word, "x-") &&
33,064✔
938
                    !strextend_with_separator(&ret, ",", word))
16,531✔
939
                        return -ENOMEM;
940
        }
941

942
        *ret_mount_flags = mount_flags;
5,158✔
943
        *ret_remaining_options = TAKE_PTR(ret);
5,158✔
944

945
        return 0;
5,158✔
946
#else
947
        return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "libmount support not compiled in");
948
#endif
949
}
950

951
static int mount_in_namespace_legacy(
×
952
                const char *chased_src_path,
953
                int chased_src_fd,
954
                struct stat *chased_src_st,
955
                const char *propagate_path,
956
                const char *incoming_path,
957
                const char *dest,
958
                int pidns_fd,
959
                int mntns_fd,
960
                int root_fd,
961
                MountInNamespaceFlags flags,
962
                const MountOptions *options,
963
                const ImagePolicy *image_policy) {
964

965
        _cleanup_close_pair_ int errno_pipe_fd[2] = EBADF_PAIR;
×
966
        char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p;
×
967
        bool mount_slave_created = false, mount_slave_mounted = false,
×
968
                mount_tmp_created = false, mount_tmp_mounted = false,
×
969
                mount_outside_created = false, mount_outside_mounted = false;
×
970
        _cleanup_(pidref_done) PidRef child = PIDREF_NULL;
×
971
        int r;
×
972

973
        assert(chased_src_path);
×
974
        assert(chased_src_fd >= 0);
×
975
        assert(chased_src_st);
×
976
        assert(propagate_path);
×
977
        assert(incoming_path);
×
978
        assert(dest);
×
979
        assert(pidns_fd >= 0);
×
980
        assert(mntns_fd >= 0);
×
981
        assert(root_fd >= 0);
×
982
        assert(!options || (flags & MOUNT_IN_NAMESPACE_IS_IMAGE));
×
983

984
        p = strjoina(propagate_path, "/");
×
985
        r = access_nofollow(p, F_OK);
×
986
        if (r < 0)
×
987
                return log_debug_errno(r == -ENOENT ? SYNTHETIC_ERRNO(EOPNOTSUPP) : r, "Target does not allow propagation of mount points");
×
988

989
        /* Our goal is to install a new bind mount into the container,
990
           possibly read-only. This is irritatingly complex
991
           unfortunately, currently.
992

993
           First, we start by creating a private playground in /tmp,
994
           that we can mount MS_SLAVE. (Which is necessary, since
995
           MS_MOVE cannot be applied to mounts with MS_SHARED parent
996
           mounts.) */
997

998
        if (!mkdtemp(mount_slave))
×
999
                return log_debug_errno(errno, "Failed to create playground %s: %m", mount_slave);
×
1000

1001
        mount_slave_created = true;
×
1002

1003
        r = mount_nofollow_verbose(LOG_DEBUG, mount_slave, mount_slave, NULL, MS_BIND, NULL);
×
1004
        if (r < 0)
×
1005
                goto finish;
×
1006

1007
        mount_slave_mounted = true;
×
1008

1009
        r = mount_nofollow_verbose(LOG_DEBUG, NULL, mount_slave, NULL, MS_SLAVE, NULL);
×
1010
        if (r < 0)
×
1011
                goto finish;
×
1012

1013
        /* Second, we mount the source file or directory to a directory inside of our MS_SLAVE playground. */
1014
        mount_tmp = strjoina(mount_slave, "/mount");
×
1015
        r = make_mount_point_inode_from_mode(AT_FDCWD, mount_tmp, (flags & MOUNT_IN_NAMESPACE_IS_IMAGE) ? S_IFDIR : chased_src_st->st_mode, 0700);
×
1016
        if (r < 0) {
×
1017
                log_debug_errno(r, "Failed to create temporary mount point %s: %m", mount_tmp);
×
1018
                goto finish;
×
1019
        }
1020

1021
        mount_tmp_created = true;
×
1022

1023
        if (flags & MOUNT_IN_NAMESPACE_IS_IMAGE)
×
1024
                r = verity_dissect_and_mount(
×
1025
                                chased_src_fd,
1026
                                chased_src_path,
1027
                                mount_tmp,
1028
                                options,
1029
                                image_policy,
1030
                                /* image_filter= */ NULL,
1031
                                /* extension_release_data= */ NULL,
1032
                                /* required_class= */ _IMAGE_CLASS_INVALID,
1033
                                /* verity= */ NULL,
1034
                                RUNTIME_SCOPE_SYSTEM,
1035
                                /* ret_image= */ NULL);
1036
        else
1037
                r = mount_follow_verbose(LOG_DEBUG, FORMAT_PROC_FD_PATH(chased_src_fd), mount_tmp, NULL, MS_BIND, NULL);
×
1038
        if (r < 0)
×
1039
                goto finish;
×
1040

1041
        mount_tmp_mounted = true;
×
1042

1043
        /* Third, we remount the new bind mount read-only if requested. */
1044
        if (flags & MOUNT_IN_NAMESPACE_READ_ONLY) {
×
1045
                r = mount_nofollow_verbose(LOG_DEBUG, NULL, mount_tmp, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
×
1046
                if (r < 0)
×
1047
                        goto finish;
×
1048
        }
1049

1050
        /* Fourth, we move the new bind mount into the propagation directory. This way it will appear there read-only
1051
         * right-away. */
1052

1053
        mount_outside = strjoina(propagate_path, "/XXXXXX");
×
1054
        if ((flags & MOUNT_IN_NAMESPACE_IS_IMAGE) || S_ISDIR(chased_src_st->st_mode))
×
1055
                r = mkdtemp(mount_outside) ? 0 : -errno;
×
1056
        else {
1057
                r = mkostemp_safe(mount_outside);
×
1058
                safe_close(r);
×
1059
        }
1060
        if (r < 0) {
×
1061
                log_debug_errno(r, "Cannot create propagation file or directory %s: %m", mount_outside);
×
1062
                goto finish;
×
1063
        }
1064

1065
        mount_outside_created = true;
×
1066

1067
        r = mount_nofollow_verbose(LOG_DEBUG, mount_tmp, mount_outside, NULL, MS_MOVE, NULL);
×
1068
        if (r < 0)
×
1069
                goto finish;
×
1070

1071
        mount_outside_mounted = true;
×
1072
        mount_tmp_mounted = false;
×
1073

1074
        if ((flags & MOUNT_IN_NAMESPACE_IS_IMAGE) || S_ISDIR(chased_src_st->st_mode))
×
1075
                (void) rmdir(mount_tmp);
×
1076
        else
1077
                (void) unlink(mount_tmp);
×
1078
        mount_tmp_created = false;
×
1079

1080
        (void) umount_verbose(LOG_DEBUG, mount_slave, UMOUNT_NOFOLLOW);
×
1081
        mount_slave_mounted = false;
×
1082

1083
        (void) rmdir(mount_slave);
×
1084
        mount_slave_created = false;
×
1085

1086
        if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0) {
×
1087
                log_debug_errno(errno, "Failed to create pipe: %m");
×
1088
                goto finish;
×
1089
        }
1090

1091
        r = namespace_fork(
×
1092
                        "(sd-bindmnt)",
1093
                        "(sd-bindmnt-inner)",
1094
                        FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGTERM,
1095
                        pidns_fd,
1096
                        mntns_fd,
1097
                        /* netns_fd= */ -EBADF,
1098
                        /* userns_fd= */ -EBADF,
1099
                        root_fd,
1100
                        &child);
1101
        if (r < 0)
×
1102
                goto finish;
×
1103
        if (r == 0) {
×
1104
                _cleanup_free_ char *mount_outside_fn = NULL, *mount_inside = NULL;
×
1105

1106
                errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);
×
1107

1108
                _cleanup_close_ int dest_fd = -EBADF;
×
1109
                _cleanup_free_ char *dest_fn = NULL;
×
1110
                r = chase(dest, /* root= */ NULL, CHASE_PARENT|CHASE_EXTRACT_FILENAME|((flags & MOUNT_IN_NAMESPACE_MAKE_FILE_OR_DIRECTORY) ? CHASE_MKDIR_0755 : 0), &dest_fn, &dest_fd);
×
1111
                if (r < 0)
×
1112
                        log_debug_errno(r, "Failed to pin parent directory of mount '%s', ignoring: %m", dest);
×
1113
                else if (flags & MOUNT_IN_NAMESPACE_MAKE_FILE_OR_DIRECTORY) {
×
1114
                        r = make_mount_point_inode_from_mode(dest_fd, dest_fn, (flags & MOUNT_IN_NAMESPACE_IS_IMAGE) ? S_IFDIR : chased_src_st->st_mode, 0700);
×
1115
                        if (r < 0)
×
1116
                                log_debug_errno(r, "Failed to make mount point inode of mount '%s', ignoring: %m", dest);
×
1117
                }
1118

1119
                /* Fifth, move the mount to the right place inside */
1120
                r = path_extract_filename(mount_outside, &mount_outside_fn);
×
1121
                if (r < 0) {
×
1122
                        log_debug_errno(r, "Failed to extract filename from propagation file or directory '%s': %m", mount_outside);
×
1123
                        report_errno_and_exit(errno_pipe_fd[1], r);
×
1124
                }
1125

1126
                mount_inside = path_join(incoming_path, mount_outside_fn);
×
1127
                if (!mount_inside)
×
1128
                        report_errno_and_exit(errno_pipe_fd[1], log_oom_debug());
×
1129

1130
                r = mount_nofollow_verbose(LOG_DEBUG, mount_inside, dest_fd >= 0 ? FORMAT_PROC_FD_PATH(dest_fd) : dest, /* fstype= */ NULL, MS_MOVE, /* options= */ NULL);
×
1131
                if (r < 0)
×
1132
                        report_errno_and_exit(errno_pipe_fd[1], r);
×
1133

1134
                _exit(EXIT_SUCCESS);
×
1135
        }
1136

1137
        errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
×
1138

1139
        r = pidref_wait_for_terminate_and_check("(sd-bindmnt)", &child, 0);
×
1140
        if (r < 0) {
×
1141
                log_debug_errno(r, "Failed to wait for child: %m");
×
1142
                goto finish;
×
1143
        }
1144
        if (r != EXIT_SUCCESS) {
×
1145
                if (read(errno_pipe_fd[0], &r, sizeof(r)) == sizeof(r))
×
1146
                        log_debug_errno(r, "Failed to mount: %m");
×
1147
                else
1148
                        log_debug("Child failed.");
×
1149
                goto finish;
×
1150
        }
1151

1152
finish:
×
1153
        if (mount_outside_mounted)
×
1154
                (void) umount_verbose(LOG_DEBUG, mount_outside, UMOUNT_NOFOLLOW);
×
1155
        if (mount_outside_created) {
×
1156
                if ((flags & MOUNT_IN_NAMESPACE_IS_IMAGE) || S_ISDIR(chased_src_st->st_mode))
×
1157
                        (void) rmdir(mount_outside);
×
1158
                else
1159
                        (void) unlink(mount_outside);
×
1160
        }
1161

1162
        if (mount_tmp_mounted)
×
1163
                (void) umount_verbose(LOG_DEBUG, mount_tmp, UMOUNT_NOFOLLOW);
×
1164
        if (mount_tmp_created) {
×
1165
                if ((flags & MOUNT_IN_NAMESPACE_IS_IMAGE) || S_ISDIR(chased_src_st->st_mode))
×
1166
                        (void) rmdir(mount_tmp);
×
1167
                else
1168
                        (void) unlink(mount_tmp);
×
1169
        }
1170

1171
        if (mount_slave_mounted)
×
1172
                (void) umount_verbose(LOG_DEBUG, mount_slave, UMOUNT_NOFOLLOW);
×
1173
        if (mount_slave_created)
×
1174
                (void) rmdir(mount_slave);
×
1175

1176
        return r;
×
1177
}
1178

1179
static int mount_in_namespace(
6✔
1180
                const PidRef *target,
1181
                const char *propagate_path,
1182
                const char *incoming_path,
1183
                const char *src,
1184
                const char *dest,
1185
                MountInNamespaceFlags flags,
1186
                const MountOptions *options,
1187
                const ImagePolicy *image_policy) {
1188

1189
        _cleanup_close_ int mntns_fd = -EBADF, root_fd = -EBADF, pidns_fd = -EBADF, chased_src_fd = -EBADF;
18✔
1190
        _cleanup_free_ char *chased_src_path = NULL;
6✔
1191
        struct stat st;
6✔
1192
        int r;
6✔
1193

1194
        assert(propagate_path);
6✔
1195
        assert(incoming_path);
6✔
1196
        assert(src);
6✔
1197
        assert(dest);
6✔
1198
        assert((flags & MOUNT_IN_NAMESPACE_IS_IMAGE) || (!options && !image_policy));
6✔
1199

1200
        if (!pidref_is_set(target))
12✔
1201
                return -ESRCH;
1202

1203
        r = pidref_namespace_open(target, &pidns_fd, &mntns_fd, /* ret_netns_fd= */ NULL, /* ret_userns_fd= */ NULL, &root_fd);
6✔
1204
        if (r < 0)
6✔
1205
                return log_debug_errno(r, "Failed to retrieve FDs of the target process' namespace: %m");
×
1206

1207
        r = is_our_namespace(mntns_fd, NAMESPACE_MOUNT);
6✔
1208
        if (r < 0)
6✔
1209
                return log_debug_errno(r, "Failed to determine if mount namespaces are equal: %m");
×
1210
        /* We can't add new mounts at runtime if the process wasn't started in a namespace */
1211
        if (r > 0)
6✔
1212
                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to activate bind mount in target, not running in a mount namespace.");
×
1213

1214
        r = chase(src, NULL, 0, &chased_src_path, &chased_src_fd);
6✔
1215
        if (r < 0)
6✔
1216
                return log_debug_errno(r, "Failed to resolve source path '%s': %m", src);
×
1217
        log_debug("Chased source path '%s': %s", src, chased_src_path);
6✔
1218

1219
        if (fstat(chased_src_fd, &st) < 0)
6✔
1220
                return log_debug_errno(errno, "Failed to stat() resolved source path '%s': %m", src);
×
1221
        if (S_ISLNK(st.st_mode)) /* This shouldn't really happen, given that we just chased the symlinks above, but let's better be safe… */
6✔
1222
                return log_debug_errno(SYNTHETIC_ERRNO(ELOOP), "Source path '%s' can't be a symbolic link.", src);
×
1223

1224
        if (!mount_new_api_supported()) /* Fallback if we can't use the new mount API */
6✔
1225
                return mount_in_namespace_legacy(
×
1226
                                chased_src_path,
1227
                                chased_src_fd,
1228
                                &st,
1229
                                propagate_path,
1230
                                incoming_path,
1231
                                dest,
1232
                                pidns_fd,
1233
                                mntns_fd,
1234
                                root_fd,
1235
                                flags,
1236
                                options,
1237
                                image_policy);
1238

1239
        _cleanup_(dissected_image_unrefp) DissectedImage *img = NULL;
×
1240
        _cleanup_close_ int new_mount_fd = -EBADF;
6✔
1241
        _cleanup_close_pair_ int errno_pipe_fd[2] = EBADF_PAIR;
6✔
1242
        _cleanup_(pidref_done) PidRef child = PIDREF_NULL;
6✔
1243

1244
        if (flags & MOUNT_IN_NAMESPACE_IS_IMAGE) {
6✔
1245
                r = verity_dissect_and_mount(
2✔
1246
                                chased_src_fd,
1247
                                chased_src_path,
1248
                                /* dest= */ NULL,
1249
                                options,
1250
                                image_policy,
1251
                                /* image_filter= */ NULL,
1252
                                /* extension_release_data= */ NULL,
1253
                                /* required_class= */ _IMAGE_CLASS_INVALID,
1254
                                /* verity= */ NULL,
1255
                                RUNTIME_SCOPE_SYSTEM,
1256
                                &img);
1257
                if (r < 0)
2✔
1258
                        return log_debug_errno(r,
×
1259
                                               "Failed to dissect and mount image '%s': %m",
1260
                                               chased_src_path);
1261
        } else {
1262
                new_mount_fd = open_tree(
4✔
1263
                                chased_src_fd,
1264
                                "",
1265
                                OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC|AT_SYMLINK_NOFOLLOW|AT_EMPTY_PATH);
1266
                if (new_mount_fd < 0)
4✔
1267
                        return log_debug_errno(
×
1268
                                        errno,
1269
                                        "Failed to open mount source '%s': %m",
1270
                                        chased_src_path);
1271

1272
                if ((flags & MOUNT_IN_NAMESPACE_READ_ONLY) && mount_setattr(new_mount_fd, "", AT_EMPTY_PATH,
4✔
1273
                                               &(struct mount_attr) {
×
1274
                                                       .attr_set = MOUNT_ATTR_RDONLY,
1275
                                               }, MOUNT_ATTR_SIZE_VER0) < 0)
1276
                        return log_debug_errno(errno,
×
1277
                                               "Failed to set mount for '%s' to read only: %m",
1278
                                               chased_src_path);
1279
        }
1280

1281
        if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0)
6✔
1282
                return log_debug_errno(errno, "Failed to create pipe: %m");
×
1283

1284
        r = namespace_fork("(sd-bindmnt)",
6✔
1285
                           "(sd-bindmnt-inner)",
1286
                           FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGTERM,
1287
                           pidns_fd,
1288
                           mntns_fd,
1289
                           /* netns_fd= */ -EBADF,
1290
                           /* userns_fd= */ -EBADF,
1291
                           root_fd,
1292
                           &child);
1293
        if (r < 0)
12✔
1294
                return log_debug_errno(r, "Failed to fork off mount helper into namespace: %m");
×
1295
        if (r == 0) {
12✔
1296
                errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);
6✔
1297

1298
                _cleanup_close_ int dest_fd = -EBADF;
×
1299
                _cleanup_free_ char *dest_fn = NULL;
×
1300
                r = chase(dest, /* root= */ NULL, CHASE_PARENT|CHASE_EXTRACT_FILENAME|((flags & MOUNT_IN_NAMESPACE_MAKE_FILE_OR_DIRECTORY) ? CHASE_MKDIR_0755 : 0), &dest_fn, &dest_fd);
6✔
1301
                if (r < 0)
6✔
1302
                        report_errno_and_exit(errno_pipe_fd[1], r);
×
1303

1304
                if (flags & MOUNT_IN_NAMESPACE_MAKE_FILE_OR_DIRECTORY)
6✔
1305
                        (void) make_mount_point_inode_from_mode(dest_fd, dest_fn, img ? S_IFDIR : st.st_mode, 0700);
6✔
1306

1307
                if (img) {
6✔
1308
                        DissectImageFlags f =
2✔
1309
                                DISSECT_IMAGE_TRY_ATOMIC_MOUNT_EXCHANGE |
1310
                                DISSECT_IMAGE_ALLOW_USERSPACE_VERITY;
1311

1312
                        if (flags & MOUNT_IN_NAMESPACE_MAKE_FILE_OR_DIRECTORY)
2✔
1313
                                f |= DISSECT_IMAGE_MKDIR;
2✔
1314

1315
                        if (flags & MOUNT_IN_NAMESPACE_READ_ONLY)
2✔
1316
                                f |= DISSECT_IMAGE_READ_ONLY;
×
1317

1318
                        r = dissected_image_mount(
2✔
1319
                                        img,
1320
                                        dest,
1321
                                        /* uid_shift= */ UID_INVALID,
1322
                                        /* uid_range= */ UID_INVALID,
1323
                                        /* userns_fd= */ -EBADF,
1324
                                        f);
1325
                } else
1326
                        r = mount_exchange_graceful(new_mount_fd, dest, /* mount_beneath= */ true);
4✔
1327

1328
                report_errno_and_exit(errno_pipe_fd[1], r);
6✔
1329
        }
1330

1331
        errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
6✔
1332

1333
        r = pidref_wait_for_terminate_and_check("(sd-bindmnt)", &child, 0);
6✔
1334
        if (r < 0)
6✔
1335
                return log_debug_errno(r, "Failed to wait for child: %m");
×
1336
        if (r != EXIT_SUCCESS) {
6✔
1337
                if (read(errno_pipe_fd[0], &r, sizeof(r)) == sizeof(r))
×
1338
                        return log_debug_errno(r, "Failed to mount into namespace: %m");
×
1339

1340
                return log_debug_errno(SYNTHETIC_ERRNO(EPROTO), "Child failed.");
×
1341
        }
1342

1343
        return 0;
1344
}
1345

1346
int bind_mount_in_namespace(
4✔
1347
                const PidRef *target,
1348
                const char *propagate_path,
1349
                const char *incoming_path,
1350
                const char *src,
1351
                const char *dest,
1352
                MountInNamespaceFlags flags) {
1353

1354
        return mount_in_namespace(target,
8✔
1355
                                  propagate_path,
1356
                                  incoming_path,
1357
                                  src,
1358
                                  dest,
1359
                                  flags & ~MOUNT_IN_NAMESPACE_IS_IMAGE,
4✔
1360
                                  /* options= */ NULL,
1361
                                  /* image_policy= */ NULL);
1362
}
1363

1364
int mount_image_in_namespace(
2✔
1365
                const PidRef *target,
1366
                const char *propagate_path,
1367
                const char *incoming_path,
1368
                const char *src,
1369
                const char *dest,
1370
                MountInNamespaceFlags flags,
1371
                const MountOptions *options,
1372
                const ImagePolicy *image_policy) {
1373

1374
        return mount_in_namespace(target,
4✔
1375
                                  propagate_path,
1376
                                  incoming_path,
1377
                                  src,
1378
                                  dest,
1379
                                  flags | MOUNT_IN_NAMESPACE_IS_IMAGE,
2✔
1380
                                  options,
1381
                                  image_policy);
1382
}
1383

1384
int make_mount_point(const char *path) {
27✔
1385
        int r;
27✔
1386

1387
        assert(path);
27✔
1388

1389
        /* If 'path' is already a mount point, does nothing and returns 0. If it is not it makes it one, and returns 1. */
1390

1391
        r = path_is_mount_point(path);
27✔
1392
        if (r < 0)
27✔
1393
                return log_debug_errno(r, "Failed to determine whether '%s' is a mount point: %m", path);
×
1394
        if (r > 0)
27✔
1395
                return 0;
1396

1397
        r = mount_nofollow_verbose(LOG_DEBUG, path, path, NULL, MS_BIND|MS_REC, NULL);
11✔
1398
        if (r < 0)
11✔
1399
                return r;
×
1400

1401
        return 1;
1402
}
1403

1404
int fd_make_mount_point(int fd) {
13✔
1405
        int r;
13✔
1406

1407
        assert(fd >= 0);
13✔
1408

1409
        r = is_mount_point_at(fd, NULL, 0);
13✔
1410
        if (r < 0)
13✔
1411
                return log_debug_errno(r, "Failed to determine whether file descriptor is a mount point: %m");
×
1412
        if (r > 0)
13✔
1413
                return 0;
1414

1415
        r = mount_follow_verbose(LOG_DEBUG, FORMAT_PROC_FD_PATH(fd), FORMAT_PROC_FD_PATH(fd), NULL, MS_BIND|MS_REC, NULL);
1✔
1416
        if (r < 0)
1✔
1417
                return r;
×
1418

1419
        return 1;
1420
}
1421

1422
int mount_fd_clone(int mount_fd, bool recursive, int *replacement_fd) {
71✔
1423
        const int flags = OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC|AT_EMPTY_PATH|(recursive ? AT_RECURSIVE : 0);
71✔
1424
        int r;
71✔
1425

1426
        assert(mount_fd >= 0);
71✔
1427

1428
        /* If the input mount fd is supposed to remain cloneable after calling this function, call it as
1429
         * follows: mount_fd_clone(mount_fd, recursive, &mount_fd). */
1430

1431
        /* Clone a detached mount (that may be owned by a foreign mountns, e.g. mountfsd's). For this to
1432
         * work on older kernels, we have to jump through some hoops, because the kernel currently doesn't
1433
         * allow us to just call open_tree(OPEN_TREE_CLONE) directly to get a clone of a mount that is
1434
         * detached and owned by another mountns. Hence here's what we do: we clone short-lived child in a
1435
         * new mount namespace owned by our userns. There, we attach the mount (invisible to anyone else).
1436
         * This is sufficient to pass the kernel check, so next we use open_tree(OPEN_TREE_CLONE) to get our
1437
         * own detached mount. This we send back to the parent, which then can use it. */
1438

1439
        r = RET_NERRNO(open_tree(mount_fd, "", flags));
71✔
1440
        if (r != -EINVAL)
71✔
1441
                /* The straightforward path just works? Yay! Don't bother with the complex logic below. No
1442
                 * need to put a replacement fd in replacement_fd as the original fd is still usable. */
1443
                return r;
×
1444

1445
        _cleanup_close_pair_ int transfer_fds[2] = EBADF_PAIR;
57✔
1446
        r = socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, transfer_fds);
71✔
1447
        if (r < 0)
71✔
1448
                return log_debug_errno(errno, "Failed to create socket pair: %m");
×
1449

1450
        _cleanup_close_pair_ int errno_pipe_fds[2] = EBADF_PAIR;
57✔
1451
        if (pipe2(errno_pipe_fds, O_CLOEXEC|O_NONBLOCK) < 0)
71✔
1452
                return log_debug_errno(errno, "Failed to open pipe: %m");
×
1453

1454
        /* Fork a child. Note that we set FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE here, i.e. get a new mount namespace */
1455
        r = pidref_safe_fork_full(
142✔
1456
                        "(sd-clonemnt)",
1457
                        /* stdio_fds= */ NULL,
1458
                        (int[]) { mount_fd, transfer_fds[1], errno_pipe_fds[1] }, 3,
71✔
1459
                        FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_REOPEN_LOG|FORK_WAIT|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE,
1460
                        /* ret= */ NULL);
1461
        if (r < 0) {
71✔
1462
                errno_pipe_fds[1] = safe_close(errno_pipe_fds[1]);
×
1463

1464
                int q = read_errno(errno_pipe_fds[0]);
×
1465
                if (q < 0 && q != -EIO)
×
1466
                        return q;
1467

1468
                return r;
×
1469
        }
1470
        if (r == 0) { /* Child */
71✔
1471

1472
                /* Attach mount */
1473
                if (move_mount(mount_fd, "", -EBADF, "/", MOVE_MOUNT_F_EMPTY_PATH) < 0) {
14✔
1474
                        log_debug_errno(errno, "Failed to move mount file descriptor to '/': %m");
×
1475
                        report_errno_and_exit(errno_pipe_fds[1], -errno);
×
1476
                }
1477

1478
                /* If requested by the caller, we clone the fd twice. Why? After move_mount(), the input file
1479
                 * descriptor can't be move_mount()'ed again, which means we can't clone it again if it comes
1480
                 * from a different mount namespace. To ensure they can clone the same fd multiple times,
1481
                 * callers can pass a pointer to the input fd which will be replaced with a second clone,
1482
                 * which can be move_mount()'ed and thus can be cloned again. */
1483

1484
                for (int i = 0; i < 1 + !!replacement_fd; i++) {
55✔
1485
                        /* And now clone the attached mount that is now ours. */
1486
                        _cleanup_close_ int cloned_fd = open_tree(mount_fd, "", flags);
30✔
1487
                        if (cloned_fd < 0) {
15✔
1488
                                log_debug_errno(errno, "Failed to clone mount file descriptor: %m");
×
1489
                                report_errno_and_exit(errno_pipe_fds[1], -errno);
×
1490
                        }
1491

1492
                        /* And send it to the parent. */
1493
                        r = send_one_fd(transfer_fds[1], cloned_fd, /* flags= */ 0);
15✔
1494
                        if (r < 0)
15✔
1495
                                report_errno_and_exit(errno_pipe_fds[1], r);
×
1496
                }
1497

1498
                _exit(EXIT_SUCCESS);
14✔
1499
        }
1500

1501
        transfer_fds[1] = safe_close(transfer_fds[1]);
57✔
1502

1503
        /* Accept the new cloned mount */
1504
        _cleanup_close_ int fd1 = receive_one_fd(transfer_fds[0], 0);
114✔
1505
        if (fd1 < 0)
57✔
1506
                return fd1;
1507

1508
        if (replacement_fd) {
57✔
1509
                int fd2 = receive_one_fd(transfer_fds[0], 0);
17✔
1510
                if (fd2 < 0)
17✔
1511
                        return fd2;
57✔
1512

1513
                close_and_replace(*replacement_fd, fd2);
17✔
1514
        }
1515

1516
        return TAKE_FD(fd1);
1517
}
1518

1519
int make_userns(uid_t uid_shift,
126✔
1520
                uid_t uid_range,
1521
                uid_t source_owner,
1522
                uid_t dest_owner,
1523
                RemountIdmapping idmapping) {
1524

1525
        _cleanup_close_ int userns_fd = -EBADF;
126✔
1526
        _cleanup_free_ char *line = NULL;
126✔
1527
        uid_t source_base = 0;
126✔
1528

1529
        /* Allocates a userns file descriptor with the mapping we need. For this we'll fork off a child
1530
         * process whose only purpose is to give us a new user namespace. It's killed when we got it. */
1531

1532
        if (!userns_shift_range_valid(uid_shift, uid_range))
126✔
1533
                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid UID range for user namespace.");
×
1534

1535
        switch (idmapping) {
126✔
1536

1537
        case REMOUNT_IDMAPPING_FOREIGN_WITH_HOST_ROOT:
2✔
1538
                source_base = FOREIGN_UID_BASE;
2✔
1539
                _fallthrough_;
86✔
1540

1541
        case REMOUNT_IDMAPPING_NONE:
86✔
1542
        case REMOUNT_IDMAPPING_HOST_ROOT:
1543

1544
                if (asprintf(&line,
86✔
1545
                             UID_FMT " " UID_FMT " " UID_FMT "\n",
1546
                             source_base, uid_shift, uid_range) < 0)
1547
                        return log_oom_debug();
×
1548

1549
                /* If requested we'll include an entry in the mapping so that the host root user can make
1550
                 * changes to the uidmapped mount like it normally would. Specifically, we'll map the user
1551
                 * with UID_MAPPED_ROOT on the backing fs to UID 0. This is useful, since nspawn code wants
1552
                 * to create various missing inodes in the OS tree before booting into it, and this becomes
1553
                 * very easy and straightforward to do if it can just do it under its own regular UID. Note
1554
                 * that in that case the container's runtime uidmap (i.e. the one the container payload
1555
                 * processes run in) will leave this UID unmapped, i.e. if we accidentally leave files owned
1556
                 * by host root in the already uidmapped tree around they'll show up as owned by 'nobody',
1557
                 * which is safe. (Of course, we shouldn't leave such inodes around, but always chown() them
1558
                 * to the container's own UID range, but it's good to have a safety net, in case we
1559
                 * forget it.) */
1560
                if (idmapping == REMOUNT_IDMAPPING_HOST_ROOT)
86✔
1561
                        if (strextendf(&line,
84✔
1562
                                       UID_FMT " " UID_FMT " " UID_FMT "\n",
1563
                                       UID_MAPPED_ROOT, (uid_t) 0u, (uid_t) 1u) < 0)
1564
                                return log_oom_debug();
×
1565

1566
                break;
1567

1568
        case REMOUNT_IDMAPPING_HOST_OWNER:
8✔
1569
                /* Remap the owner of the bind mounted directory to the root user within the container. This
1570
                 * way every file written by root within the container to the bind-mounted directory will
1571
                 * be owned by the original user from the host. All other users will remain unmapped. */
1572
                if (asprintf(&line,
8✔
1573
                             UID_FMT " " UID_FMT " " UID_FMT "\n",
1574
                             source_owner, uid_shift, (uid_t) 1u) < 0)
1575
                        return log_oom_debug();
×
1576
                break;
1577

1578
        case REMOUNT_IDMAPPING_HOST_OWNER_TO_TARGET_OWNER:
32✔
1579
                /* Remap the owner of the bind mounted directory to the owner of the target directory
1580
                 * within the container. This way every file written by target directory owner within the
1581
                 * container to the bind-mounted directory will be owned by the original host user.
1582
                 * All other users will remain unmapped. */
1583
                if (asprintf(&line,
32✔
1584
                             UID_FMT " " UID_FMT " " UID_FMT "\n",
1585
                             source_owner, dest_owner, (uid_t) 1u) < 0)
1586
                        return log_oom_debug();
×
1587
                break;
1588

1589
        default:
×
1590
                assert_not_reached();
×
1591
        }
1592

1593
        /* We always assign the same UID and GID ranges */
1594
        userns_fd = userns_acquire(line, line, /* setgroups_deny= */ true);
126✔
1595
        if (userns_fd < 0)
126✔
1596
                return log_debug_errno(userns_fd, "Failed to acquire new userns: %m");
×
1597

1598
        return TAKE_FD(userns_fd);
1599
}
1600

1601
int open_tree_attr_with_fallback(int dir_fd, const char *path, unsigned flags, struct mount_attr *attr) {
220✔
1602
        _cleanup_close_ int fd = -EBADF;
220✔
1603

1604
        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
220✔
1605
        assert(attr);
220✔
1606

1607
        if (isempty(path)) {
220✔
1608
                path = "";
43✔
1609
                flags |= AT_EMPTY_PATH;
43✔
1610
        }
1611

1612
        fd = open_tree_attr(dir_fd, path, flags, attr, sizeof(struct mount_attr));
220✔
1613
        if (fd >= 0)
220✔
1614
                return TAKE_FD(fd);
220✔
1615
        if (!ERRNO_IS_NOT_SUPPORTED(errno))
41✔
1616
                return log_debug_errno(errno, "Failed to open tree and set mount attributes: %m");
2✔
1617

1618
        if (attr->attr_clr & MOUNT_ATTR_IDMAP)
39✔
1619
                return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Cannot clear idmap from mount without open_tree_attr()");
×
1620

1621
        fd = open_tree(dir_fd, path, flags);
39✔
1622
        if (fd < 0)
39✔
1623
                return log_debug_errno(errno, "Failed to open tree: %m");
×
1624

1625
        if (mount_setattr(fd, "", AT_EMPTY_PATH | (flags & AT_RECURSIVE), attr, sizeof(struct mount_attr)) < 0)
39✔
1626
                return log_debug_errno(errno, "Failed to change mount attributes: %m");
×
1627

1628
        return TAKE_FD(fd);
1629
}
1630

1631
int open_tree_try_drop_idmap(int dir_fd, const char *path, unsigned flags) {
40✔
1632
        /* Tries to drop MOUNT_ATTR_IDMAP while calling open_tree_attr(), but if that doesn't work just uses
1633
         * a regular open_tree() */
1634

1635
        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
40✔
1636

1637
        if (isempty(path)) {
40✔
1638
                path = "";
×
1639
                flags |= AT_EMPTY_PATH;
×
1640
        }
1641

1642
        _cleanup_close_ int fd = open_tree_attr_with_fallback(
40✔
1643
                        dir_fd,
1644
                        path,
1645
                        flags,
1646
                        &(struct mount_attr) {
40✔
1647
                                .attr_clr = MOUNT_ATTR_IDMAP,
1648
                        });
1649
        if (fd < 0) {
40✔
1650
                if (!ERRNO_IS_NEG_NOT_SUPPORTED(fd))
×
1651
                        return log_debug_errno(fd, "Failed to clear idmap of directory with open_tree_attr(): %m");
×
1652

1653
                log_debug_errno(fd, "Failed to clear idmap with open_tree_attr(), retrying open_tree() without clearing idmap: %m");
×
1654

1655
                fd = RET_NERRNO(open_tree(dir_fd, path, flags));
×
1656
                if (fd < 0)
×
1657
                        return log_debug_errno(fd, "Both open_tree() and open_tree_attr() failed, giving up: %m");
×
1658

1659
                log_debug("open_tree() without clearing idmap worked.");
×
1660
                return TAKE_FD(fd);
×
1661
        }
1662

1663
        log_debug("Successfully acquired mount fd with cleared idmap.");
40✔
1664
        return TAKE_FD(fd);
1665
}
1666

1667
int remount_idmap_fd(
137✔
1668
                char **paths,
1669
                int userns_fd,
1670
                uint64_t extra_mount_attr_set) {
1671

1672
        int r;
137✔
1673

1674
        assert(userns_fd >= 0);
137✔
1675

1676
        /* This remounts all specified paths with the specified userns as idmap. It will do so in the
1677
         * order specified in the strv: the expectation is that the top-level directories are at the
1678
         * beginning, and nested directories in the right, so that the tree can be built correctly from left
1679
         * to right. */
1680

1681
        size_t n = strv_length(paths);
137✔
1682
        if (n == 0) /* Nothing to do? */
137✔
1683
                return 0;
137✔
1684

1685
        int *mount_fds = NULL;
137✔
1686
        size_t n_mounts_fds = 0;
137✔
1687

1688
        mount_fds = new(int, n);
137✔
1689
        if (!mount_fds)
137✔
1690
                return log_oom_debug();
×
1691

1692
        CLEANUP_ARRAY(mount_fds, n_mounts_fds, close_many_and_free);
137✔
1693

1694
        for (size_t i = 0; i < n; i++) {
272✔
1695
                /* Clone the mount point and et the user namespace mapping attribute on the cloned mount point. */
1696
                mount_fds[n_mounts_fds] = open_tree_attr_with_fallback(
274✔
1697
                                AT_FDCWD,
1698
                                paths[i],
137✔
1699
                                OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC,
1700
                                &(struct mount_attr) {
137✔
1701
                                          .attr_set = MOUNT_ATTR_IDMAP | extra_mount_attr_set,
137✔
1702
                                          .userns_fd = userns_fd,
1703
                                });
1704
                if (mount_fds[n_mounts_fds] < 0)
137✔
1705
                        return mount_fds[n_mounts_fds];
2✔
1706

1707
                n_mounts_fds++;
135✔
1708
        }
1709

1710
        for (size_t i = n; i > 0; i--) { /* Unmount the paths right-to-left */
270✔
1711
                /* Remove the old mount points now that we have a idmapped mounts as replacement for all of them */
1712
                r = umount_verbose(LOG_DEBUG, paths[i-1], UMOUNT_NOFOLLOW);
135✔
1713
                if (r < 0)
135✔
1714
                        return r;
1715
        }
1716

1717
        for (size_t i = 0; i < n; i++) { /* Mount the replacement mounts left-to-right */
270✔
1718
                /* And place the cloned version in its place */
1719
                log_debug("Mounting idmapped fs to '%s'", paths[i]);
135✔
1720
                if (move_mount(mount_fds[i], "", -EBADF, paths[i], MOVE_MOUNT_F_EMPTY_PATH) < 0)
135✔
1721
                        return log_debug_errno(errno, "Failed to attach UID mapped mount to '%s': %m", paths[i]);
×
1722
        }
1723

1724
        return 0;
1725
}
1726

1727
int remount_idmap(
124✔
1728
                char **p,
1729
                uid_t uid_shift,
1730
                uid_t uid_range,
1731
                uid_t source_owner,
1732
                uid_t dest_owner,
1733
                RemountIdmapping idmapping) {
1734

1735
        _cleanup_close_ int userns_fd = -EBADF;
124✔
1736

1737
        userns_fd = make_userns(uid_shift, uid_range, source_owner, dest_owner, idmapping);
124✔
1738
        if (userns_fd < 0)
124✔
1739
                return userns_fd;
1740

1741
        return remount_idmap_fd(p, userns_fd, /* extra_mount_attr_set= */ 0);
124✔
1742
}
1743

1744
static void sub_mount_clear(SubMount *s) {
        /* Releases what a single entry holds: frees the path string and closes the mount fd, resetting
         * both fields to the values mfree() and safe_close() return. */

        assert(s);

        s->mount_fd = safe_close(s->mount_fd);
        s->path = mfree(s->path);
}
7,784✔
1750

1751
void sub_mount_array_free(SubMount *s, size_t n) {
        size_t idx = 0;

        /* Clears the first 'n' entries of the array, then frees the array itself. A NULL array is
         * accepted only together with n == 0. */

        assert(s || n == 0);

        while (idx < n)
                sub_mount_clear(&s[idx++]);

        free(s);
}
1,241✔
1759

1760
#if HAVE_LIBMOUNT
1761
static int sub_mount_compare(const SubMount *a, const SubMount *b) {
16,309✔
1762
        assert(a);
16,309✔
1763
        assert(b);
16,309✔
1764
        assert(a->path);
16,309✔
1765
        assert(b->path);
16,309✔
1766

1767
        return path_compare(a->path, b->path);
16,309✔
1768
}
1769

1770
static void sub_mount_drop(SubMount *s, size_t n) {
        size_t keeper = 0;

        /* Walks the array front to back and clears every entry whose path has the path of the most
         * recently kept entry as prefix. Cleared entries stay in the array, with their path unset. */

        assert(s || n == 0);

        for (size_t k = 1; k < n; k++) {
                if (!path_startswith(s[k].path, s[keeper].path)) {
                        /* Not below the current keeper: this one becomes the new reference */
                        keeper = k;
                        continue;
                }

                sub_mount_clear(&s[k]);
        }
}
2,159✔
1780
#endif
1781

1782
int get_sub_mounts(const char *prefix, SubMount **ret_mounts, size_t *ret_n_mounts) {
#if HAVE_LIBMOUNT
        _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
        _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
        SubMount *found = NULL;
        size_t n_found = 0;
        int r;

        CLEANUP_ARRAY(found, n_found, sub_mount_array_free);

        assert(prefix);
        assert(ret_mounts);
        assert(ret_n_mounts);

        /* Collects the mounts located strictly below 'prefix', each as a path plus a recursively cloned
         * mount fd. The result is sorted by path; entries that lie below another collected entry are
         * cleared by sub_mount_drop() but remain in the array. Returns 0 on success, a negative errno on
         * failure. */

        r = libmount_parse_mountinfo(/* source= */ NULL, &table, &iter);
        if (r < 0)
                return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");

        for (;;) {
                _cleanup_close_ int tree_fd = -EBADF;
                _cleanup_free_ char *path_copy = NULL;
                struct libmnt_fs *fs;
                const char *where;
                int table_id, actual_id;

                r = sym_mnt_table_next_fs(table, iter, &fs);
                if (r == 1) /* EOF */
                        break;
                if (r < 0)
                        return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");

                where = sym_mnt_fs_get_target(fs);
                if (!where)
                        continue;

                /* Skip everything that is not below the prefix, and the prefix itself too */
                if (isempty(path_startswith(where, prefix)))
                        continue;

                /* Compare the mount ID from the table with the one the path resolves to right now */
                table_id = sym_mnt_fs_get_id(fs);
                r = path_get_mnt_id(where, &actual_id);
                if (r < 0) {
                        log_debug_errno(r, "Failed to get mount ID of '%s', ignoring: %m", where);
                        continue;
                }
                if (table_id != actual_id) {
                        /* The path may be hidden by another over-mount or already remounted. */
                        log_debug("The mount IDs of '%s' obtained by libmount and path_get_mnt_id() are different (%i vs %i), ignoring.",
                                  where, table_id, actual_id);
                        continue;
                }

                tree_fd = RET_NERRNO(open_tree(AT_FDCWD, where, OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC|AT_RECURSIVE));
                if (tree_fd == -ENOENT)
                        /* The path may be hidden by another over-mount or already unmounted. */
                        continue;
                if (tree_fd < 0)
                        return log_debug_errno(tree_fd, "Failed to open subtree of mounted filesystem '%s': %m", where);

                path_copy = strdup(where);
                if (!path_copy)
                        return log_oom_debug();

                if (!GREEDY_REALLOC(found, n_found + 1))
                        return log_oom_debug();

                /* Ownership of both the string and the fd moves into the array */
                found[n_found++] = (SubMount) {
                        .path = TAKE_PTR(path_copy),
                        .mount_fd = TAKE_FD(tree_fd),
                };
        }

        typesafe_qsort(found, n_found, sub_mount_compare);
        sub_mount_drop(found, n_found);

        *ret_mounts = TAKE_PTR(found);
        *ret_n_mounts = n_found;
        return 0;
#else
        return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "libmount support not compiled in");
#endif
}
1862

1863
int bind_mount_submounts(
1,249✔
1864
                const char *source,
1865
                const char *target) {
1866

1867
        SubMount *mounts = NULL;
1,249✔
1868
        size_t n = 0;
1,249✔
1869
        int ret = 0, r;
1,249✔
1870

1871
        /* Bind mounts all child mounts of 'source' to 'target'. Useful when setting up a new procfs instance
1872
         * with new mount options to copy the original submounts over. */
1873

1874
        assert(source);
1,249✔
1875
        assert(target);
1,249✔
1876

1877
        CLEANUP_ARRAY(mounts, n, sub_mount_array_free);
1,249✔
1878

1879
        r = get_sub_mounts(source, &mounts, &n);
1,249✔
1880
        if (r < 0)
1,249✔
1881
                return r;
1882

1883
        FOREACH_ARRAY(m, mounts, n) {
8,210✔
1884
                _cleanup_free_ char *t = NULL;
6,961✔
1885
                const char *suffix;
6,961✔
1886

1887
                if (isempty(m->path))
6,961✔
1888
                        continue;
819✔
1889

1890
                assert_se(suffix = path_startswith(m->path, source));
6,142✔
1891

1892
                t = path_join(target, suffix);
6,142✔
1893
                if (!t)
6,142✔
1894
                        return -ENOMEM;
×
1895

1896
                r = path_is_mount_point(t);
6,142✔
1897
                if (r < 0) {
6,142✔
1898
                        log_debug_errno(r, "Failed to detect if '%s' already is a mount point, ignoring: %m", t);
16✔
1899
                        continue;
16✔
1900
                }
1901
                if (r > 0) {
6,126✔
1902
                        log_debug("Not bind mounting '%s' from '%s' to '%s', since there's already a mountpoint.", suffix, source, target);
×
1903
                        continue;
×
1904
                }
1905

1906
                RET_GATHER(ret, RET_NERRNO(move_mount(m->mount_fd, "", AT_FDCWD, t, MOVE_MOUNT_F_EMPTY_PATH)));
6,126✔
1907
        }
1908

1909
        return ret;
1910
}
1911

1912
int make_mount_point_inode_from_mode(int dir_fd, const char *dest, mode_t source_mode, mode_t target_mode) {
977✔
1913
        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
977✔
1914
        assert(dest);
977✔
1915

1916
        if (S_ISDIR(source_mode))
977✔
1917
                return mkdirat_label(dir_fd, dest, target_mode & 07777);
946✔
1918
        else
1919
                return RET_NERRNO(mknodat(dir_fd, dest, S_IFREG|(target_mode & 07666), 0)); /* Mask off X bit */
32✔
1920
}
1921

1922
int make_mount_point_inode_from_path(const char *source, const char *dest, mode_t access_mode) {
780✔
1923
        struct stat st;
780✔
1924

1925
        assert(source);
780✔
1926
        assert(dest);
780✔
1927

1928
        if (stat(source, &st) < 0)
780✔
1929
                return -errno;
×
1930

1931
        return make_mount_point_inode_from_mode(AT_FDCWD, dest, st.st_mode, access_mode);
780✔
1932
}
1933

1934
int trigger_automount_at(int dir_fd, const char *path) {
508✔
1935
        _cleanup_free_ char *nested = NULL;
1,016✔
1936

1937
        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
508✔
1938

1939
        nested = path_join(path, "a");
508✔
1940
        if (!nested)
508✔
1941
                return -ENOMEM;
1942

1943
        (void) faccessat(dir_fd, nested, F_OK, 0);
508✔
1944

1945
        return 0;
508✔
1946
}
1947

1948
/* Returns the tight set of mount flags used for credentials mounts: no device nodes, no execution, no
 * SUID, no symlink following, and additionally read-only if 'ro' is true. */
unsigned long credentials_fs_mount_flags(bool ro) {
        unsigned long mount_flags = MS_NODEV|MS_NOEXEC|MS_NOSUID|MS_NOSYMFOLLOW;

        if (ro)
                mount_flags |= MS_RDONLY;

        return mount_flags;
}
1952

1953
int fsmount_credentials_fs(int *ret_fsfd) {
2,094✔
1954
        _cleanup_close_ int fs_fd = -EBADF;
2,094✔
1955
        char size_str[DECIMAL_STR_MAX(uint64_t)];
2,094✔
1956

1957
        /* Mounts a file system we can place credentials in, i.e. with tight access modes right from the
1958
         * beginning, and ideally swapping turned off. In order of preference:
1959
         *
1960
         *      1. tmpfs if it supports "noswap" (needs kernel >= 6.3)
1961
         *      2. ramfs
1962
         *      3. tmpfs without "noswap"
1963
         */
1964

1965
        fs_fd = fsopen("tmpfs", FSOPEN_CLOEXEC);
2,094✔
1966
        if (fs_fd < 0)
2,094✔
1967
                return -errno;
7✔
1968

1969
        if (fsconfig(fs_fd, FSCONFIG_SET_STRING, "nr_inodes", "1024", 0) < 0)
2,087✔
1970
                return -errno;
×
1971

1972
        xsprintf(size_str, "%" PRIu64, CREDENTIALS_TOTAL_SIZE_MAX);
2,087✔
1973
        if (fsconfig(fs_fd, FSCONFIG_SET_STRING, "size", size_str, 0) < 0)
2,087✔
1974
                return -errno;
×
1975

1976
        if (fsconfig(fs_fd, FSCONFIG_SET_FLAG, "noswap", NULL, 0) < 0) {
2,087✔
1977
                if (errno != EINVAL)
×
1978
                        return -errno;
×
1979

1980
                int ramfs_fd = fsopen("ramfs", FSOPEN_CLOEXEC);
×
1981
                if (ramfs_fd >= 0)
×
1982
                        close_and_replace(fs_fd, ramfs_fd);
×
1983
        }
1984

1985
        if (fsconfig(fs_fd, FSCONFIG_SET_STRING, "mode", "0700", 0) < 0)
2,087✔
1986
                return -errno;
×
1987

1988
        if (fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0)
2,087✔
1989
                return -errno;
×
1990

1991
        unsigned mount_attrs = ms_flags_to_mount_attr(credentials_fs_mount_flags(/* ro = */ false));
2,087✔
1992

1993
        int mfd = RET_NERRNO(fsmount(fs_fd, FSMOUNT_CLOEXEC, mount_attrs));
2,087✔
1994
        if (mfd == -EINVAL) {
×
1995
                /* MS_NOSYMFOLLOW was added in kernel 5.10, but the new mount API counterpart was missing
1996
                 * until 5.14 (c.f. https://github.com/torvalds/linux/commit/dd8b477f9a3d8edb136207acb3652e1a34a661b7).
1997
                 *
1998
                 * TODO: drop this once our baseline is raised to 5.14 */
1999
                assert(FLAGS_SET(mount_attrs, MOUNT_ATTR_NOSYMFOLLOW));
×
2000
                mfd = RET_NERRNO(fsmount(fs_fd, FSMOUNT_CLOEXEC, mount_attrs & ~MOUNT_ATTR_NOSYMFOLLOW));
×
2001
        }
2002
        if (mfd < 0)
2,087✔
2003
                return mfd;
2004

2005
        if (ret_fsfd)
2,087✔
2006
                *ret_fsfd = TAKE_FD(fs_fd);
2,073✔
2007

2008
        return mfd;
2009
}
2010

2011
int mount_credentials_fs(const char *path) {
14✔
2012
        _cleanup_close_ int mfd = -EBADF;
14✔
2013

2014
        assert(path);
14✔
2015

2016
        mfd = fsmount_credentials_fs(/* ret_fsfd= */ NULL);
14✔
2017
        if (mfd < 0)
14✔
2018
                return mfd;
2019

2020
        return RET_NERRNO(move_mount(mfd, "", AT_FDCWD, path, MOVE_MOUNT_F_EMPTY_PATH));
14✔
2021
}
2022

2023
int make_fsmount(
1,263✔
2024
                int error_log_level,
2025
                const char *what,
2026
                const char *type,
2027
                unsigned long flags,
2028
                const char *options,
2029
                int userns_fd) {
2030

2031
        _cleanup_close_ int fs_fd = -EBADF, mnt_fd = -EBADF;
1,263✔
2032
        _cleanup_free_ char *o = NULL;
1,263✔
2033
        unsigned long f;
1,263✔
2034
        int r;
1,263✔
2035

2036
        assert(type);
1,263✔
2037
        assert(what);
1,263✔
2038

2039
        r = mount_option_mangle(options, flags, &f, &o);
1,263✔
2040
        if (r < 0)
1,263✔
2041
                return log_full_errno(
×
2042
                                error_log_level, r, "Failed to mangle mount options %s: %m",
2043
                                strempty(options));
2044

2045
        if (DEBUG_LOGGING) {
1,263✔
2046
                _cleanup_free_ char *fl = NULL;
1,263✔
2047
                (void) mount_flags_to_string(f, &fl);
1,263✔
2048

2049
                log_debug("Creating mount fd for %s (%s) (%s \"%s\")...",
1,721✔
2050
                        strna(what), strna(type), strnull(fl), strempty(o));
2051
        }
2052

2053
        fs_fd = fsopen(type, FSOPEN_CLOEXEC);
1,263✔
2054
        if (fs_fd < 0)
1,263✔
2055
                return log_full_errno(error_log_level, errno, "Failed to open superblock for \"%s\": %m", type);
×
2056

2057
        if (fsconfig(fs_fd, FSCONFIG_SET_STRING, "source", what, 0) < 0)
1,263✔
2058
                return log_full_errno(error_log_level, errno, "Failed to set mount source for \"%s\" to \"%s\": %m", type, what);
×
2059

2060
        if (FLAGS_SET(f, MS_RDONLY))
1,263✔
2061
                if (fsconfig(fs_fd, FSCONFIG_SET_FLAG, "ro", NULL, 0) < 0)
2✔
2062
                        return log_full_errno(error_log_level, errno, "Failed to set read only mount flag for \"%s\": %m", type);
×
2063

2064
        for (const char *p = o;;) {
1,263✔
2065
                _cleanup_free_ char *word = NULL;
810✔
2066
                char *eq;
2,073✔
2067

2068
                r = extract_first_word(&p, &word, ",", EXTRACT_KEEP_QUOTE);
2,073✔
2069
                if (r < 0)
2,073✔
2070
                        return log_full_errno(error_log_level, r, "Failed to parse mount option string \"%s\": %m", o);
×
2071
                if (r == 0)
2,073✔
2072
                        break;
2073

2074
                eq = strchr(word, '=');
810✔
2075
                if (eq) {
810✔
2076
                        *eq = 0;
810✔
2077
                        eq++;
810✔
2078

2079
                        if (fsconfig(fs_fd, FSCONFIG_SET_STRING, word, eq, 0) < 0)
810✔
2080
                                return log_full_errno(error_log_level, errno, "Failed to set mount option \"%s=%s\" for \"%s\": %m", word, eq, type);
×
2081
                } else {
2082
                        if (fsconfig(fs_fd, FSCONFIG_SET_FLAG, word, NULL, 0) < 0)
×
2083
                                return log_full_errno(error_log_level, errno, "Failed to set mount flag \"%s\" for \"%s\": %m", word, type);
×
2084
                }
2085
        }
2086

2087
        if (fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0)
1,263✔
2088
                return log_full_errno(error_log_level, errno, "Failed to realize fs fd for \"%s\" (\"%s\"): %m", what, type);
10✔
2089

2090
        mnt_fd = fsmount(fs_fd, FSMOUNT_CLOEXEC, 0);
1,253✔
2091
        if (mnt_fd < 0)
1,253✔
2092
                return log_full_errno(error_log_level, errno, "Failed to create mount fd for \"%s\" (\"%s\"): %m", what, type);
3✔
2093

2094
        struct mount_attr ma = {
2,500✔
2095
                .attr_clr = ms_flags_to_mount_attr_clr(f),
1,250✔
2096
                .attr_set = ms_flags_to_mount_attr(f) | (userns_fd >= 0 ? MOUNT_ATTR_IDMAP : 0),
1,250✔
2097
                .userns_fd = userns_fd,
2098
        };
2099
        if (ma.attr_set != 0 && mount_setattr(mnt_fd, "", AT_EMPTY_PATH|AT_RECURSIVE, &ma, MOUNT_ATTR_SIZE_VER0) < 0)
1,250✔
2100
                return log_full_errno(error_log_level,
×
2101
                                      errno,
2102
                                      "Failed to set mount flags for \"%s\" (\"%s\"): %m",
2103
                                      what,
2104
                                      type);
2105

2106
        return TAKE_FD(mnt_fd);
2107
}
2108

2109
char* umount_and_rmdir_and_free(char *p) {
171✔
2110
        if (!p)
171✔
2111
                return NULL;
171✔
2112

2113
        PROTECT_ERRNO;
×
2114
        (void) umount_recursive(p, 0);
171✔
2115
        (void) rmdir(p);
171✔
2116
        return mfree(p);
171✔
2117
}
2118

2119
char* umount_and_free(char *p) {
45✔
2120
        if (!p)
45✔
2121
                return NULL;
45✔
2122

2123
        PROTECT_ERRNO;
×
2124
        (void) umount_recursive(p, 0);
45✔
2125
        return mfree(p);
45✔
2126
}
2127

2128
char* umount_and_unlink_and_free(char *p) {
1✔
2129
        if (!p)
1✔
2130
                return NULL;
1✔
2131

2132
        PROTECT_ERRNO;
×
2133
        (void) umount2(p, 0);
1✔
2134
        (void) unlink(p);
1✔
2135
        return mfree(p);
1✔
2136
}
2137

2138
/* Determines the mount ID of 'path' (relative to 'dir_fd') and looks that ID up in the mount table
 * parsed by libmount. For each of 'ret_fstype', 'ret_options' and 'ret_source' that is non-NULL, a newly
 * allocated copy of the respective field is returned (an empty string if libmount reports none).
 *
 * Returns 0 on success. Fails with ESTALE if no table entry carries the mount ID, and with EOPNOTSUPP if
 * built without libmount. Output parameters are only written on success. */
int path_get_mount_info_at(
                int dir_fd,
                const char *path,
                char **ret_fstype,
                char **ret_options,
                char **ret_source) {

#if HAVE_LIBMOUNT
        _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
        _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
        int r, mnt_id;

        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);

        r = path_get_mnt_id_at(dir_fd, path, &mnt_id);
        if (r < 0)
                return log_debug_errno(r, "Failed to get mount ID: %m");

        /* If the caller wants the options we have to parse utab as well, since userspace options such
         * as "_netdev" would be lost otherwise. */
        if (ret_options)
                r = libmount_parse_with_utab(&table, &iter);
        else
                r = libmount_parse_mountinfo(/* source= */ NULL, &table, &iter);
        if (r < 0)
                return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");

        for (;;) {
                struct libmnt_fs *entry;

                r = sym_mnt_table_next_fs(table, iter, &entry);
                if (r == 1)
                        break; /* EOF */
                if (r < 0)
                        return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");

                if (sym_mnt_fs_get_id(entry) != mnt_id)
                        continue;

                /* Found our mount. Duplicate everything requested first, and only then write to the
                 * output parameters, so that nothing is returned if one of the allocations fails. */
                _cleanup_free_ char *fstype_copy = NULL, *options_copy = NULL, *source_copy = NULL;

                if (ret_fstype) {
                        fstype_copy = strdup(strempty(sym_mnt_fs_get_fstype(entry)));
                        if (!fstype_copy)
                                return log_oom_debug();
                }

                if (ret_options) {
                        options_copy = strdup(strempty(sym_mnt_fs_get_options(entry)));
                        if (!options_copy)
                                return log_oom_debug();
                }

                if (ret_source) {
                        source_copy = strdup(strempty(sym_mnt_fs_get_source(entry)));
                        if (!source_copy)
                                return log_oom_debug();
                }

                if (ret_fstype)
                        *ret_fstype = TAKE_PTR(fstype_copy);
                if (ret_options)
                        *ret_options = TAKE_PTR(options_copy);
                if (ret_source)
                        *ret_source = TAKE_PTR(source_copy);

                return 0;
        }

        return log_debug_errno(SYNTHETIC_ERRNO(ESTALE), "Cannot find mount ID %i from /proc/self/mountinfo.", mnt_id);
#else
        return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "libmount support not compiled in");
#endif
}
2212

2213
int path_is_network_fs_harder_at(int dir_fd, const char *path) {
813✔
2214
        _cleanup_close_ int fd = -EBADF;
813✔
2215
        int r;
813✔
2216

2217
        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
813✔
2218

2219
        fd = xopenat(dir_fd, path, O_PATH | O_CLOEXEC | O_NOFOLLOW);
813✔
2220
        if (fd < 0)
813✔
2221
                return fd;
2222

2223
        r = fd_is_network_fs(fd);
783✔
2224
        if (r != 0)
783✔
2225
                return r;
2226

2227
        _cleanup_free_ char *fstype = NULL, *options = NULL;
783✔
2228
        r = path_get_mount_info_at(fd, /* path= */ NULL, &fstype, &options, /* ret_source= */ NULL);
783✔
2229
        if (r < 0)
783✔
2230
                return r;
2231

2232
        if (fstype_is_network(fstype))
783✔
2233
                return true;
2234

2235
        if (fstab_test_option(options, "_netdev\0"))
783✔
2236
                return true;
×
2237

2238
        return false;
2239
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc