• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 14369324968

09 Apr 2025 10:36PM UTC coverage: 71.955% (+0.04%) from 71.913%
14369324968

push

github

YHNdnzj
nspawn: replace prefix_roota() with chase()

5 of 7 new or added lines in 1 file covered. (71.43%)

1145 existing lines in 32 files now uncovered.

297058 of 412839 relevant lines covered (71.95%)

663845.19 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

64.16
/src/basic/mountpoint-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <errno.h>
4
#include <fcntl.h>
5
#include <sys/mount.h>
6

7
#include "alloc-util.h"
8
#include "chase.h"
9
#include "fd-util.h"
10
#include "fileio.h"
11
#include "filesystems.h"
12
#include "fs-util.h"
13
#include "missing_fcntl.h"
14
#include "missing_fs.h"
15
#include "missing_syscall.h"
16
#include "mkdir.h"
17
#include "mountpoint-util.h"
18
#include "nulstr-util.h"
19
#include "parse-util.h"
20
#include "path-util.h"
21
#include "stat-util.h"
22
#include "stdio-util.h"
23
#include "strv.h"
24
#include "user-util.h"
25

26
/* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
27
 * any more currently defined value to future-proof things: if the size is increased in the API headers, and our code
28
 * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with
29
 * EINVAL right-away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition
30
 * from when it was introduced. We use it as a start value only anyway (see below), and hence should be able to deal
31
 * with large file handles anyway. */
32
#define ORIGINAL_MAX_HANDLE_SZ 128
33

UNCOV
34
/* Classifies a negative errno-style result of name_to_handle_at(): returns false for errors that are
 * expected in some environments, true for everything else.
 *
 * Tolerated errors:
 *   - "not supported" class: the file system does not implement name_to_handle_at() (EOPNOTSUPP)
 *   - "privilege" class: the syscall was blocked (EACCES/EPERM), e.g. via seccomp inside a container
 *   - EOVERFLOW: the mount point is not triggered yet (think autofs+nfs4)
 *   - EINVAL: general name_to_handle_at() flakiness
 *
 * 'err' must be negative. */
bool is_name_to_handle_at_fatal_error(int err) {
        assert(err < 0);

        if (ERRNO_IS_NEG_NOT_SUPPORTED(err) || ERRNO_IS_NEG_PRIVILEGE(err))
                return false;

        if (IN_SET(err, -EOVERFLOW, -EINVAL))
                return false;

        /* Anything else is not supposed to happen, hence treat it as fatal. */
        return true;
}
51

52
int name_to_handle_at_loop(
27,239✔
53
                int fd,
54
                const char *path,
55
                struct file_handle **ret_handle,
56
                int *ret_mnt_id,
57
                int flags) {
58

59
        size_t n = ORIGINAL_MAX_HANDLE_SZ;
27,239✔
60

61
        assert(fd >= 0 || fd == AT_FDCWD);
27,239✔
62
        assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH|AT_HANDLE_FID)) == 0);
27,239✔
63

64
        /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
65
         * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
66
         * start value, it is not an upper bound on the buffer size required.
67
         *
68
         * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
69
         * as NULL if there's no interest in either. */
70

71
        for (;;) {
27,239✔
72
                _cleanup_free_ struct file_handle *h = NULL;
×
73
                int mnt_id = -1;
27,239✔
74

75
                h = malloc0(offsetof(struct file_handle, f_handle) + n);
27,239✔
76
                if (!h)
27,239✔
77
                        return -ENOMEM;
78

79
                h->handle_bytes = n;
27,239✔
80

81
                if (name_to_handle_at(fd, strempty(path), h, &mnt_id, flags) >= 0) {
54,469✔
82

83
                        if (ret_handle)
27,239✔
84
                                *ret_handle = TAKE_PTR(h);
27,239✔
85

86
                        if (ret_mnt_id)
27,239✔
87
                                *ret_mnt_id = mnt_id;
27,239✔
88

89
                        return 0;
27,239✔
90
                }
91
                if (errno != EOVERFLOW)
×
92
                        return -errno;
×
93

94
                if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
×
95

96
                        /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
97
                         * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
98
                         * be filled in, and the caller was interested in only the mount ID an nothing else. */
99

100
                        *ret_mnt_id = mnt_id;
×
101
                        return 0;
×
102
                }
103

104
                /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by
105
                 * something else (apparently EOVERFLOW is returned for untriggered nfs4 autofs mounts
106
                 * sometimes), not by the too small buffer. In that case propagate EOVERFLOW */
107
                if (h->handle_bytes <= n)
×
108
                        return -EOVERFLOW;
109

110
                /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
111
                n = h->handle_bytes;
×
112

113
                /* paranoia: check for overflow (note that .handle_bytes is unsigned only) */
114
                if (n > UINT_MAX - offsetof(struct file_handle, f_handle))
×
115
                        return -EOVERFLOW;
116
        }
117
}
118

119
int name_to_handle_at_try_fid(
27,239✔
120
                int fd,
121
                const char *path,
122
                struct file_handle **ret_handle,
123
                int *ret_mnt_id,
124
                int flags) {
125

126
        int r;
27,239✔
127

128
        assert(fd >= 0 || fd == AT_FDCWD);
27,239✔
129

130
        /* First issues name_to_handle_at() with AT_HANDLE_FID. If this fails and this is not a fatal error
131
         * we'll try without the flag, in order to support older kernels that didn't have AT_HANDLE_FID
132
         * (i.e. older than Linux 6.5). */
133

134
        r = name_to_handle_at_loop(fd, path, ret_handle, ret_mnt_id, flags | AT_HANDLE_FID);
27,239✔
135
        if (r >= 0 || is_name_to_handle_at_fatal_error(r))
27,239✔
136
                return r;
27,239✔
137

138
        return name_to_handle_at_loop(fd, path, ret_handle, ret_mnt_id, flags & ~AT_HANDLE_FID);
×
139
}
140

141
static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *ret_mnt_id) {
×
142
        char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
×
143
        _cleanup_free_ char *fdinfo = NULL;
×
144
        _cleanup_close_ int subfd = -EBADF;
×
145
        int r;
×
146

147
        assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
×
148
        assert(ret_mnt_id);
×
149

150
        if ((flags & AT_EMPTY_PATH) && isempty(filename))
×
151
                xsprintf(path, "/proc/self/fdinfo/%i", fd);
×
152
        else {
153
                subfd = openat(fd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_FOLLOW ? 0 : O_NOFOLLOW));
×
154
                if (subfd < 0)
×
155
                        return -errno;
×
156

157
                xsprintf(path, "/proc/self/fdinfo/%i", subfd);
×
158
        }
159

160
        r = read_full_virtual_file(path, &fdinfo, NULL);
×
161
        if (r == -ENOENT)
×
162
                return proc_fd_enoent_errno();
×
163
        if (r < 0)
×
164
                return r;
165

166
        char *p = find_line_startswith(fdinfo, "mnt_id:");
×
167
        if (!p)
×
168
                return -EBADMSG;
169

170
        p = skip_leading_chars(p, /* bad = */ NULL);
×
171
        p[strcspn(p, WHITESPACE)] = 0;
×
172

173
        return safe_atoi(p, ret_mnt_id);
×
174
}
175

176
static bool filename_possibly_with_slash_suffix(const char *s) {
79,284✔
177
        const char *slash, *copied;
79,284✔
178

179
        /* Checks whether the specified string is either file name, or a filename with a suffix of
180
         * slashes. But nothing else.
181
         *
182
         * this is OK: foo, bar, foo/, bar/, foo//, bar///
183
         * this is not OK: "", "/", "/foo", "foo/bar", ".", ".." … */
184

185
        slash = strchr(s, '/');
79,284✔
186
        if (!slash)
79,284✔
187
                return filename_is_valid(s);
79,270✔
188

189
        if (slash - s > PATH_MAX) /* We want to allocate on the stack below, hence do a size check first */
14✔
190
                return false;
191

192
        if (slash[strspn(slash, "/")] != 0) /* Check that the suffix consist only of one or more slashes */
14✔
193
                return false;
194

195
        copied = strndupa_safe(s, slash - s);
10✔
196
        return filename_is_valid(copied);
10✔
197
}
198

199
bool file_handle_equal(const struct file_handle *a, const struct file_handle *b) {
13,615✔
200
        if (a == b)
13,615✔
201
                return true;
202
        if (!a != !b)
13,615✔
203
                return false;
204
        if (a->handle_type != b->handle_type)
13,615✔
205
                return false;
206

207
        return memcmp_nn(a->f_handle, a->handle_bytes, b->f_handle, b->handle_bytes) == 0;
13,601✔
208
}
209

210
int is_mount_point_at(int fd, const char *filename, int flags) {
79,532✔
211
        bool fd_is_self;
79,532✔
212
        int r;
79,532✔
213

214
        assert(fd >= 0 || fd == AT_FDCWD);
79,532✔
215
        assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
79,532✔
216

217
        if (isempty(filename)) {
79,532✔
218
                if (fd == AT_FDCWD)
199✔
219
                        filename = ".";
220
                else {
221
                        /* If the file name is empty we'll see if the specified 'fd' is a mount point.
222
                         * That's only supported by statx(), or if the inode specified via 'fd' refers to a
223
                         * directory. Otherwise, we'll have to fail (ENOTDIR), because we have no kernel API
224
                         * to query the information we need. */
225
                        flags |= AT_EMPTY_PATH;
197✔
226
                        filename = "";
197✔
227
                }
228

229
                fd_is_self = true;
230
        } else if (STR_IN_SET(filename, ".", "./"))
79,333✔
231
                fd_is_self = true;
232
        else {
233
                /* Insist that the specified filename is actually a filename, and not a path, i.e. some inode
234
                 * further up or down the tree then immediately below the specified directory fd. */
235
                if (!filename_possibly_with_slash_suffix(filename))
79,284✔
236
                        return -EINVAL;
7✔
237

238
                fd_is_self = false;
239
        }
240

241
        /* First we will try statx()' STATX_ATTR_MOUNT_ROOT attribute, which is our ideal API, available
242
         * since kernel 5.8.
243
         *
244
         * If that fails, our second try is the name_to_handle_at() syscall, which tells us the mount id and
245
         * an opaque file "handle". It is not supported everywhere though (kernel compile-time option, not
246
         * all file systems are hooked up). If it works the mount id is usually good enough to tell us
247
         * whether something is a mount point.
248
         *
249
         * If that didn't work we will try to read the mount id from /proc/self/fdinfo/<fd>. This is almost
250
         * as good as name_to_handle_at(), however, does not return the opaque file handle. The opaque file
251
         * handle is pretty useful to detect the root directory, which we should always consider a mount
252
         * point. Hence we use this only as fallback.
253
         *
254
         * Note that traditionally the check is done via fstat()-based st_dev comparisons. However, various
255
         * file systems don't guarantee same st_dev across single fs anymore, e.g. unionfs exposes file systems
256
         * with a variety of st_dev reported. Also, btrfs subvolumes have different st_dev, even though
257
         * they aren't real mounts of their own. */
258

259
        struct statx sx = {}; /* explicitly initialize the struct to make msan silent. */
79,525✔
260
        if (statx(fd, filename,
79,525✔
261
                  at_flags_normalize_nofollow(flags) |
79,525✔
262
                  AT_NO_AUTOMOUNT |            /* don't trigger automounts – mounts are a local concept, hence no need to trigger automounts to determine STATX_ATTR_MOUNT_ROOT */
263
                  AT_STATX_DONT_SYNC,          /* don't go to the network for this – for similar reasons */
264
                  STATX_TYPE,
265
                  &sx) < 0)
266
                return -errno;
3,768✔
267

268
        if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) /* yay! */
75,757✔
269
                return FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT);
75,757✔
270

271
        _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
×
272
        int mount_id = -1, mount_id_parent = -1;
×
273
        bool nosupp = false;
×
274

275
        r = name_to_handle_at_try_fid(fd, filename, &h, &mount_id, flags);
×
276
        if (r < 0) {
×
277
                if (is_name_to_handle_at_fatal_error(r))
×
278
                        return r;
279
                if (!ERRNO_IS_NOT_SUPPORTED(r))
×
280
                        goto fallback_fdinfo;
×
281

282
                /* This file system does not support name_to_handle_at(), hence let's see if the upper fs
283
                 * supports it (in which case it is a mount point), otherwise fall back to the fdinfo logic. */
284
                nosupp = true;
285
        }
286

287
        if (fd_is_self)
×
288
                r = name_to_handle_at_try_fid(fd, "..", &h_parent, &mount_id_parent, 0); /* can't work for non-directories 😢 */
×
289
        else
290
                r = name_to_handle_at_try_fid(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
×
291
        if (r < 0) {
×
292
                if (is_name_to_handle_at_fatal_error(r))
×
293
                        return r;
294
                if (!ERRNO_IS_NOT_SUPPORTED(r))
×
295
                        goto fallback_fdinfo;
×
296
                if (nosupp)
×
297
                        /* Both the parent and the directory can't do name_to_handle_at() */
298
                        goto fallback_fdinfo;
×
299

300
                /* The parent can't do name_to_handle_at() but the directory we are
301
                 * interested in can?  If so, it must be a mount point. */
302
                return 1;
303
        }
304

305
        /* The parent can do name_to_handle_at() but the directory we are interested in can't? If
306
         * so, it must be a mount point. */
307
        if (nosupp)
×
308
                return 1;
309

310
        /* If the file handle for the directory we are interested in and its parent are identical,
311
         * we assume this is the root directory, which is a mount point. */
312
        if (file_handle_equal(h_parent, h))
×
313
                return 1;
314

315
        return mount_id != mount_id_parent;
×
316

317
fallback_fdinfo:
×
318
        r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
×
319
        if (r < 0)
×
320
                return r;
321

322
        if (fd_is_self)
×
323
                r = fd_fdinfo_mnt_id(fd, "..", 0, &mount_id_parent); /* can't work for non-directories 😢 */
×
324
        else
325
                r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
×
326
        if (r < 0)
×
327
                return r;
328

329
        if (mount_id != mount_id_parent)
×
330
                return 1;
331

332
        /* Hmm, so, the mount ids are the same. This leaves one special case though for the root file
333
         * system. For that, let's see if the parent directory has the same inode as we are interested
334
         * in. */
335

336
        struct stat a, b;
×
337

338
        /* yay for fstatat() taking a different set of flags than the other _at() above */
339
        if (fstatat(fd, filename, &a, at_flags_normalize_nofollow(flags)) < 0)
×
340
                return -errno;
×
341

342
        if (fd_is_self)
×
343
                r = fstatat(fd, "..", &b, 0);
×
344
        else
345
                r = fstatat(fd, "", &b, AT_EMPTY_PATH);
×
346
        if (r < 0)
×
347
                return -errno;
×
348

349
        /* A directory with same device and inode as its parent must be the root directory. Otherwise
350
         * not a mount point.
351
         *
352
         * NB: we avoid inode_same_at() here because it internally attempts name_to_handle_at_try_fid() first,
353
         * which is redundant. */
354
        return stat_inode_same(&a, &b);
×
355
}
356

357
/* flags can be AT_SYMLINK_FOLLOW or 0 */
358
int path_is_mount_point_full(const char *path, const char *root, int flags) {
26,264✔
359
        _cleanup_close_ int dfd = -EBADF;
26,264✔
360
        _cleanup_free_ char *fn = NULL;
26,264✔
361

362
        assert(path);
26,264✔
363
        assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
26,264✔
364

365
        if (path_equal(path, "/"))
26,264✔
366
                return 1;
367

368
        /* we need to resolve symlinks manually, we can't just rely on is_mount_point_at() to do that for us;
369
         * if we have a structure like /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
370
         * look at needs to be /usr, not /. */
371
        dfd = chase_and_open_parent(path, root,
26,257✔
372
                                    CHASE_TRAIL_SLASH|(FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? 0 : CHASE_NOFOLLOW),
26,257✔
373
                                    &fn);
374
        if (dfd < 0)
26,257✔
375
                return dfd;
376

377
        return is_mount_point_at(dfd, fn, flags);
26,064✔
378
}
379

380
int path_get_mnt_id_at_fallback(int dir_fd, const char *path, int *ret) {
×
381
        int r;
×
382

383
        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
×
384
        assert(ret);
×
385

386
        r = name_to_handle_at_loop(dir_fd, path, NULL, ret, isempty(path) ? AT_EMPTY_PATH : 0);
×
387
        if (r >= 0 || is_name_to_handle_at_fatal_error(r))
×
388
                return r;
×
389

390
        return fd_fdinfo_mnt_id(dir_fd, path, isempty(path) ? AT_EMPTY_PATH : 0, ret);
×
391
}
392

393
int path_get_mnt_id_at(int dir_fd, const char *path, int *ret) {
6,224✔
394
        struct statx sx;
6,224✔
395

396
        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
6,224✔
397
        assert(ret);
6,224✔
398

399
        if (statx(dir_fd,
12,448✔
400
                  strempty(path),
6,224✔
401
                  (isempty(path) ? AT_EMPTY_PATH : AT_SYMLINK_NOFOLLOW) |
6,224✔
402
                  AT_NO_AUTOMOUNT |    /* don't trigger automounts, mnt_id is a local concept */
403
                  AT_STATX_DONT_SYNC,  /* don't go to the network, mnt_id is a local concept */
404
                  STATX_MNT_ID,
405
                  &sx) < 0)
406
                return -errno;
1✔
407

408
        if (FLAGS_SET(sx.stx_mask, STATX_MNT_ID)) {
6,223✔
409
                *ret = sx.stx_mnt_id;
6,223✔
410
                return 0;
6,223✔
411
        }
412

413
        return path_get_mnt_id_at_fallback(dir_fd, path, ret);
×
414
}
415

416
bool fstype_is_network(const char *fstype) {
1,952✔
417
        const char *x;
1,952✔
418

419
        x = startswith(fstype, "fuse.");
1,952✔
420
        if (x)
1,952✔
421
                fstype = x;
×
422

423
        if (nulstr_contains(filesystem_sets[FILESYSTEM_SET_NETWORK].value, fstype))
1,952✔
424
                return true;
1,952✔
425

426
        /* Filesystems not present in the internal database */
427
        return STR_IN_SET(fstype,
1,948✔
428
                          "davfs",
429
                          "glusterfs",
430
                          "lustre",
431
                          "sshfs");
432
}
433

434
/* Returns true if 'fstype' is a file system that may need quota handling after mounting.
 *
 * Two kinds of handling exist:
 *   1. quotacheck needs to be run for some file systems after they are mounted, if the file system
 *      was not unmounted cleanly.
 *   2. quotaon may need to be run to enable quota usage tracking and/or enforcement.
 *
 * Per file system:
 *   ext2     - needs 1) and 2)
 *   ext3     - needs 2) if configured using usrjquota/grpjquota mount options
 *   ext4     - needs 1) if created without journal, needs 2) if created without QUOTA
 *              filesystem feature
 *   reiserfs - needs 2)
 *   jfs      - needs 2)
 *   f2fs     - needs 2) if configured using usrjquota/grpjquota/prjjquota mount options
 *   xfs, gfs2, ocfs2, btrfs - nothing needed
 *
 * For reference see the filesystem and quota man pages. */
bool fstype_needs_quota(const char *fstype) {
        bool needs_quota = STR_IN_SET(fstype,
                                      "ext2",
                                      "ext3",
                                      "ext4",
                                      "reiserfs",
                                      "jfs",
                                      "f2fs");

        return needs_quota;
}
459

460
bool fstype_is_api_vfs(const char *fstype) {
59✔
461
        assert(fstype);
59✔
462

463
        const FilesystemSet *fs;
59✔
464
        FOREACH_ARGUMENT(fs,
267✔
465
                         filesystem_sets + FILESYSTEM_SET_BASIC_API,
466
                         filesystem_sets + FILESYSTEM_SET_AUXILIARY_API,
467
                         filesystem_sets + FILESYSTEM_SET_PRIVILEGED_API,
468
                         filesystem_sets + FILESYSTEM_SET_TEMPORARY)
469
                if (nulstr_contains(fs->value, fstype))
236✔
470
                    return true;
28✔
471

472
        /* Filesystems not present in the internal database */
473
        return STR_IN_SET(fstype,
31✔
474
                          "autofs",
475
                          "cpuset",
476
                          "devtmpfs");
477
}
478

479
/* Returns true unless 'fstype' (with a leading "fuse." prefix ignored) is "9p", a network file
 * system per fstype_is_network(), or an API VFS per fstype_is_api_vfs(). */
bool fstype_is_blockdev_backed(const char *fstype) {
        const char *stripped = startswith(fstype, "fuse.");
        if (stripped)
                fstype = stripped;

        if (streq(fstype, "9p"))
                return false;

        if (fstype_is_network(fstype))
                return false;

        return !fstype_is_api_vfs(fstype);
}
488

489
/* Returns true if 'fstype' is one of the Linux file systems that are necessarily read-only. */
bool fstype_is_ro(const char *fstype) {
        bool read_only = STR_IN_SET(fstype,
                                    "DM_verity_hash",
                                    "cramfs",
                                    "erofs",
                                    "iso9660",
                                    "squashfs");

        return read_only;
}
498

499
/* Returns true if 'fstype' is known to support the "discard" mount option. 'fstype' must not be
 * NULL. */
bool fstype_can_discard(const char *fstype) {
        assert(fstype);

        /* Consult a curated list first, to avoid calling fsopen() which might load kmods, which might
         * not be allowed in our MAC context. Only if the type is not on the list, ask the kernel
         * (works on new kernels). */
        return STR_IN_SET(fstype, "btrfs", "f2fs", "ext4", "vfat", "xfs") ||
                mount_option_supported(fstype, "discard", NULL) > 0;
}
510

511
/* Returns the name of the mount option that disables recovery/log replay for 'fstype':
 * "norecovery", "rescue=nologreplay" (btrfs, when supported), or NULL if no such option is known to
 * be supported. 'fstype' must not be NULL. */
const char* fstype_norecovery_option(const char *fstype) {
        assert(fstype);

        /* Consult a curated list first, to avoid calling fsopen() which might load kmods, which might
         * not be allowed in our MAC context. */
        if (STR_IN_SET(fstype, "ext3", "ext4", "xfs"))
                return "norecovery";

        /* btrfs dropped support for the "norecovery" option in 6.8
         * (https://github.com/torvalds/linux/commit/a1912f712188291f9d7d434fba155461f1ebef66) and
         * replaced it with rescue=nologreplay. Hence probe for the new name first, and only then fall
         * back to probing for the old name. */
        if (streq(fstype, "btrfs")) {
                int r = mount_option_supported(fstype, "rescue=nologreplay", NULL);

                if (r > 0)
                        return "rescue=nologreplay";

                if (r == -EAGAIN) {
                        log_debug_errno(r, "Failed to check for btrfs 'rescue=nologreplay' option, assuming old kernel with 'norecovery': %m");
                        return "norecovery";
                }

                if (r < 0)
                        log_debug_errno(r, "Failed to check for btrfs 'rescue=nologreplay' option, assuming it is not supported: %m");
        }

        /* On new kernels we can just ask the kernel */
        if (mount_option_supported(fstype, "norecovery", NULL) > 0)
                return "norecovery";

        return NULL;
}
540

541
/* Returns true if 'fstype' supports both the fmask= and dmask= mount options. 'fstype' must not be
 * NULL. */
bool fstype_can_fmask_dmask(const char *fstype) {
        assert(fstype);

        /* Curated answer first, to avoid calling fsopen() which might load kmods, which might not be
         * allowed in our MAC context. */
        if (streq(fstype, "vfat"))
                return true;

        /* If we don't know ourselves, on new kernels we can just ask the kernel. Both options must
         * be supported. */
        if (mount_option_supported(fstype, "fmask", "0177") <= 0)
                return false;

        return mount_option_supported(fstype, "dmask", "0077") > 0;
}
549

550
/* Returns true if 'fstype' has a uid=/gid= mount option that fixates the owners of all files and
 * directories, current and future.
 *
 * This deliberately does not ask the kernel via mount_option_supported(): the uid=/gid= setting
 * means different things on different file systems (some apply it only to the root directory inode,
 * others to all inodes), hence a curated list is maintained here. 😢 */
bool fstype_can_uid_gid(const char *fstype) {
        bool fixates_owner = STR_IN_SET(fstype,
                                        "adfs",
                                        "exfat",
                                        "fat",
                                        "hfs",
                                        "hpfs",
                                        "iso9660",
                                        "msdos",
                                        "ntfs",
                                        "vfat");

        return fixates_owner;
}
568

569
int dev_is_devtmpfs(void) {
249✔
570
        _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
249✔
571
        int mount_id, r;
249✔
572
        char *e;
249✔
573

574
        r = path_get_mnt_id("/dev", &mount_id);
249✔
575
        if (r < 0)
249✔
576
                return r;
577

578
        r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
249✔
579
        if (r == -ENOENT)
249✔
580
                return proc_mounted() > 0 ? -ENOENT : -ENOSYS;
×
581
        if (r < 0)
249✔
582
                return r;
583

584
        for (;;) {
11,227✔
585
                _cleanup_free_ char *line = NULL;
10,993✔
586
                int mid;
11,227✔
587

588
                r = read_line(proc_self_mountinfo, LONG_LINE_MAX, &line);
11,227✔
589
                if (r < 0)
11,227✔
590
                        return r;
591
                if (r == 0)
11,227✔
592
                        break;
593

594
                if (sscanf(line, "%i", &mid) != 1)
10,993✔
595
                        continue;
×
596

597
                if (mid != mount_id)
10,993✔
598
                        continue;
10,744✔
599

600
                e = strstrafter(line, " - ");
249✔
601
                if (!e)
249✔
602
                        continue;
×
603

604
                /* accept any name that starts with the currently expected type */
605
                if (startswith(e, "devtmpfs"))
249✔
606
                        return true;
607
        }
608

609
        return false;
234✔
610
}
611

612
/* Calls mount() with the target given as file descriptor, by referencing it through the path that
 * FORMAT_PROC_FD_PATH() generates for 'target_fd'. Returns 0 on success, negative errno-style value
 * on failure; -ENOSYS if mount() failed with ENOENT and proc_mounted() reports /proc/ as not
 * mounted. */
static int mount_fd(
                const char *source,
                int target_fd,
                const char *filesystemtype,
                unsigned long mountflags,
                const void *data) {

        assert(target_fd >= 0);

        if (mount(source, FORMAT_PROC_FD_PATH(target_fd), filesystemtype, mountflags, data) >= 0)
                return 0;

        if (errno != ENOENT)
                return -errno;

        /* ENOENT can mean two things: either that the source is missing, or that /proc/ isn't
         * mounted. Check for the latter to generate better error messages. */
        if (proc_mounted() == 0)
                return -ENOSYS;

        return -ENOENT;
}
635

636
int mount_nofollow(
67,419✔
637
                const char *source,
638
                const char *target,
639
                const char *filesystemtype,
640
                unsigned long mountflags,
641
                const void *data) {
642

643
        _cleanup_close_ int fd = -EBADF;
67,419✔
644

645
        assert(target);
67,419✔
646

647
        /* In almost all cases we want to manipulate the mount table without following symlinks, hence
648
         * mount_nofollow() is usually the way to go. The only exceptions are environments where /proc/ is
649
         * not available yet, since we need /proc/self/fd/ for this logic to work. i.e. during the early
650
         * initialization of namespacing/container stuff where /proc is not yet mounted (and maybe even the
651
         * fs to mount) we can only use traditional mount() directly.
652
         *
653
         * Note that this disables following only for the final component of the target, i.e symlinks within
654
         * the path of the target are honoured, as are symlinks in the source path everywhere. */
655

656
        fd = open(target, O_PATH|O_CLOEXEC|O_NOFOLLOW);
67,419✔
657
        if (fd < 0)
67,419✔
658
                return -errno;
1,440✔
659

660
        return mount_fd(source, fd, filesystemtype, mountflags, data);
65,979✔
661
}
662

663
/* Maps the propagation bits (MS_SHARED, MS_SLAVE, MS_PRIVATE) contained in 'flags' to a string. All
 * other bits are ignored. Returns "" if none of the three is set, "shared"/"slave"/"private" if
 * exactly one is set, and NULL if more than one is set. */
const char* mount_propagation_flag_to_string(unsigned long flags) {
        unsigned long propagation = flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE);

        if (propagation == 0)
                return "";
        if (propagation == MS_SHARED)
                return "shared";
        if (propagation == MS_SLAVE)
                return "slave";
        if (propagation == MS_PRIVATE)
                return "private";

        /* A combination of propagation flags has no name. */
        return NULL;
}
678

679
/* Parses a mount propagation name into its flag: empty/NULL → 0, "shared" → MS_SHARED, "slave" →
 * MS_SLAVE, "private" → MS_PRIVATE. Returns 0 on success with *ret set; returns -EINVAL for any
 * other name, in which case *ret is left untouched. */
int mount_propagation_flag_from_string(const char *name, unsigned long *ret) {
        unsigned long flag;

        if (isempty(name))
                flag = 0;
        else if (streq(name, "shared"))
                flag = MS_SHARED;
        else if (streq(name, "slave"))
                flag = MS_SLAVE;
        else if (streq(name, "private"))
                flag = MS_PRIVATE;
        else
                return -EINVAL;

        *ret = flag;
        return 0;
}
693

694
/* Returns true if 'flag' is exactly 0, MS_SHARED, MS_PRIVATE or MS_SLAVE; any other value
 * (including combinations of these) is invalid. */
bool mount_propagation_flag_is_valid(unsigned long flag) {
        switch (flag) {

        case 0:
        case MS_SHARED:
        case MS_PRIVATE:
        case MS_SLAVE:
                return true;

        default:
                return false;
        }
}
697

698
/* Reports whether the new mount API is available, probed via mount_setattr(). The result is
 * computed on the first call and kept in a function-local static for later calls. */
bool mount_new_api_supported(void) {
        static int cache = -1;

        if (cache < 0) {
                /* mount_setattr() is the newer API among the ones we use, so use it as boundary.
                 * The call is issued with deliberately invalid arguments: if it is working properly
                 * it should return an error other than a "not supported" one. */
                int r = RET_NERRNO(mount_setattr(-EBADF, NULL, 0, NULL, 0));

                cache = !(r == 0 || ERRNO_IS_NOT_SUPPORTED(r));
        }

        return cache;
}
712

713
unsigned long ms_nosymfollow_supported(void) {
4,832✔
714
        _cleanup_close_ int fsfd = -EBADF, mntfd = -EBADF;
4,832✔
715
        static int cache = -1;
4,832✔
716

717
        /* Returns MS_NOSYMFOLLOW if it is supported, zero otherwise. */
718

719
        if (cache >= 0)
4,832✔
720
                return cache ? MS_NOSYMFOLLOW : 0;
2,518✔
721

722
        if (!mount_new_api_supported())
2,314✔
723
                goto not_supported;
×
724

725
        /* Checks if MS_NOSYMFOLLOW is supported (which was added in 5.10). We use the new mount API's
726
         * mount_setattr() call for that, which was added in 5.12, which is close enough. */
727

728
        fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
2,314✔
729
        if (fsfd < 0) {
2,314✔
730
                if (ERRNO_IS_NOT_SUPPORTED(errno))
2✔
731
                        goto not_supported;
×
732

733
                log_debug_errno(errno, "Failed to open superblock context for tmpfs: %m");
2✔
734
                return 0;
2✔
735
        }
736

737
        if (fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) {
2,312✔
738
                if (ERRNO_IS_NOT_SUPPORTED(errno))
×
739
                        goto not_supported;
×
740

741
                log_debug_errno(errno, "Failed to create tmpfs superblock: %m");
×
742
                return 0;
×
743
        }
744

745
        mntfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
2,312✔
746
        if (mntfd < 0) {
2,312✔
747
                if (ERRNO_IS_NOT_SUPPORTED(errno))
×
748
                        goto not_supported;
×
749

750
                log_debug_errno(errno, "Failed to turn superblock fd into mount fd: %m");
×
751
                return 0;
×
752
        }
753

754
        if (mount_setattr(mntfd, "", AT_EMPTY_PATH|AT_RECURSIVE,
2,312✔
755
                          &(struct mount_attr) {
2,312✔
756
                                  .attr_set = MOUNT_ATTR_NOSYMFOLLOW,
757
                          }, sizeof(struct mount_attr)) < 0) {
758
                if (ERRNO_IS_NOT_SUPPORTED(errno))
×
759
                        goto not_supported;
×
760

761
                log_debug_errno(errno, "Failed to set MOUNT_ATTR_NOSYMFOLLOW mount attribute: %m");
×
762
                return 0;
×
763
        }
764

765
        cache = true;
2,312✔
766
        return MS_NOSYMFOLLOW;
2,312✔
767

768
not_supported:
×
769
        cache = false;
×
770
        return 0;
×
771
}
772

773
int mount_option_supported(const char *fstype, const char *key, const char *value) {
3,609✔
774
        _cleanup_close_ int fd = -EBADF;
3,609✔
775
        int r;
3,609✔
776

777
        /* Checks if the specified file system supports a mount option. Returns > 0 if it supports it, == 0 if
778
         * it does not. Return -EAGAIN if we can't determine it. And any other error otherwise. */
779

780
        assert(fstype);
3,609✔
781
        assert(key);
3,609✔
782

783
        fd = fsopen(fstype, FSOPEN_CLOEXEC);
3,609✔
784
        if (fd < 0)
3,609✔
785
                return log_debug_errno(errno, "Failed to open superblock context for '%s': %m", fstype);
1✔
786

787
        /* Various file systems support fs context only in recent kernels (e.g. btrfs). For older kernels
788
         * fsconfig() with FSCONFIG_SET_STRING/FSCONFIG_SET_FLAG never fail. Which sucks, because we want to
789
         * use it for testing support, after all. Let's hence do a check if the file system got converted yet
790
         * first. */
791
        if (fsconfig(fd, FSCONFIG_SET_FD, "adefinitelynotexistingmountoption", NULL, fd) < 0) {
3,608✔
792
                /* If FSCONFIG_SET_FD is not supported for the fs, then the file system was not converted to
793
                 * the new mount API yet. If it returns EINVAL the mount option doesn't exist, but the fstype
794
                 * is converted. */
795
                if (errno == EOPNOTSUPP)
3,608✔
796
                        return -EAGAIN; /* fs not converted to new mount API → don't know */
797
                if (errno != EINVAL)
3,606✔
798
                        return log_debug_errno(errno, "Failed to check if file system '%s' has been converted to new mount API: %m", fstype);
×
799

800
                /* So FSCONFIG_SET_FD worked, but the option didn't exist (we got EINVAL), this means the fs
801
                 * is converted. Let's now ask the actual question we wonder about. */
802
        } else
803
                return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN), "FSCONFIG_SET_FD worked unexpectedly for '%s', whoa!", fstype);
×
804

805
        if (value)
3,606✔
806
                r = fsconfig(fd, FSCONFIG_SET_STRING, key, value, 0);
876✔
807
        else
808
                r = fsconfig(fd, FSCONFIG_SET_FLAG, key, NULL, 0);
2,730✔
809
        if (r < 0) {
3,606✔
810
                if (errno == EINVAL)
192✔
811
                        return false; /* EINVAL means option not supported. */
812

813
                return log_debug_errno(errno, "Failed to set '%s%s%s' on '%s' superblock context: %m",
×
814
                                       key, value ? "=" : "", strempty(value), fstype);
815
        }
816

817
        return true; /* works! */
818
}
819

820
/* Returns true if 'p' is one of "/dev", "/sys" or "/proc", or located below one of them. API VFS
 * are either directly mounted on any of these three paths, or below it. 'p' must not be NULL. */
bool path_below_api_vfs(const char *p) {
        bool below;

        assert(p);

        below = PATH_STARTSWITH_SET(p, "/dev", "/sys", "/proc");
        return below;
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc