• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 19282013399

12 Nov 2025 12:00AM UTC coverage: 72.412% (+0.01%) from 72.402%
19282013399

push

github

web-flow
core/exec-credentials: port to new mount API, ensure atomicity for creds installation (#39637)

103 of 137 new or added lines in 4 files covered. (75.18%)

850 existing lines in 45 files now uncovered.

307170 of 424195 relevant lines covered (72.41%)

1105108.57 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

64.83
/src/basic/mountpoint-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <fcntl.h>
4
#include <sys/mount.h>
5

6
#include "alloc-util.h"
7
#include "chase.h"
8
#include "errno-util.h"
9
#include "fd-util.h"
10
#include "fileio.h"
11
#include "filesystems.h"
12
#include "fs-util.h"
13
#include "log.h"
14
#include "mountpoint-util.h"
15
#include "nulstr-util.h"
16
#include "parse-util.h"
17
#include "path-util.h"
18
#include "stat-util.h"
19
#include "stdio-util.h"
20
#include "string-util.h"
21
#include "strv.h"
22

23
/* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
24
 * any more currently defined value to future-proof things: if the size is increased in the API headers, and our code
25
 * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with
26
 * EINVAL right away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition
27
 * from when it was introduced. We use it as a start value only anyway (see below), and hence should be able to deal
28
 * with large file handles anyway. */
29
#define ORIGINAL_MAX_HANDLE_SZ 128
30

31
bool is_name_to_handle_at_fatal_error(int err) {
2,727✔
32
        /* name_to_handle_at() can return "acceptable" errors that are due to the context. For example
33
         * the file system does not support name_to_handle_at() (EOPNOTSUPP), or the syscall was blocked
34
         * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container), or
35
         * the mount point is not triggered yet (EOVERFLOW, think autofs+nfs4), or some general name_to_handle_at()
36
         * flakiness (EINVAL). However other errors are not supposed to happen and therefore are considered
37
         * fatal ones. */
38

39
        assert(err < 0);
2,727✔
40

41
        if (ERRNO_IS_NEG_NOT_SUPPORTED(err))
2,727✔
42
                return false;
43
        if (ERRNO_IS_NEG_PRIVILEGE(err))
×
44
                return false;
45

46
        return !IN_SET(err, -EOVERFLOW, -EINVAL);
×
47
}
48

49
int name_to_handle_at_loop(
25,563✔
50
                int fd,
51
                const char *path,
52
                struct file_handle **ret_handle,
53
                int *ret_mnt_id,
54
                int flags) {
55

56
        size_t n = ORIGINAL_MAX_HANDLE_SZ;
25,563✔
57

58
        assert(fd >= 0 || fd == AT_FDCWD);
25,563✔
59
        assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH|AT_HANDLE_FID)) == 0);
25,563✔
60

61
        /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
62
         * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
63
         * start value, it is not an upper bound on the buffer size required.
64
         *
65
         * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
66
         * as NULL if there's no interest in either. */
67

68
        for (;;) {
25,563✔
69
                _cleanup_free_ struct file_handle *h = NULL;
×
70
                int mnt_id = -1;
25,563✔
71

72
                h = malloc0(offsetof(struct file_handle, f_handle) + n);
25,563✔
73
                if (!h)
25,563✔
74
                        return -ENOMEM;
75

76
                h->handle_bytes = n;
25,563✔
77

78
                if (name_to_handle_at(fd, strempty(path), h, &mnt_id, flags) >= 0) {
51,117✔
79

80
                        if (ret_handle)
25,563✔
81
                                *ret_handle = TAKE_PTR(h);
25,563✔
82

83
                        if (ret_mnt_id)
25,563✔
84
                                *ret_mnt_id = mnt_id;
25,563✔
85

86
                        return 0;
25,563✔
87
                }
88
                if (errno != EOVERFLOW)
×
89
                        return -errno;
×
90

91
                if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
×
92

93
                        /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
94
                         * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
95
                         * be filled in, and the caller was interested in only the mount ID and nothing else. */
96

97
                        *ret_mnt_id = mnt_id;
×
98
                        return 0;
×
99
                }
100

101
                /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by
102
                 * something else (apparently EOVERFLOW is returned for untriggered nfs4 autofs mounts
103
                 * sometimes), not by the too small buffer. In that case propagate EOVERFLOW */
104
                if (h->handle_bytes <= n)
×
105
                        return -EOVERFLOW;
106

107
                /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
108
                n = h->handle_bytes;
×
109

110
                /* paranoia: check for overflow (note that .handle_bytes is unsigned only) */
111
                if (n > UINT_MAX - offsetof(struct file_handle, f_handle))
×
112
                        return -EOVERFLOW;
113
        }
114
}
115

116
int name_to_handle_at_try_fid(
25,563✔
117
                int fd,
118
                const char *path,
119
                struct file_handle **ret_handle,
120
                int *ret_mnt_id,
121
                int flags) {
122

123
        int r;
25,563✔
124

125
        assert(fd >= 0 || fd == AT_FDCWD);
25,563✔
126

127
        /* First issues name_to_handle_at() with AT_HANDLE_FID. If this fails and this is not a fatal error
128
         * we'll try without the flag, in order to support older kernels that didn't have AT_HANDLE_FID
129
         * (i.e. older than Linux 6.5). */
130

131
        r = name_to_handle_at_loop(fd, path, ret_handle, ret_mnt_id, flags | AT_HANDLE_FID);
25,563✔
132
        if (r >= 0 || is_name_to_handle_at_fatal_error(r))
25,563✔
133
                return r;
25,563✔
134

135
        return name_to_handle_at_loop(fd, path, ret_handle, ret_mnt_id, flags & ~AT_HANDLE_FID);
×
136
}
137

138
static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *ret_mnt_id) {
×
139
        char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
×
140
        _cleanup_close_ int subfd = -EBADF;
×
141
        int r;
×
142

143
        assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
×
144
        assert(ret_mnt_id);
×
145

146
        if ((flags & AT_EMPTY_PATH) && isempty(filename))
×
147
                xsprintf(path, "/proc/self/fdinfo/%i", fd);
×
148
        else {
149
                subfd = openat(fd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_FOLLOW ? 0 : O_NOFOLLOW));
×
150
                if (subfd < 0)
×
151
                        return -errno;
×
152

153
                xsprintf(path, "/proc/self/fdinfo/%i", subfd);
×
154
        }
155

156
        _cleanup_free_ char *p = NULL;
×
157
        r = get_proc_field(path, "mnt_id", &p);
×
158
        if (r == -ENOENT)
×
159
                return -EBADF;
160
        if (r < 0)
×
161
                return r;
162

163
        return safe_atoi(p, ret_mnt_id);
×
164
}
165

166
static bool filename_possibly_with_slash_suffix(const char *s) {
83,365✔
167
        const char *slash, *copied;
83,365✔
168

169
        /* Checks whether the specified string is either a filename, or a filename with a suffix of
170
         * slashes. But nothing else.
171
         *
172
         * this is OK: foo, bar, foo/, bar/, foo//, bar///
173
         * this is not OK: "", "/", "/foo", "foo/bar", ".", ".." … */
174

175
        slash = strchr(s, '/');
83,365✔
176
        if (!slash)
83,365✔
177
                return filename_is_valid(s);
83,351✔
178

179
        if (slash - s > PATH_MAX) /* We want to allocate on the stack below, hence do a size check first */
14✔
180
                return false;
181

182
        if (slash[strspn(slash, "/")] != 0) /* Check that the suffix consist only of one or more slashes */
14✔
183
                return false;
184

185
        copied = strndupa_safe(s, slash - s);
10✔
186
        return filename_is_valid(copied);
10✔
187
}
188

189
bool file_handle_equal(const struct file_handle *a, const struct file_handle *b) {
11,815✔
190
        if (a == b)
11,815✔
191
                return true;
192
        if (!a != !b)
11,815✔
193
                return false;
194
        if (a->handle_type != b->handle_type)
11,815✔
195
                return false;
196

197
        return memcmp_nn(a->f_handle, a->handle_bytes, b->f_handle, b->handle_bytes) == 0;
11,796✔
198
}
199

200
int is_mount_point_at(int fd, const char *filename, int flags) {
83,638✔
201
        bool fd_is_self;
83,638✔
202
        int r;
83,638✔
203

204
        assert(fd >= 0 || fd == AT_FDCWD);
83,638✔
205
        assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
83,638✔
206

207
        if (isempty(filename)) {
83,638✔
208
                if (fd == AT_FDCWD)
222✔
209
                        filename = ".";
210
                else {
211
                        /* If the file name is empty we'll see if the specified 'fd' is a mount point.
212
                         * That's only supported by statx(), or if the inode specified via 'fd' refers to a
213
                         * directory. Otherwise, we'll have to fail (ENOTDIR), because we have no kernel API
214
                         * to query the information we need. */
215
                        flags |= AT_EMPTY_PATH;
220✔
216
                        filename = "";
220✔
217
                }
218

219
                fd_is_self = true;
220
        } else if (STR_IN_SET(filename, ".", "./"))
83,416✔
221
                fd_is_self = true;
222
        else {
223
                /* Insist that the specified filename is actually a filename, and not a path, i.e. some inode
224
                 * further up or down the tree than immediately below the specified directory fd. */
225
                if (!filename_possibly_with_slash_suffix(filename))
83,365✔
226
                        return -EINVAL;
7✔
227

228
                fd_is_self = false;
229
        }
230

231
        /* First we will try statx()'s STATX_ATTR_MOUNT_ROOT attribute, which is our ideal API, available
232
         * since kernel 5.8.
233
         *
234
         * If that fails, our second try is the name_to_handle_at() syscall, which tells us the mount id and
235
         * an opaque file "handle". It is not supported everywhere though (kernel compile-time option, not
236
         * all file systems are hooked up). If it works the mount id is usually good enough to tell us
237
         * whether something is a mount point.
238
         *
239
         * If that didn't work we will try to read the mount id from /proc/self/fdinfo/<fd>. This is almost
240
         * as good as name_to_handle_at(), however, does not return the opaque file handle. The opaque file
241
         * handle is pretty useful to detect the root directory, which we should always consider a mount
242
         * point. Hence we use this only as fallback.
243
         *
244
         * Note that traditionally the check is done via fstat()-based st_dev comparisons. However, various
245
         * file systems don't guarantee same st_dev across single fs anymore, e.g. unionfs exposes file systems
246
         * with a variety of st_dev reported. Also, btrfs subvolumes have different st_dev, even though
247
         * they aren't real mounts of their own. */
248

249
        struct statx sx = {}; /* explicitly initialize the struct to make msan silent. */
83,631✔
250
        if (statx(fd, filename,
83,631✔
251
                  at_flags_normalize_nofollow(flags) |
83,631✔
252
                  AT_NO_AUTOMOUNT |            /* don't trigger automounts – mounts are a local concept, hence no need to trigger automounts to determine STATX_ATTR_MOUNT_ROOT */
253
                  AT_STATX_DONT_SYNC,          /* don't go to the network for this – for similar reasons */
254
                  STATX_TYPE,
255
                  &sx) < 0)
256
                return -errno;
3,781✔
257

258
        if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) /* yay! */
79,850✔
259
                return FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT);
79,850✔
260

261
        _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
×
262
        int mount_id = -1, mount_id_parent = -1;
×
263
        bool nosupp = false;
×
264

265
        r = name_to_handle_at_try_fid(fd, filename, &h, &mount_id, flags);
×
266
        if (r < 0) {
×
267
                if (is_name_to_handle_at_fatal_error(r))
×
268
                        return r;
269
                if (!ERRNO_IS_NOT_SUPPORTED(r))
×
270
                        goto fallback_fdinfo;
×
271

272
                /* This file system does not support name_to_handle_at(), hence let's see if the upper fs
273
                 * supports it (in which case it is a mount point), otherwise fall back to the fdinfo logic. */
274
                nosupp = true;
275
        }
276

277
        if (fd_is_self)
×
278
                r = name_to_handle_at_try_fid(fd, "..", &h_parent, &mount_id_parent, 0); /* can't work for non-directories 😢 */
×
279
        else
280
                r = name_to_handle_at_try_fid(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
×
281
        if (r < 0) {
×
282
                if (is_name_to_handle_at_fatal_error(r))
×
283
                        return r;
284
                if (!ERRNO_IS_NOT_SUPPORTED(r))
×
285
                        goto fallback_fdinfo;
×
286
                if (nosupp)
×
287
                        /* Both the parent and the directory can't do name_to_handle_at() */
288
                        goto fallback_fdinfo;
×
289

290
                /* The parent can't do name_to_handle_at() but the directory we are
291
                 * interested in can?  If so, it must be a mount point. */
292
                return 1;
293
        }
294

295
        /* The parent can do name_to_handle_at() but the directory we are interested in can't? If
296
         * so, it must be a mount point. */
297
        if (nosupp)
×
298
                return 1;
299

300
        /* If the file handle for the directory we are interested in and its parent are identical,
301
         * we assume this is the root directory, which is a mount point. */
302
        if (file_handle_equal(h_parent, h))
×
303
                return 1;
304

305
        return mount_id != mount_id_parent;
×
306

307
fallback_fdinfo:
×
308
        r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
×
309
        if (r < 0)
×
310
                return r;
311

312
        if (fd_is_self)
×
313
                r = fd_fdinfo_mnt_id(fd, "..", 0, &mount_id_parent); /* can't work for non-directories 😢 */
×
314
        else
315
                r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
×
316
        if (r < 0)
×
317
                return r;
318

319
        if (mount_id != mount_id_parent)
×
320
                return 1;
321

322
        /* Hmm, so, the mount ids are the same. This leaves one special case though for the root file
323
         * system. For that, let's see if the parent directory has the same inode as we are interested
324
         * in. */
325

326
        struct stat a, b;
×
327

328
        /* yay for fstatat() taking a different set of flags than the other _at() above */
329
        if (fstatat(fd, filename, &a, at_flags_normalize_nofollow(flags)) < 0)
×
330
                return -errno;
×
331

332
        if (fd_is_self)
×
333
                r = fstatat(fd, "..", &b, 0);
×
334
        else
335
                r = fstatat(fd, "", &b, AT_EMPTY_PATH);
×
336
        if (r < 0)
×
337
                return -errno;
×
338

339
        /* A directory with same device and inode as its parent must be the root directory. Otherwise
340
         * not a mount point.
341
         *
342
         * NB: we avoid inode_same_at() here because it internally attempts name_to_handle_at_try_fid() first,
343
         * which is redundant. */
344
        return stat_inode_same(&a, &b);
×
345
}
346

347
/* flags can be AT_SYMLINK_FOLLOW or 0 */
348
int path_is_mount_point_full(const char *path, const char *root, int flags) {
26,578✔
349
        _cleanup_close_ int dfd = -EBADF;
26,578✔
350
        _cleanup_free_ char *fn = NULL;
26,578✔
351

352
        assert(path);
26,578✔
353
        assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
26,578✔
354

355
        if (path_equal(path, "/"))
26,578✔
356
                return 1;
357

358
        /* we need to resolve symlinks manually, we can't just rely on is_mount_point_at() to do that for us;
359
         * if we have a structure like /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
360
         * look at needs to be /usr, not /. */
361
        dfd = chase_and_open_parent(path, root,
26,520✔
362
                                    CHASE_TRAIL_SLASH|(FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? 0 : CHASE_NOFOLLOW),
26,520✔
363
                                    &fn);
364
        if (dfd < 0)
26,520✔
365
                return dfd;
366

367
        return is_mount_point_at(dfd, fn, flags);
26,398✔
368
}
369

370
int path_get_mnt_id_at_fallback(int dir_fd, const char *path, int *ret) {
×
371
        int r;
×
372

373
        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
×
374
        assert(ret);
×
375

376
        r = name_to_handle_at_loop(dir_fd, path, NULL, ret, isempty(path) ? AT_EMPTY_PATH : 0);
×
377
        if (r >= 0 || is_name_to_handle_at_fatal_error(r))
×
378
                return r;
×
379

380
        return fd_fdinfo_mnt_id(dir_fd, path, isempty(path) ? AT_EMPTY_PATH : 0, ret);
×
381
}
382

383
int path_get_mnt_id_at(int dir_fd, const char *path, int *ret) {
6,842✔
384
        struct statx sx;
6,842✔
385

386
        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
6,842✔
387
        assert(ret);
6,842✔
388

389
        if (statx(dir_fd,
13,684✔
390
                  strempty(path),
6,842✔
391
                  (isempty(path) ? AT_EMPTY_PATH : AT_SYMLINK_NOFOLLOW) |
6,842✔
392
                  AT_NO_AUTOMOUNT |    /* don't trigger automounts, mnt_id is a local concept */
393
                  AT_STATX_DONT_SYNC,  /* don't go to the network, mnt_id is a local concept */
394
                  STATX_MNT_ID,
395
                  &sx) < 0)
396
                return -errno;
1✔
397

398
        if (FLAGS_SET(sx.stx_mask, STATX_MNT_ID)) {
6,841✔
399
                *ret = sx.stx_mnt_id;
6,841✔
400
                return 0;
6,841✔
401
        }
402

403
        return path_get_mnt_id_at_fallback(dir_fd, path, ret);
×
404
}
405

406
bool fstype_is_network(const char *fstype) {
1,622✔
407
        const char *x;
1,622✔
408

409
        x = startswith(fstype, "fuse.");
1,622✔
410
        if (x)
1,622✔
411
                fstype = x;
×
412

413
        if (nulstr_contains(filesystem_sets[FILESYSTEM_SET_NETWORK].value, fstype))
1,622✔
414
                return true;
1,622✔
415

416
        /* Filesystems not present in the internal database */
417
        return STR_IN_SET(fstype,
1,618✔
418
                          "davfs",
419
                          "glusterfs",
420
                          "lustre",
421
                          "sshfs");
422
}
423

424
bool fstype_needs_quota(const char *fstype) {
×
425
       /* 1. quotacheck needs to be run for some filesystems after they are mounted
426
        *    if the filesystem was not unmounted cleanly.
427
        * 2. You may need to run quotaon to enable quota usage tracking and/or
428
        *    enforcement.
429
        * ext2     - needs 1) and 2)
430
        * ext3     - needs 2) if configured using usrjquota/grpjquota mount options
431
        * ext4     - needs 1) if created without journal, needs 2) if created without QUOTA
432
        *            filesystem feature
433
        * reiserfs - needs 2).
434
        * jfs      - needs 2)
435
        * f2fs     - needs 2) if configured using usrjquota/grpjquota/prjjquota mount options
436
        * xfs      - nothing needed
437
        * gfs2     - nothing needed
438
        * ocfs2    - nothing needed
439
        * btrfs    - nothing needed
440
        * for reference see filesystem and quota manpages */
441
        return STR_IN_SET(fstype,
×
442
                          "ext2",
443
                          "ext3",
444
                          "ext4",
445
                          "reiserfs",
446
                          "jfs",
447
                          "f2fs");
448
}
449

450
bool fstype_is_api_vfs(const char *fstype) {
58✔
451
        assert(fstype);
58✔
452

453
        const FilesystemSet *fs;
58✔
454
        FOREACH_ARGUMENT(fs,
263✔
455
                         filesystem_sets + FILESYSTEM_SET_BASIC_API,
456
                         filesystem_sets + FILESYSTEM_SET_AUXILIARY_API,
457
                         filesystem_sets + FILESYSTEM_SET_PRIVILEGED_API,
458
                         filesystem_sets + FILESYSTEM_SET_TEMPORARY)
459
                if (nulstr_contains(fs->value, fstype))
232✔
460
                    return true;
27✔
461

462
        /* Filesystems not present in the internal database */
463
        return STR_IN_SET(fstype,
31✔
464
                          "autofs",
465
                          "cpuset",
466
                          "devtmpfs");
467
}
468

469
bool fstype_is_blockdev_backed(const char *fstype) {
29✔
470
        const char *x;
29✔
471

472
        x = startswith(fstype, "fuse.");
29✔
473
        if (x)
29✔
474
                fstype = x;
×
475

476
        return !streq(fstype, "9p") && !fstype_is_network(fstype) && !fstype_is_api_vfs(fstype);
29✔
477
}
478

479
bool fstype_is_ro(const char *fstype) {
3,338✔
480
        /* All Linux file systems that are necessarily read-only */
481
        return STR_IN_SET(fstype,
3,338✔
482
                          "DM_verity_hash",
483
                          "cramfs",
484
                          "erofs",
485
                          "iso9660",
486
                          "squashfs");
487
}
488

489
bool fstype_can_discard(const char *fstype) {
6✔
490
        assert(fstype);
6✔
491

492
        /* Use a curated list as first check, to avoid calling fsopen() which might load kmods, which might
493
         * not be allowed in our MAC context. */
494
        if (STR_IN_SET(fstype, "btrfs", "f2fs", "ext4", "vfat", "xfs"))
6✔
495
                return true;
3✔
496

497
        /* On new kernels we can just ask the kernel */
498
        return mount_option_supported(fstype, "discard", NULL) > 0;
3✔
499
}
500

501
const char* fstype_norecovery_option(const char *fstype) {
176✔
502
        int r;
176✔
503

504
        assert(fstype);
176✔
505

506
        /* Use a curated list as first check, to avoid calling fsopen() which might load kmods, which might
507
         * not be allowed in our MAC context. */
508
        if (STR_IN_SET(fstype, "ext3", "ext4", "xfs"))
176✔
509
                return "norecovery";
18✔
510

511
        /* btrfs dropped support for the "norecovery" option in 6.8
512
         * (https://github.com/torvalds/linux/commit/a1912f712188291f9d7d434fba155461f1ebef66) and replaced
513
         * it with rescue=nologreplay, so we check for the new name first and fall back to checking for the
514
         * old name if the new name doesn't work. */
515
        if (streq(fstype, "btrfs")) {
158✔
516
                r = mount_option_supported(fstype, "rescue=nologreplay", NULL);
×
517
                if (r == -EAGAIN) {
×
518
                        log_debug_errno(r, "Failed to check for btrfs 'rescue=nologreplay' option, assuming old kernel with 'norecovery': %m");
×
519
                        return "norecovery";
×
520
                }
521
                if (r < 0)
×
522
                        log_debug_errno(r, "Failed to check for btrfs 'rescue=nologreplay' option, assuming it is not supported: %m");
×
523
                if (r > 0)
×
524
                        return "rescue=nologreplay";
525
        }
526

527
        /* On new kernels we can just ask the kernel */
528
        return mount_option_supported(fstype, "norecovery", NULL) > 0 ? "norecovery" : NULL;
158✔
529
}
530

531
bool fstype_can_fmask_dmask(const char *fstype) {
57✔
532
        assert(fstype);
57✔
533

534
        /* Use a curated list as first check, to avoid calling fsopen() which might load kmods, which might
535
         * not be allowed in our MAC context. If we don't know ourselves, on new kernels we can just ask the
536
         * kernel. */
537
        return streq(fstype, "vfat") || (mount_option_supported(fstype, "fmask", "0177") > 0 && mount_option_supported(fstype, "dmask", "0077") > 0);
57✔
538
}
539

540
bool fstype_can_uid_gid(const char *fstype) {
1✔
541
        /* All file systems that have a uid=/gid= mount option that fixates the owners of all files and
542
         * directories, current and future. Note that this does *not* ask the kernel via
543
         * mount_option_supported() here because the uid=/gid= setting of various file systems mean different
544
         * things: some apply it only to the root dir inode, others to all inodes in the file system. Thus we
545
         * maintain the curated list below. 😢 */
546

547
        return STR_IN_SET(fstype,
1✔
548
                          "adfs",
549
                          "exfat",
550
                          "fat",
551
                          "hfs",
552
                          "hpfs",
553
                          "iso9660",
554
                          "msdos",
555
                          "ntfs",
556
                          "vfat");
557
}
558

559
int dev_is_devtmpfs(void) {
316✔
560
        _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
316✔
561
        int mount_id, r;
316✔
562
        char *e;
316✔
563

564
        r = path_get_mnt_id("/dev", &mount_id);
316✔
565
        if (r < 0)
316✔
566
                return r;
567

568
        r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
316✔
569
        if (r == -ENOENT)
316✔
570
                return proc_mounted() > 0 ? -ENOENT : -ENOSYS;
×
571
        if (r < 0)
316✔
572
                return r;
573

574
        for (;;) {
14,355✔
575
                _cleanup_free_ char *line = NULL;
14,056✔
576
                int mid;
14,355✔
577

578
                r = read_line(proc_self_mountinfo, LONG_LINE_MAX, &line);
14,355✔
579
                if (r < 0)
14,355✔
580
                        return r;
581
                if (r == 0)
14,355✔
582
                        break;
583

584
                if (sscanf(line, "%i", &mid) != 1)
14,056✔
585
                        continue;
×
586

587
                if (mid != mount_id)
14,056✔
588
                        continue;
13,740✔
589

590
                e = strstrafter(line, " - ");
316✔
591
                if (!e)
316✔
592
                        continue;
×
593

594
                /* accept any name that starts with the currently expected type */
595
                if (startswith(e, "devtmpfs"))
316✔
596
                        return true;
597
        }
598

599
        return false;
299✔
600
}
601

602
static int mount_fd(
52,143✔
603
                const char *source,
604
                int target_fd,
605
                const char *filesystemtype,
606
                unsigned long mountflags,
607
                const void *data) {
608

609
        assert(target_fd >= 0);
52,143✔
610

611
        if (mount(source, FORMAT_PROC_FD_PATH(target_fd), filesystemtype, mountflags, data) < 0) {
52,143✔
612
                if (errno != ENOENT)
1,148✔
613
                        return -errno;
1,148✔
614

615
                /* ENOENT can mean two things: either that the source is missing, or that /proc/ isn't
616
                 * mounted. Check for the latter to generate better error messages. */
617
                if (proc_mounted() == 0)
565✔
618
                        return -ENOSYS;
619

620
                return -ENOENT;
565✔
621
        }
622

623
        return 0;
50,995✔
624
}
625

626
int mount_nofollow(
53,619✔
627
                const char *source,
628
                const char *target,
629
                const char *filesystemtype,
630
                unsigned long mountflags,
631
                const void *data) {
632

633
        _cleanup_close_ int fd = -EBADF;
53,619✔
634

635
        assert(target);
53,619✔
636

637
        /* In almost all cases we want to manipulate the mount table without following symlinks, hence
638
         * mount_nofollow() is usually the way to go. The only exceptions are environments where /proc/ is
639
         * not available yet, since we need /proc/self/fd/ for this logic to work. i.e. during the early
640
         * initialization of namespacing/container stuff where /proc is not yet mounted (and maybe even the
641
         * fs to mount) we can only use traditional mount() directly.
642
         *
643
         * Note that this disables following only for the final component of the target, i.e. symlinks within
644
         * the path of the target are honoured, as are symlinks in the source path everywhere. */
645

646
        fd = open(target, O_PATH|O_CLOEXEC|O_NOFOLLOW);
53,619✔
647
        if (fd < 0)
53,619✔
648
                return -errno;
1,476✔
649

650
        return mount_fd(source, fd, filesystemtype, mountflags, data);
52,143✔
651
}
652

653
const char* mount_propagation_flag_to_string(unsigned long flags) {
56✔
654

655
        switch (flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE)) {
56✔
656
        case 0:
657
                return "";
658
        case MS_SHARED:
1✔
659
                return "shared";
1✔
660
        case MS_SLAVE:
1✔
661
                return "slave";
1✔
662
        case MS_PRIVATE:
3✔
663
                return "private";
3✔
664
        }
665

666
        return NULL;
×
667
}
668

669
int mount_propagation_flag_from_string(const char *name, unsigned long *ret) {
11✔
670

671
        if (isempty(name))
11✔
672
                *ret = 0;
2✔
673
        else if (streq(name, "shared"))
9✔
674
                *ret = MS_SHARED;
2✔
675
        else if (streq(name, "slave"))
7✔
676
                *ret = MS_SLAVE;
2✔
677
        else if (streq(name, "private"))
5✔
678
                *ret = MS_PRIVATE;
3✔
679
        else
680
                return -EINVAL;
681
        return 0;
682
}
683

684
bool mount_propagation_flag_is_valid(unsigned long flag) {
2,236✔
685
        return IN_SET(flag, 0, MS_SHARED, MS_PRIVATE, MS_SLAVE);
2,236✔
686
}
687

688
bool mount_new_api_supported(void) {
7,328✔
689
        static int cache = -1;
7,328✔
690
        int r;
7,328✔
691

692
        if (cache >= 0)
7,328✔
693
                return cache;
2,427✔
694

695
        /* This is the newest API among the ones we use, so use it as the boundary */
696
        r = RET_NERRNO(mount_setattr(-EBADF, NULL, 0, NULL, 0));
4,901✔
697
        if (r == 0 || ERRNO_IS_NOT_SUPPORTED(r)) /* This should return an error if it is working properly */
4,901✔
698
                return (cache = false);
×
699

700
        return (cache = true);
4,901✔
701
}
702

703
unsigned long ms_nosymfollow_supported(void) {
3,155✔
704
        _cleanup_close_ int fsfd = -EBADF, mntfd = -EBADF;
3,155✔
705
        static int cache = -1;
3,155✔
706

707
        /* Returns MS_NOSYMFOLLOW if it is supported, zero otherwise. */
708

709
        if (cache >= 0)
3,155✔
710
                return cache ? MS_NOSYMFOLLOW : 0;
687✔
711

712
        if (!mount_new_api_supported())
2,468✔
713
                goto not_supported;
×
714

715
        /* Checks if MS_NOSYMFOLLOW is supported (which was added in 5.10). We use the new mount API's
716
         * mount_setattr() call for that, which was added in 5.12, which is close enough. */
717

718
        fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
2,468✔
719
        if (fsfd < 0) {
2,468✔
UNCOV
720
                if (ERRNO_IS_NOT_SUPPORTED(errno))
×
721
                        goto not_supported;
×
722

UNCOV
723
                log_debug_errno(errno, "Failed to open superblock context for tmpfs: %m");
×
UNCOV
724
                return 0;
×
725
        }
726

727
        if (fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) {
2,468✔
728
                if (ERRNO_IS_NOT_SUPPORTED(errno))
×
729
                        goto not_supported;
×
730

731
                log_debug_errno(errno, "Failed to create tmpfs superblock: %m");
×
732
                return 0;
×
733
        }
734

735
        mntfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
2,468✔
736
        if (mntfd < 0) {
2,468✔
737
                if (ERRNO_IS_NOT_SUPPORTED(errno))
×
738
                        goto not_supported;
×
739

740
                log_debug_errno(errno, "Failed to turn superblock fd into mount fd: %m");
×
741
                return 0;
×
742
        }
743

744
        if (mount_setattr(mntfd, "", AT_EMPTY_PATH|AT_RECURSIVE,
2,468✔
745
                          &(struct mount_attr) {
2,468✔
746
                                  .attr_set = MOUNT_ATTR_NOSYMFOLLOW,
747
                          }, sizeof(struct mount_attr)) < 0) {
748
                if (ERRNO_IS_NOT_SUPPORTED(errno))
×
749
                        goto not_supported;
×
750

751
                log_debug_errno(errno, "Failed to set MOUNT_ATTR_NOSYMFOLLOW mount attribute: %m");
×
752
                return 0;
×
753
        }
754

755
        cache = true;
2,468✔
756
        return MS_NOSYMFOLLOW;
2,468✔
757

758
not_supported:
×
759
        cache = false;
×
760
        return 0;
×
761
}
762

763
int mount_option_supported(const char *fstype, const char *key, const char *value) {
1,812✔
764
        _cleanup_close_ int fd = -EBADF;
1,812✔
765
        int r;
1,812✔
766

767
        /* Checks if the specified file system supports a mount option. Returns > 0 if it supports it, == 0 if
768
         * it does not. Return -EAGAIN if we can't determine it. And any other error otherwise. */
769

770
        assert(fstype);
1,812✔
771
        assert(key);
1,812✔
772

773
        fd = fsopen(fstype, FSOPEN_CLOEXEC);
1,812✔
774
        if (fd < 0)
1,812✔
UNCOV
775
                return log_debug_errno(errno, "Failed to open superblock context for '%s': %m", fstype);
×
776

777
        /* Various file systems support fs context only in recent kernels (e.g. btrfs). For older kernels
778
         * fsconfig() with FSCONFIG_SET_STRING/FSCONFIG_SET_FLAG never fail. Which sucks, because we want to
779
         * use it for testing support, after all. Let's hence do a check if the file system got converted yet
780
         * first. */
781
        if (fsconfig(fd, FSCONFIG_SET_FD, "adefinitelynotexistingmountoption", NULL, fd) < 0) {
1,812✔
782
                /* If FSCONFIG_SET_FD is not supported for the fs, then the file system was not converted to
783
                 * the new mount API yet. If it returns EINVAL the mount option doesn't exist, but the fstype
784
                 * is converted. */
785
                if (errno == EOPNOTSUPP)
1,812✔
786
                        return -EAGAIN; /* fs not converted to new mount API → don't know */
787
                if (errno != EINVAL)
1,812✔
788
                        return log_debug_errno(errno, "Failed to check if file system '%s' has been converted to new mount API: %m", fstype);
×
789

790
                /* So FSCONFIG_SET_FD worked, but the option didn't exist (we got EINVAL), this means the fs
791
                 * is converted. Let's now ask the actual question we wonder about. */
792
        } else
793
                return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN), "FSCONFIG_SET_FD worked unexpectedly for '%s', whoa!", fstype);
×
794

795
        if (value)
1,812✔
796
                r = fsconfig(fd, FSCONFIG_SET_STRING, key, value, 0);
814✔
797
        else
798
                r = fsconfig(fd, FSCONFIG_SET_FLAG, key, NULL, 0);
998✔
799
        if (r < 0) {
1,812✔
800
                if (errno == EINVAL)
214✔
801
                        return false; /* EINVAL means option not supported. */
802

803
                return log_debug_errno(errno, "Failed to set '%s%s%s' on '%s' superblock context: %m",
×
804
                                       key, value ? "=" : "", strempty(value), fstype);
805
        }
806

807
        return true; /* works! */
808
}
809

810
/* Returns true if the path 'p' (which must be set) is "/dev", "/sys" or "/proc" or located below one of
 * them, as determined by PATH_STARTSWITH_SET(). */
bool path_below_api_vfs(const char *p) {
        assert(p);

        /* API VFS are either directly mounted on any of these three paths, or below it. */
        return PATH_STARTSWITH_SET(p, "/dev", "/sys", "/proc");
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc