• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 18578253386

16 Oct 2025 06:50PM UTC coverage: 72.363% (+0.3%) from 72.072%
18578253386

push

github

web-flow
core/mount: properly handle REMOUNTING_* states in mount_stop() (#39269)

5 of 9 new or added lines in 1 file covered. (55.56%)

3694 existing lines in 74 files now uncovered.

304611 of 420946 relevant lines covered (72.36%)

1092905.84 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/mountfsd/mountwork.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <linux/loop.h>
4
#include <poll.h>
5
#include <stdlib.h>
6
#include <sys/mount.h>
7
#include <unistd.h>
8

9
#include "sd-daemon.h"
10
#include "sd-event.h"
11
#include "sd-varlink.h"
12

13
#include "argv-util.h"
14
#include "bus-polkit.h"
15
#include "chase.h"
16
#include "discover-image.h"
17
#include "dissect-image.h"
18
#include "env-util.h"
19
#include "errno-util.h"
20
#include "fd-util.h"
21
#include "fs-util.h"
22
#include "format-util.h"
23
#include "hashmap.h"
24
#include "image-policy.h"
25
#include "io-util.h"
26
#include "iovec-util.h"
27
#include "json-util.h"
28
#include "loop-util.h"
29
#include "main-func.h"
30
#include "memory-util.h"
31
#include "mount-util.h"
32
#include "namespace-util.h"
33
#include "nsresource.h"
34
#include "nulstr-util.h"
35
#include "os-util.h"
36
#include "path-util.h"
37
#include "pidref.h"
38
#include "stat-util.h"
39
#include "string-table.h"
40
#include "string-util.h"
41
#include "strv.h"
42
#include "tmpfile-util.h"
43
#include "time-util.h"
44
#include "uid-classification.h"
45
#include "uid-range.h"
46
#include "user-util.h"
47
#include "varlink-io.systemd.MountFileSystem.h"
48
#include "varlink-util.h"
49

50
#define ITERATIONS_MAX 64U
51
#define RUNTIME_MAX_USEC (5 * USEC_PER_MINUTE)
52
#define PRESSURE_SLEEP_TIME_USEC (50 * USEC_PER_MSEC)
53
#define LISTEN_IDLE_USEC (90 * USEC_PER_SEC)
54

55
static const ImagePolicy image_policy_untrusted = {
56
        .n_policies = 2,
57
        .policies = {
58
                { PARTITION_ROOT,     PARTITION_POLICY_SIGNED|PARTITION_POLICY_ABSENT },
59
                { PARTITION_USR,      PARTITION_POLICY_SIGNED|PARTITION_POLICY_ABSENT },
60
        },
61
        .default_flags = PARTITION_POLICY_IGNORE,
62
};
63

64
/* JSON dispatch callback for image policy fields. 'userdata' must point to an ImagePolicy* slot.
 *
 * - JSON null: the stored policy is released and the slot is set to whatever image_policy_free()
 *   returns.
 * - JSON string: parsed with image_policy_from_string(); on success the old policy is released and
 *   replaced by the parsed one.
 * - Anything else, or an unparsable string: the result of json_log() is returned.
 *
 * Returns 0 on success. */
static int json_dispatch_image_policy(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata) {
        ImagePolicy **target = ASSERT_PTR(userdata);
        _cleanup_(image_policy_freep) ImagePolicy *parsed = NULL;
        int r;

        /* An explicit null drops whatever policy was stored before */
        if (sd_json_variant_is_null(variant)) {
                *target = image_policy_free(*target);
                return 0;
        }

        if (!sd_json_variant_is_string(variant))
                return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));

        r = image_policy_from_string(sd_json_variant_string(variant), &parsed);
        if (r < 0)
                return json_log(variant, flags, r, "JSON field '%s' is not a valid image policy.", strna(name));

        /* Hand ownership of the freshly parsed policy over to the caller's slot */
        image_policy_free(*target);
        *target = TAKE_PTR(parsed);
        return 0;
}
87

88
typedef struct MountImageParameters {
89
        unsigned image_fd_idx;
90
        unsigned userns_fd_idx;
91
        int read_only;
92
        int growfs;
93
        char *password;
94
        ImagePolicy *image_policy;
95
        bool verity_sharing;
96
        struct iovec verity_root_hash;
97
        struct iovec verity_root_hash_sig;
98
        unsigned verity_data_fd_idx;
99
} MountImageParameters;
100

101
/* Releases everything owned by a MountImageParameters structure (used as a _cleanup_ handler).
 * The password goes through erase_and_free() rather than a plain free. */
static void mount_image_parameters_done(MountImageParameters *p) {
        assert(p);

        iovec_done(&p->verity_root_hash_sig);
        iovec_done(&p->verity_root_hash);
        p->image_policy = image_policy_free(p->image_policy);
        p->password = erase_and_free(p->password);
}
×
109

UNCOV
110
/* Checks that the image fd passed by the client is acceptable.
 *
 * The fd must refer to a block device or pass stat_verify_regular(), must pass
 * fd_verify_safe_flags(), and must be opened either O_RDONLY or O_RDWR. For O_RDONLY fds
 * p->read_only is forced to true.
 *
 * Returns 0 on success, a negative errno-style value otherwise (-EBADF for any other access mode). */
static int validate_image_fd(int fd, MountImageParameters *p) {
        struct stat st;
        int r, fl;

        assert(fd >= 0);
        assert(p);

        if (fstat(fd, &st) < 0)
                return -errno;

        /* Block devices are accepted as-is; everything else goes through stat_verify_regular(),
         * which is used here for the nice error numbers it generates. */
        if (!S_ISBLK(st.st_mode)) {
                r = stat_verify_regular(&st);
                if (r < 0)
                        return r;
        }

        fl = fd_verify_safe_flags(fd);
        if (fl < 0)
                return log_debug_errno(fl, "Image file descriptor has unsafe flags set: %m");

        int access_mode = fl & O_ACCMODE_STRICT;

        if (access_mode == O_RDWR)
                return 0;

        if (access_mode == O_RDONLY) {
                /* A read-only fd cannot back a writable mount */
                p->read_only = true;
                return 0;
        }

        return -EBADF;
}
146

147
static int verify_trusted_image_fd_by_path(int fd) {
×
UNCOV
148
        int r;
×
149

150
        assert(fd >= 0);
×
151

152
        r = secure_getenv_bool("SYSTEMD_MOUNTFSD_TRUSTED_DIRECTORIES");
×
153
        if (r == -ENXIO)  {
×
154
                if (!DEFAULT_MOUNTFSD_TRUSTED_DIRECTORIES) {
×
UNCOV
155
                        log_debug("Trusted directory mechanism disabled at compile time.");
×
UNCOV
156
                        return false;
×
157
                }
158
        } else if (r < 0) {
×
159
                log_debug_errno(r, "Failed to parse $SYSTEMD_MOUNTFSD_TRUSTED_DIRECTORIES environment variable, not trusting any image.");
×
160
                return false;
×
UNCOV
161
        } else if (!r) {
×
162
                log_debug("Trusted directory mechanism disabled via $SYSTEMD_MOUNTFSD_TRUSTED_DIRECTORIES environment variable.");
×
163
                return false;
×
164
        }
165

166
        _cleanup_free_ char *p = NULL;
×
167
        r = fd_get_path(fd, &p);
×
UNCOV
168
        if (r < 0)
×
UNCOV
169
                return log_debug_errno(r, "Failed to get path of passed image file descriptor: %m");
×
170

UNCOV
171
        struct stat sta;
×
172
        if (fstat(fd, &sta) < 0)
×
173
                return log_debug_errno(errno, "Failed to stat() passed image file descriptor: %m");
×
174
        if (!S_ISREG(sta.st_mode)) {
×
175
                log_debug("Image '%s' is not a regular file, hence skipping trusted directory check.", p);
×
176
                return false;
×
177
        }
178

179
        log_debug("Checking if image '%s' is in trusted directories.", p);
×
180

181
        for (ImageClass c = 0; c < _IMAGE_CLASS_MAX; c++)
×
182
                NULSTR_FOREACH(s, image_search_path[c]) {
×
183
                        _cleanup_close_ int dir_fd = -EBADF, inode_fd = -EBADF;
×
184
                        _cleanup_free_ char *q = NULL;
×
UNCOV
185
                        struct stat stb;
×
UNCOV
186
                        const char *e;
×
187

UNCOV
188
                        r = chase(s, NULL, CHASE_SAFE|CHASE_TRIGGER_AUTOFS, &q, &dir_fd);
×
189
                        if (r == -ENOENT)
×
190
                                continue;
×
191
                        if (r < 0) {
×
UNCOV
192
                                log_warning_errno(r, "Failed to resolve search path '%s', ignoring: %m", s);
×
193
                                continue;
×
194
                        }
195

196
                        /* Check that the inode refers to a file immediately inside the image directory,
197
                         * i.e. not the image directory itself, and nothing further down the tree */
198
                        e = path_startswith(p, q);
×
199
                        if (isempty(e))
×
UNCOV
200
                                continue;
×
201

202
                        e += strspn(e, "/");
×
UNCOV
203
                        if (!filename_is_valid(e))
×
204
                                continue;
×
205

206
                        r = chaseat(dir_fd, e, CHASE_SAFE|CHASE_TRIGGER_AUTOFS, NULL, &inode_fd);
×
UNCOV
207
                        if (r < 0)
×
UNCOV
208
                                return log_error_errno(r, "Couldn't verify that specified image '%s' is in search path '%s': %m", p, s);
×
209

210
                        if (fstat(inode_fd, &stb) < 0)
×
UNCOV
211
                                return log_error_errno(errno, "Failed to stat image file '%s/%s': %m", q, e);
×
212

UNCOV
213
                        if (stat_inode_same(&sta, &stb)) {
×
214
                                log_debug("Image '%s' is *in* trusted directories.", p);
×
UNCOV
215
                                return true; /* Yay */
×
216
                        }
217
                }
218

UNCOV
219
        log_debug("Image '%s' is *not* in trusted directories.", p);
×
220
        return false;
221
}
222

223
static int determine_image_policy(
×
224
                int image_fd,
225
                bool trusted,
226
                ImagePolicy *client_policy,
227
                ImagePolicy **ret) {
228

UNCOV
229
        _cleanup_(image_policy_freep) ImagePolicy *envvar_policy = NULL;
×
UNCOV
230
        const ImagePolicy *default_policy;
×
UNCOV
231
        const char *envvar, *e;
×
232
        int r;
×
233

UNCOV
234
        assert(image_fd >= 0);
×
UNCOV
235
        assert(ret);
×
236

237
        if (trusted) {
×
238
                envvar = "SYSTEMD_MOUNTFSD_IMAGE_POLICY_TRUSTED";
239
                default_policy = &image_policy_allow;
240
        } else {
UNCOV
241
                envvar = "SYSTEMD_MOUNTFSD_IMAGE_POLICY_UNTRUSTED";
×
242
                default_policy = &image_policy_untrusted;
×
243
        }
244

245
        e = secure_getenv(envvar);
×
UNCOV
246
        if (e) {
×
UNCOV
247
                r = image_policy_from_string(e, &envvar_policy);
×
248
                if (r < 0)
×
249
                        return log_error_errno(r, "Failed to parse image policy supplied via $%s: %m", envvar);
×
250

251
                default_policy = envvar_policy;
×
252
        }
253

254
        return image_policy_intersect(default_policy, client_policy, ret);
×
255
}
256

257
/* Validates the user namespace fd supplied by the client, if any.
 *
 * A negative *userns_fd means "none specified" and is accepted. Otherwise the fd must pass
 * fd_verify_safe_flags() and must be a user namespace; if it is not, an invalid-parameter Varlink
 * error is sent and its result returned. If the fd refers to our own user namespace it is closed
 * and *userns_fd is overwritten with the result of safe_close(), so that the caller treats the
 * request as if no namespace had been passed.
 *
 * Returns 0 if processing may continue. */
static int validate_userns(sd_varlink *link, int *userns_fd) {
        int r;

        assert(link);
        assert(userns_fd);

        if (*userns_fd < 0)
                return 0;

        r = fd_verify_safe_flags(*userns_fd);
        if (r < 0)
                return log_debug_errno(r, "User namespace file descriptor has unsafe flags set: %m");

        r = fd_is_namespace(*userns_fd, NAMESPACE_USER);
        if (r < 0)
                return r;
        if (r == 0)
                return sd_varlink_error_invalid_parameter_name(link, "userNamespaceFileDescriptor");

        /* Our own host user namespace? Then close the fd, and handle it as if none was specified. */
        r = is_our_namespace(*userns_fd, NAMESPACE_USER);
        if (r < 0)
                return log_debug_errno(r, "Failed to determine if user namespace provided by client is our own.");
        if (r == 0)
                return 0;

        log_debug("User namespace provided by client is our own.");
        *userns_fd = safe_close(*userns_fd);
        return 0;
}
287

UNCOV
288
static int vl_method_mount_image(
×
289
                sd_varlink *link,
290
                sd_json_variant *parameters,
291
                sd_varlink_method_flags_t flags,
292
                void *userdata) {
293

UNCOV
294
        static const sd_json_dispatch_field dispatch_table[] = {
×
295
                { "imageFileDescriptor",         SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint,        offsetof(MountImageParameters, image_fd_idx),         SD_JSON_MANDATORY },
296
                { "userNamespaceFileDescriptor", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint,        offsetof(MountImageParameters, userns_fd_idx),        0 },
297
                { "readOnly",                    SD_JSON_VARIANT_BOOLEAN,  sd_json_dispatch_tristate,    offsetof(MountImageParameters, read_only),            0 },
298
                { "growFileSystems",             SD_JSON_VARIANT_BOOLEAN,  sd_json_dispatch_tristate,    offsetof(MountImageParameters, growfs),               0 },
299
                { "password",                    SD_JSON_VARIANT_STRING,   sd_json_dispatch_string,      offsetof(MountImageParameters, password),             0 },
300
                { "imagePolicy",                 SD_JSON_VARIANT_STRING,   json_dispatch_image_policy,   offsetof(MountImageParameters, image_policy),         0 },
301
                { "veritySharing",               SD_JSON_VARIANT_BOOLEAN,  sd_json_dispatch_stdbool,     offsetof(MountImageParameters, verity_sharing),       0 },
302
                { "verityDataFileDescriptor",    SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint,        offsetof(MountImageParameters, verity_data_fd_idx),   0 },
303
                { "verityRootHash",              SD_JSON_VARIANT_STRING,   json_dispatch_unhex_iovec,    offsetof(MountImageParameters, verity_root_hash),     0 },
304
                { "verityRootHashSignature",     SD_JSON_VARIANT_STRING,   json_dispatch_unbase64_iovec, offsetof(MountImageParameters, verity_root_hash_sig), 0 },
305
                VARLINK_DISPATCH_POLKIT_FIELD,
306
                {}
307
        };
308

309
        _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
×
310
        _cleanup_(mount_image_parameters_done) MountImageParameters p = {
×
311
                .image_fd_idx = UINT_MAX,
312
                .userns_fd_idx = UINT_MAX,
313
                .verity_data_fd_idx = UINT_MAX,
314
                .read_only = -1,
315
                .growfs = -1,
316
        };
UNCOV
317
        _cleanup_(dissected_image_unrefp) DissectedImage *di = NULL;
×
318
        _cleanup_(loop_device_unrefp) LoopDevice *loop = NULL;
×
319
        _cleanup_(sd_json_variant_unrefp) sd_json_variant *aj = NULL;
×
UNCOV
320
        _cleanup_close_ int image_fd = -EBADF, userns_fd = -EBADF, verity_data_fd = -EBADF;
×
UNCOV
321
        _cleanup_(image_policy_freep) ImagePolicy *use_policy = NULL;
×
322
        Hashmap **polkit_registry = ASSERT_PTR(userdata);
×
323
        _cleanup_free_ char *ps = NULL;
×
324
        bool image_is_trusted = false;
×
325
        int r;
×
326

UNCOV
327
        assert(link);
×
328
        assert(parameters);
×
329

330
        sd_json_variant_sensitive(parameters); /* might contain passwords */
×
331

UNCOV
332
        r = sd_varlink_dispatch(link, parameters, dispatch_table, &p);
×
UNCOV
333
        if (r != 0)
×
334
                return r;
335

336
        /* Verity data and roothash have to be either both set, or both unset. The sig can be set only if
337
         * the roothash is set. */
338
        if ((p.verity_data_fd_idx != UINT_MAX) != (p.verity_root_hash.iov_len > 0))
×
339
                return sd_varlink_error_invalid_parameter_name(link, "verityDataFileDescriptor");
×
UNCOV
340
        if (p.verity_root_hash_sig.iov_len > 0 && p.verity_root_hash.iov_len == 0)
×
UNCOV
341
                return sd_varlink_error_invalid_parameter_name(link, "verityRootHashSignature");
×
342

343
        if (p.image_fd_idx != UINT_MAX) {
×
UNCOV
344
                image_fd = sd_varlink_peek_dup_fd(link, p.image_fd_idx);
×
345
                if (image_fd < 0)
×
UNCOV
346
                        return log_debug_errno(image_fd, "Failed to peek image fd from client: %m");
×
347
        }
348

UNCOV
349
        if (p.userns_fd_idx != UINT_MAX) {
×
UNCOV
350
                userns_fd = sd_varlink_peek_dup_fd(link, p.userns_fd_idx);
×
UNCOV
351
                if (userns_fd < 0)
×
352
                        return log_debug_errno(userns_fd, "Failed to peek user namespace fd from client: %m");
×
353
        }
354

UNCOV
355
        r = validate_image_fd(image_fd, &p);
×
UNCOV
356
        if (r < 0)
×
357
                return r;
358

UNCOV
359
        r = validate_userns(link, &userns_fd);
×
UNCOV
360
        if (r != 0)
×
361
                return r;
362

UNCOV
363
        r = verify_trusted_image_fd_by_path(image_fd);
×
UNCOV
364
        if (r < 0)
×
365
                return r;
UNCOV
366
        image_is_trusted = r;
×
367

UNCOV
368
        if (p.verity_data_fd_idx != UINT_MAX) {
×
369
                verity_data_fd = sd_varlink_peek_dup_fd(link, p.verity_data_fd_idx);
×
UNCOV
370
                if (verity_data_fd < 0)
×
UNCOV
371
                        return log_debug_errno(verity_data_fd, "Failed to peek verity data fd from client: %m");
×
372

UNCOV
373
                r = fd_verify_safe_flags(verity_data_fd);
×
UNCOV
374
                if (r < 0)
×
UNCOV
375
                        return log_debug_errno(r, "Verity data file descriptor has unsafe flags set: %m");
×
376

377
                verity.data_path = strdup(FORMAT_PROC_FD_PATH(verity_data_fd));
×
UNCOV
378
                if (!verity.data_path)
×
379
                        return -ENOMEM;
380

UNCOV
381
                verity.designator = PARTITION_ROOT;
×
382

383
                verity.root_hash = TAKE_PTR(p.verity_root_hash.iov_base);
×
384
                verity.root_hash_size = p.verity_root_hash.iov_len;
×
UNCOV
385
                p.verity_root_hash.iov_len = 0;
×
386

387
                verity.root_hash_sig = TAKE_PTR(p.verity_root_hash_sig.iov_base);
×
UNCOV
388
                verity.root_hash_sig_size = p.verity_root_hash_sig.iov_len;
×
389
                p.verity_root_hash_sig.iov_len = 0;
×
390
        }
391

UNCOV
392
        const char *polkit_details[] = {
×
UNCOV
393
                "read_only", one_zero(p.read_only > 0),
×
394
                NULL,
395
        };
396

UNCOV
397
        const char *polkit_action, *polkit_untrusted_action;
×
UNCOV
398
        PolkitFlags polkit_flags;
×
399
        if (userns_fd < 0) {
×
400
                /* Mount into the host user namespace */
401
                polkit_action = "io.systemd.mount-file-system.mount-image";
402
                polkit_untrusted_action = "io.systemd.mount-file-system.mount-untrusted-image";
403
                polkit_flags = 0;
404
        } else {
405
                /* Mount into a private user namespace */
UNCOV
406
                polkit_action = "io.systemd.mount-file-system.mount-image-privately";
×
UNCOV
407
                polkit_untrusted_action = "io.systemd.mount-file-system.mount-untrusted-image-privately";
×
408

409
                /* If polkit is not around, let's allow mounting authenticated images by default */
410
                polkit_flags = POLKIT_DEFAULT_ALLOW;
×
411
        }
412

413
        /* Let's definitely acquire the regular action privilege, for mounting properly signed images */
414
        r = varlink_verify_polkit_async_full(
×
415
                        link,
416
                        /* bus= */ NULL,
417
                        polkit_action,
418
                        polkit_details,
419
                        /* good_user= */ UID_INVALID,
420
                        polkit_flags,
421
                        polkit_registry);
UNCOV
422
        if (r <= 0)
×
423
                return r;
424

425
        /* Generate the common dissection directory here. We are not going to use it, but the clients might,
426
         * and they likely are unprivileged, hence cannot create it themselves. Hence let's just create it
427
         * here, if it is missing. */
428
        r = get_common_dissect_directory(NULL);
×
UNCOV
429
        if (r < 0)
×
430
                return r;
431

432
        r = loop_device_make(
×
433
                        image_fd,
UNCOV
434
                        p.read_only == 0 ? O_RDONLY : O_RDWR,
×
435
                        0,
436
                        UINT64_MAX,
437
                        UINT32_MAX,
438
                        LO_FLAGS_PARTSCAN,
439
                        LOCK_EX,
440
                        &loop);
UNCOV
441
        if (r < 0)
×
442
                return r;
443

UNCOV
444
        DissectImageFlags dissect_flags =
×
445
                (p.read_only == 0 ? DISSECT_IMAGE_READ_ONLY : 0) |
×
446
                (p.growfs != 0 ? DISSECT_IMAGE_GROWFS : 0) |
×
447
                DISSECT_IMAGE_DISCARD_ANY |
448
                DISSECT_IMAGE_FSCK |
449
                DISSECT_IMAGE_ADD_PARTITION_DEVICES |
×
450
                DISSECT_IMAGE_PIN_PARTITION_DEVICES |
×
451
                (p.verity_sharing ? DISSECT_IMAGE_VERITY_SHARE : 0) |
×
UNCOV
452
                (p.verity_data_fd_idx != UINT_MAX ? DISSECT_IMAGE_NO_PARTITION_TABLE : 0) |
×
453
                DISSECT_IMAGE_ALLOW_USERSPACE_VERITY;
454

455
        /* Let's see if we have acquired the privilege to mount untrusted images already */
456
        bool polkit_have_untrusted_action =
×
UNCOV
457
                varlink_has_polkit_action(link, polkit_untrusted_action, polkit_details, polkit_registry);
×
458

UNCOV
459
        for (;;) {
×
UNCOV
460
                use_policy = image_policy_free(use_policy);
×
UNCOV
461
                ps = mfree(ps);
×
462

463
                /* We use the image policy for trusted images if either the path is below a trusted
464
                 * directory, or if we have already acquired a PK authentication that tells us that untrusted
465
                 * images are OK */
466
                bool use_trusted_policy =
×
467
                        image_is_trusted ||
468
                        polkit_have_untrusted_action;
469

470
                r = determine_image_policy(
×
471
                                image_fd,
472
                                use_trusted_policy,
473
                                p.image_policy,
474
                                &use_policy);
UNCOV
475
                if (r < 0)
×
476
                        return r;
477

UNCOV
478
                r = image_policy_to_string(use_policy, /* simplify= */ true, &ps);
×
UNCOV
479
                if (r < 0)
×
480
                        return r;
481

UNCOV
482
                log_debug("Using image policy: %s", ps);
×
483

UNCOV
484
                r = dissect_loop_device(
×
485
                                loop,
486
                                &verity,
487
                                /* mount_options= */ NULL,
488
                                use_policy,
489
                                /* image_filter= */ NULL,
490
                                dissect_flags,
491
                                &di);
UNCOV
492
                if (r == -ENOPKG)
×
493
                        return sd_varlink_error(link, "io.systemd.MountFileSystem.IncompatibleImage", NULL);
×
UNCOV
494
                if (r == -ENOTUNIQ)
×
UNCOV
495
                        return sd_varlink_error(link, "io.systemd.MountFileSystem.MultipleRootPartitionsFound", NULL);
×
496
                if (r == -ENXIO)
×
UNCOV
497
                        return sd_varlink_error(link, "io.systemd.MountFileSystem.RootPartitionNotFound", NULL);
×
498
                if (r == -ERFKILL) {
×
499
                        /* The image policy refused this, let's retry after trying to get PolicyKit */
500

UNCOV
501
                        if (!polkit_have_untrusted_action) {
×
502
                                log_debug("Denied by image policy. Trying a stronger polkit authentication before continuing.");
×
UNCOV
503
                                r = varlink_verify_polkit_async_full(
×
504
                                                link,
505
                                                /* bus= */ NULL,
506
                                                polkit_untrusted_action,
507
                                                polkit_details,
508
                                                /* good_user= */ UID_INVALID,
509
                                                /* flags= */ 0,                   /* NB: the image cannot be authenticated, hence unless PK is around to allow this anyway, fail! */
510
                                                polkit_registry);
511
                                if (r <= 0 && !ERRNO_IS_NEG_PRIVILEGE(r))
×
512
                                        return r;
UNCOV
513
                                if (r > 0) {
×
514
                                        /* Try again, now that we know the client has enough privileges. */
UNCOV
515
                                        log_debug("Denied by image policy, retrying after polkit authentication.");
×
UNCOV
516
                                        polkit_have_untrusted_action = true;
×
UNCOV
517
                                        continue;
×
518
                                }
519
                        }
520

521
                        return sd_varlink_error(link, "io.systemd.MountFileSystem.DeniedByImagePolicy", NULL);
×
522
                }
UNCOV
523
                if (r < 0)
×
524
                        return r;
525

526
                /* Success */
UNCOV
527
                break;
×
528
        }
529

UNCOV
530
        r = dissected_image_load_verity_sig_partition(
×
531
                        di,
532
                        loop->fd,
×
533
                        &verity);
534
        if (r < 0)
×
535
                return r;
536

537
        r = dissected_image_guess_verity_roothash(
×
538
                        di,
539
                        &verity);
540
        if (r < 0)
×
541
                return r;
542

UNCOV
543
        r = dissected_image_decrypt(
×
544
                        di,
UNCOV
545
                        p.password,
×
546
                        &verity,
547
                        use_policy,
548
                        dissect_flags);
549
        if (r == -ENOKEY) /* new dm-verity userspace returns ENOKEY if the dm-verity signature key is not in
×
550
                           * key chain. That's great. */
551
                return sd_varlink_error(link, "io.systemd.MountFileSystem.KeyNotFound", NULL);
×
UNCOV
552
        if (r == -EBUSY) /* DM kernel subsystem is shit with returning useful errors hence we keep retrying
×
553
                          * under the assumption that some errors are transitional. Which the errors might
554
                          * not actually be. After all retries failed we return EBUSY. Let's turn that into a
555
                          * generic Verity error. It's not very helpful, could mean anything, but at least it
556
                          * gives client a clear idea that this has to do with Verity. */
UNCOV
557
                return sd_varlink_error(link, "io.systemd.MountFileSystem.VerityFailure", NULL);
×
UNCOV
558
        if (r < 0)
×
559
                return r;
560

UNCOV
561
        r = dissected_image_mount(
×
562
                        di,
563
                        /* where= */ NULL,
564
                        /* uid_shift= */ UID_INVALID,
565
                        /* uid_range= */ UID_INVALID,
566
                        userns_fd,
567
                        dissect_flags);
568
        if (r < 0)
×
569
                return r;
570

UNCOV
571
        for (PartitionDesignator d = 0; d < _PARTITION_DESIGNATOR_MAX; d++) {
×
572
                DissectedPartition *pp = di->partitions + d;
×
UNCOV
573
                int fd_idx;
×
574

UNCOV
575
                if (!pp->found)
×
UNCOV
576
                        continue;
×
577

UNCOV
578
                if (pp->fsmount_fd < 0)
×
UNCOV
579
                        continue;
×
580

UNCOV
581
                if (userns_fd >= 0) {
×
UNCOV
582
                        r = nsresource_add_mount(userns_fd, pp->fsmount_fd);
×
UNCOV
583
                        if (r < 0)
×
UNCOV
584
                                return r;
×
585
                }
586

UNCOV
587
                fd_idx = sd_varlink_push_fd(link, pp->fsmount_fd);
×
UNCOV
588
                if (fd_idx < 0)
×
589
                        return fd_idx;
590

UNCOV
591
                TAKE_FD(pp->fsmount_fd);
×
592

UNCOV
593
                const char *m = partition_mountpoint_to_string(d);
×
UNCOV
594
                _cleanup_strv_free_ char **l = NULL;
×
UNCOV
595
                if (!isempty(m)) {
×
UNCOV
596
                        l = strv_split_nulstr(m);
×
UNCOV
597
                        if (!l)
×
UNCOV
598
                                return log_oom_debug();
×
599
                }
600

UNCOV
601
                r = sd_json_variant_append_arraybo(
×
602
                                &aj,
603
                                SD_JSON_BUILD_PAIR("designator", SD_JSON_BUILD_STRING(partition_designator_to_string(d))),
604
                                SD_JSON_BUILD_PAIR("writable", SD_JSON_BUILD_BOOLEAN(pp->rw)),
605
                                SD_JSON_BUILD_PAIR("growFileSystem", SD_JSON_BUILD_BOOLEAN(pp->growfs)),
606
                                SD_JSON_BUILD_PAIR_CONDITION(pp->partno > 0, "partitionNumber", SD_JSON_BUILD_INTEGER(pp->partno)),
607
                                SD_JSON_BUILD_PAIR_CONDITION(pp->architecture > 0, "architecture", SD_JSON_BUILD_STRING(architecture_to_string(pp->architecture))),
608
                                SD_JSON_BUILD_PAIR_CONDITION(!sd_id128_is_null(pp->uuid), "partitionUuid", SD_JSON_BUILD_UUID(pp->uuid)),
609
                                SD_JSON_BUILD_PAIR("fileSystemType", SD_JSON_BUILD_STRING(dissected_partition_fstype(pp))),
610
                                SD_JSON_BUILD_PAIR_CONDITION(!!pp->label, "partitionLabel", SD_JSON_BUILD_STRING(pp->label)),
611
                                SD_JSON_BUILD_PAIR("size", SD_JSON_BUILD_UNSIGNED(pp->size)),
612
                                SD_JSON_BUILD_PAIR("offset", SD_JSON_BUILD_UNSIGNED(pp->offset)),
613
                                SD_JSON_BUILD_PAIR("mountFileDescriptor", SD_JSON_BUILD_INTEGER(fd_idx)),
614
                                JSON_BUILD_PAIR_STRV_NON_EMPTY("mountPoint", l));
UNCOV
615
                if (r < 0)
×
616
                        return r;
617
        }
618

UNCOV
619
        loop_device_relinquish(loop);
×
620

621
        return sd_varlink_replybo(
×
622
                        link,
623
                        SD_JSON_BUILD_PAIR("partitions", SD_JSON_BUILD_VARIANT(aj)),
624
                        SD_JSON_BUILD_PAIR("imagePolicy", SD_JSON_BUILD_STRING(ps)),
625
                        SD_JSON_BUILD_PAIR("imageSize", SD_JSON_BUILD_UNSIGNED(di->image_size)),
626
                        SD_JSON_BUILD_PAIR("sectorSize", SD_JSON_BUILD_UNSIGNED(di->sector_size)),
627
                        SD_JSON_BUILD_PAIR_CONDITION(!sd_id128_is_null(di->image_uuid), "imageUuid", SD_JSON_BUILD_UUID(di->image_uuid)));
628
}
629

630
/* How UIDs are mapped for a mount; the string form of each value is in mount_map_mode_table[]. */
typedef enum MountMapMode {
        /* determine automatically from image and caller */
        MOUNT_MAP_AUTO = 0,

        /* map caller's UID to root in namespace (map 1 UID only) */
        MOUNT_MAP_ROOT,

        /* map foreign UID range to base in namespace (map 64K) */
        MOUNT_MAP_FOREIGN,

        /* apply identity mapping (map 64K) */
        MOUNT_MAP_IDENTITY,

        _MOUNT_MAP_MODE_MAX,
        _MOUNT_MAP_MODE_INVALID = -EINVAL,
} MountMapMode;
638

639
/* Names of the mapping modes, indexed by MountMapMode value. */
static const char *const mount_map_mode_table[_MOUNT_MAP_MODE_MAX] = {
        [MOUNT_MAP_AUTO] = "auto",
        [MOUNT_MAP_ROOT] = "root",
        [MOUNT_MAP_FOREIGN] = "foreign",
        [MOUNT_MAP_IDENTITY] = "identity",
};

/* Provides the file-local mount_map_mode_to_string()/mount_map_mode_from_string() helpers that the
 * JSON dispatcher and the debug logging further down rely on. */
DEFINE_PRIVATE_STRING_TABLE_LOOKUP(mount_map_mode, MountMapMode);
×
647

648
typedef struct MountDirectoryParameters {
649
        MountMapMode mode;
650
        unsigned directory_fd_idx;
651
        unsigned userns_fd_idx;
652
        int read_only;
653
} MountDirectoryParameters;
654

655
typedef enum DirectoryOwnership {
656
        DIRECTORY_IS_ROOT_PEER_OWNED,  /* This is returned if the directory is owned by the root user and the peer is root */
657
        DIRECTORY_IS_ROOT_OWNED,       /* This is returned if the directory is owned by the root user (and the peer user is not root) */
658
        DIRECTORY_IS_PEER_OWNED,       /* This is returned if the directory is owned by the peer user (who is not root) */
659
        DIRECTORY_IS_FOREIGN_OWNED,    /* This is returned if the directory is owned by the foreign UID range */
660
        DIRECTORY_IS_OTHERWISE_OWNED,  /* This is returned if the directory is owned by something else */
661
        _DIRECTORY_OWNERSHIP_MAX,
662
        _DIRECTORY_OWNERSHIP_ERRNO_MAX = -ERRNO_MAX, /* Guarantee the whole negative errno range fits */
663
} DirectoryOwnership;
664

665
static MountMapMode default_mount_map_mode(DirectoryOwnership ownership) {
        /* Picks the mapping mode to use when the client did not ask for a specific one, based on who owns
         * the base tree. Returns _MOUNT_MAP_MODE_INVALID for values that are not a known ownership class. */

        /* Owned by the peer: map the peer's UID to root in the container */
        if (ownership == DIRECTORY_IS_PEER_OWNED)
                return MOUNT_MAP_ROOT;

        /* Owned by the foreign UID range: map that range to the container's UID range */
        if (ownership == DIRECTORY_IS_FOREIGN_OWNED)
                return MOUNT_MAP_FOREIGN;

        /* Everything else we know about: don't map */
        if (IN_SET(ownership,
                   DIRECTORY_IS_ROOT_PEER_OWNED,
                   DIRECTORY_IS_ROOT_OWNED,
                   DIRECTORY_IS_OTHERWISE_OWNED))
                return MOUNT_MAP_IDENTITY;

        return _MOUNT_MAP_MODE_INVALID;
}
684

UNCOV
685
/* JSON dispatcher for the "mode" field: parses the string via mount_map_mode_from_string() into a MountMapMode. */
static JSON_DISPATCH_ENUM_DEFINE(dispatch_mount_directory_mode, MountMapMode, mount_map_mode_from_string);
×
686

UNCOV
687
static DirectoryOwnership validate_directory_fd(
×
688
                int fd,
689
                uid_t peer_uid,
690
                uid_t *ret_current_owner_uid) {
691

UNCOV
692
        int r, fl;
×
693

694
        assert(fd >= 0);
×
695
        assert(uid_is_valid(peer_uid));
×
696
        assert(ret_current_owner_uid);
×
697

698
        /* Checks if the specified directory fd looks sane. Returns a DirectoryOwnership that categorizes the
699
         * ownership situation in comparison to the peer's UID.
700
         *
701
         * Note one key difference to image validation (as implemented above): for regular files if the
702
         * client provided us with an open fd it implies the client has access, as well as what kind of
703
         * access (i.e. ro or rw). But for directories this doesn't work the same way, as directories are
704
         * always opened read-only only. Hence we use a different mechanism to validate access to them: we
705
         * check if the directory is owned by the peer UID or by the foreign UID range (in the latter case
706
         * one of the parent directories must be owned by the peer though). */
707

UNCOV
708
        struct stat st;
×
709
        if (fstat(fd, &st) < 0)
×
710
                return log_debug_errno(errno, "Failed to stat() directory fd: %m");
×
711

UNCOV
712
        r = stat_verify_directory(&st);
×
UNCOV
713
        if (r < 0)
×
714
                return r;
715

716
        fl = fd_verify_safe_flags_full(fd, O_DIRECTORY|O_PATH);
×
UNCOV
717
        if (fl < 0)
×
UNCOV
718
                return log_debug_errno(fl, "Directory file descriptor has unsafe flags set: %m");
×
719

720
        if (st.st_uid == 0) {
×
UNCOV
721
                *ret_current_owner_uid = st.st_uid;
×
UNCOV
722
                if (peer_uid == 0) {
×
723
                        log_debug("Directory file descriptor points to root owned directory, who is also the peer.");
×
UNCOV
724
                        return DIRECTORY_IS_ROOT_PEER_OWNED;
×
725
                }
UNCOV
726
                log_debug("Directory file descriptor points to root owned directory.");
×
727
                return DIRECTORY_IS_ROOT_OWNED;
×
728
        }
UNCOV
729
        if (st.st_uid == peer_uid) {
×
UNCOV
730
                log_debug("Directory file descriptor points to peer owned directory.");
×
UNCOV
731
                *ret_current_owner_uid = st.st_uid;
×
UNCOV
732
                return DIRECTORY_IS_PEER_OWNED;
×
733
        }
734

735
        /* For bind mounted directories we check if they are either owned by the client's UID, or by the
736
         * foreign UID set, but in that case the parent directory must be owned by the client's UID, or some
737
         * directory iteratively up the chain */
738

UNCOV
739
        _cleanup_close_ int parent_fd = -EBADF;
×
740
        unsigned n_level;
UNCOV
741
        for (n_level = 0; n_level < 16; n_level++) {
×
742
                /* Stop iteration if we find a directory up the tree that is neither owned by the user, nor is from the foreign UID range */
UNCOV
743
                if (!uid_is_foreign(st.st_uid) || !gid_is_foreign(st.st_gid)) {
×
UNCOV
744
                        log_debug("Directory file descriptor points to directory which itself or its parents is neither owned by foreign UID range nor by the user.");
×
UNCOV
745
                        *ret_current_owner_uid = st.st_uid;
×
UNCOV
746
                        return DIRECTORY_IS_OTHERWISE_OWNED;
×
747
                }
748

749
                /* If the peer is root, then it doesn't matter if we find a parent owned by root, let's shortcut things. */
750
                if (peer_uid == 0) {
×
UNCOV
751
                        log_debug("Directory file descriptor is owned by foreign UID range, and peer is root.");
×
752
                        *ret_current_owner_uid = st.st_uid;
×
753
                        return DIRECTORY_IS_FOREIGN_OWNED;
×
754
                }
755

756
                /* Go one level up */
757
                _cleanup_close_ int new_parent_fd = openat(fd, "..", O_DIRECTORY|O_PATH|O_CLOEXEC);
×
UNCOV
758
                if (new_parent_fd < 0)
×
759
                        return log_debug_errno(errno, "Failed to open parent directory of directory file descriptor: %m");
×
760

761
                struct stat new_st;
×
UNCOV
762
                if (fstat(new_parent_fd, &new_st) < 0)
×
763
                        return log_debug_errno(errno, "Failed to stat parent directory of directory file descriptor: %m");
×
764

765
                /* Safety check to see if we hit the root dir */
766
                if (stat_inode_same(&st, &new_st)) {
×
UNCOV
767
                        log_debug("Directory file descriptor is owned by foreign UID range, but didn't find parent directory that is owned by peer among ancestors.");
×
UNCOV
768
                        *ret_current_owner_uid = st.st_uid;
×
769
                        return DIRECTORY_IS_OTHERWISE_OWNED;
×
770
                }
771

772
                if (new_st.st_uid == peer_uid) { /* Parent inode is owned by the peer. That's good! Everything's fine. */
×
UNCOV
773
                        log_debug("Directory file descriptor is owned by foreign UID range, and ancestor is owned by peer.");
×
774
                        *ret_current_owner_uid = st.st_uid;
×
775
                        return DIRECTORY_IS_FOREIGN_OWNED;
×
776
                }
777

UNCOV
778
                close_and_replace(parent_fd, new_parent_fd);
×
UNCOV
779
                st = new_st;
×
780
        }
781

UNCOV
782
        log_debug("Failed to find peer owned parent directory after %u levels, refusing.", n_level);
×
UNCOV
783
        *ret_current_owner_uid = st.st_uid;
×
UNCOV
784
        return DIRECTORY_IS_OTHERWISE_OWNED;
×
785
}
786

787
static int vl_method_mount_directory(
×
788
                sd_varlink *link,
789
                sd_json_variant *parameters,
790
                sd_varlink_method_flags_t flags,
791
                void *userdata) {
792

793
        static const sd_json_dispatch_field dispatch_table[] = {
×
794
                { "mode",                        SD_JSON_VARIANT_STRING,   dispatch_mount_directory_mode, offsetof(MountDirectoryParameters, mode),             0                 },
795
                { "directoryFileDescriptor",     SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint,         offsetof(MountDirectoryParameters, directory_fd_idx), SD_JSON_MANDATORY },
796
                { "userNamespaceFileDescriptor", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint,         offsetof(MountDirectoryParameters, userns_fd_idx),    0                 },
797
                { "readOnly",                    SD_JSON_VARIANT_BOOLEAN,  sd_json_dispatch_tristate,     offsetof(MountDirectoryParameters, read_only),        0                 },
798
                VARLINK_DISPATCH_POLKIT_FIELD,
799
                {}
800
        };
801

802
        MountDirectoryParameters p = {
×
803
                .mode = MOUNT_MAP_AUTO,
804
                .directory_fd_idx = UINT_MAX,
805
                .userns_fd_idx = UINT_MAX,
806
                .read_only = -1,
807
        };
UNCOV
808
        _cleanup_close_ int directory_fd = -EBADF, userns_fd = -EBADF;
×
UNCOV
809
        Hashmap **polkit_registry = ASSERT_PTR(userdata);
×
810
        int r;
×
811

UNCOV
812
        r = sd_varlink_dispatch(link, parameters, dispatch_table, &p);
×
UNCOV
813
        if (r != 0)
×
814
                return r;
815

UNCOV
816
        if (p.directory_fd_idx == UINT_MAX)
×
UNCOV
817
                return sd_varlink_error_invalid_parameter_name(link, "directoryFileDescriptor");
×
818

UNCOV
819
        directory_fd = sd_varlink_peek_dup_fd(link, p.directory_fd_idx);
×
UNCOV
820
        if (directory_fd < 0)
×
821
                return log_debug_errno(directory_fd, "Failed to peek directory fd from client: %m");
×
822

UNCOV
823
        if (p.userns_fd_idx != UINT_MAX) {
×
UNCOV
824
                userns_fd = sd_varlink_peek_dup_fd(link, p.userns_fd_idx);
×
UNCOV
825
                if (userns_fd < 0)
×
UNCOV
826
                        return log_debug_errno(userns_fd, "Failed to peek user namespace fd from client: %m");
×
827
        }
828

829
        uid_t peer_uid;
×
UNCOV
830
        r = sd_varlink_get_peer_uid(link, &peer_uid);
×
UNCOV
831
        if (r < 0)
×
UNCOV
832
                return log_debug_errno(r, "Failed to get client UID: %m");
×
833

UNCOV
834
        uid_t current_owner_uid;
×
835
        DirectoryOwnership owned_by = validate_directory_fd(directory_fd, peer_uid, &current_owner_uid);
×
836
        if (owned_by == -EREMOTEIO)
×
UNCOV
837
                return sd_varlink_errorbo(link, "io.systemd.MountFileSystem.BadFileDescriptorFlags", SD_JSON_BUILD_PAIR_STRING("parameter", "directoryFileDescriptor"));
×
UNCOV
838
        if (owned_by < 0)
×
839
                return owned_by;
840

841
        r = validate_userns(link, &userns_fd);
×
UNCOV
842
        if (r != 0)
×
843
                return r;
844

845
        /* If no mode is specified, pick sensible default */
UNCOV
846
        if (p.mode <= 0) {
×
UNCOV
847
                p.mode = default_mount_map_mode(owned_by);
×
848
                assert(p.mode > 0);
×
849
        }
850

851
        _cleanup_free_ char *directory_path = NULL;
×
UNCOV
852
        (void) fd_get_path(directory_fd, &directory_path);
×
853

854
        log_debug("Mounting '%s' with mapping mode: %s", strna(directory_path), mount_map_mode_to_string(p.mode));
×
855

856
        const char *polkit_details[] = {
×
857
                "read_only", one_zero(p.read_only > 0),
×
858
                "directory", strna(directory_path),
×
859
                NULL,
860
        };
861

862
        const char *polkit_action, *polkit_untrusted_action;
×
863
        PolkitFlags polkit_flags;
×
864
        if (userns_fd < 0) {
×
865
                /* Mount into the host user namespace */
866
                polkit_action = "io.systemd.mount-file-system.mount-directory";
867
                polkit_untrusted_action = "io.systemd.mount-file-system.mount-untrusted-directory";
868
                polkit_flags = 0;
869
        } else {
870
                /* Mount into a private user namespace */
871
                polkit_action = "io.systemd.mount-file-system.mount-directory-privately";
×
872
                polkit_untrusted_action = "io.systemd.mount-file-system.mount-untrusted-directory-privately";
×
873

874
                /* If polkit is not around, let's allow mounting authenticated images by default */
875
                polkit_flags = POLKIT_DEFAULT_ALLOW;
×
876
        }
877

878
        /* We consider a directory "trusted" if it is owned by the peer or the foreign UID range */
UNCOV
879
        bool trusted_directory = IN_SET(owned_by, DIRECTORY_IS_ROOT_PEER_OWNED, DIRECTORY_IS_PEER_OWNED, DIRECTORY_IS_FOREIGN_OWNED);
×
880

881
        /* Let's definitely acquire the regular action privilege, for mounting properly signed images */
882
        r = varlink_verify_polkit_async_full(
×
883
                        link,
884
                        /* bus= */ NULL,
885
                        trusted_directory ? polkit_action : polkit_untrusted_action,
886
                        polkit_details,
887
                        /* good_user= */ UID_INVALID,
888
                        trusted_directory ? polkit_flags : 0,
889
                        polkit_registry);
UNCOV
890
        if (r <= 0)
×
891
                return r;
892

893
        /* Generate the common dissection directory here. We are not going to use it, but the clients might,
894
         * and they likely are unprivileged, hence cannot create it themselves. Hence let's just create it
895
         * here, if it is missing. */
UNCOV
896
        r = get_common_dissect_directory(NULL);
×
UNCOV
897
        if (r < 0)
×
898
                return r;
899

900
        _cleanup_close_ int mount_fd = open_tree_try_drop_idmap(
×
901
                        directory_fd,
902
                        "",
903
                        OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC|AT_SYMLINK_NOFOLLOW|AT_EMPTY_PATH);
UNCOV
904
        if (mount_fd < 0)
×
UNCOV
905
                return log_debug_errno(errno, "Failed to issue open_tree() of provided directory '%s': %m", strna(directory_path));
×
906

907
        /* MOUNT_ATTR_IDMAP has possibly been cleared. Let's verify that the underlying data matches our expectations. */
908
        struct stat unmapped_st;
×
UNCOV
909
        if (fstat(mount_fd, &unmapped_st) < 0)
×
UNCOV
910
                return log_debug_errno(errno, "Failed to stat unmapped inode: %m");
×
911

912
        r = stat_verify_directory(&unmapped_st);
×
913
        if (r < 0)
×
914
                return r;
915

916
        /* For now, let's simply refuse things if dropping the idmapping changed anything. For now that
917
         * should be good enough, because the primary usecase for this (homed) will mount the foreign UID
918
         * range 1:1. */
UNCOV
919
        if (unmapped_st.st_uid != current_owner_uid)
×
UNCOV
920
                return log_debug_errno(SYNTHETIC_ERRNO(EPERM), "Owner UID of mount after clearing ID mapping not the same anymore, refusing.");
×
921

UNCOV
922
        if (p.read_only > 0 && mount_setattr(
×
923
                            mount_fd, "", AT_EMPTY_PATH,
UNCOV
924
                            &(struct mount_attr) {
×
925
                                    .attr_set = MOUNT_ATTR_RDONLY,
926
                            }, MOUNT_ATTR_SIZE_VER0) < 0)
UNCOV
927
                return log_debug_errno(errno, "Failed to enable read-only mode: %m");
×
928

UNCOV
929
        if (p.mode != MOUNT_MAP_IDENTITY) {
×
UNCOV
930
                uid_t start;
×
931

UNCOV
932
                if (userns_fd >= 0) {
×
933
                        _cleanup_(uid_range_freep) UIDRange *uid_range_outside = NULL, *uid_range_inside = NULL, *gid_range_outside = NULL, *gid_range_inside = NULL;
×
UNCOV
934
                        r = uid_range_load_userns_by_fd(userns_fd, UID_RANGE_USERNS_OUTSIDE, &uid_range_outside);
×
UNCOV
935
                        if (r < 0)
×
UNCOV
936
                                return log_debug_errno(r, "Failed to load outside UID range of provided userns: %m");
×
UNCOV
937
                        r = uid_range_load_userns_by_fd(userns_fd, UID_RANGE_USERNS_INSIDE, &uid_range_inside);
×
UNCOV
938
                        if (r < 0)
×
939
                                return log_debug_errno(r, "Failed to load inside UID range of provided userns: %m");
×
UNCOV
940
                        r = uid_range_load_userns_by_fd(userns_fd, GID_RANGE_USERNS_OUTSIDE, &gid_range_outside);
×
UNCOV
941
                        if (r < 0)
×
UNCOV
942
                                return log_debug_errno(r, "Failed to load outside GID range of provided userns: %m");
×
UNCOV
943
                        r = uid_range_load_userns_by_fd(userns_fd, GID_RANGE_USERNS_INSIDE, &gid_range_inside);
×
UNCOV
944
                        if (r < 0)
×
UNCOV
945
                                return log_debug_errno(r, "Failed to load inside GID range of provided userns: %m");
×
946

947
                        /* Be very strict for now */
UNCOV
948
                        if (!uid_range_equal(uid_range_outside, gid_range_outside) ||
×
949
                            !uid_range_equal(uid_range_inside, gid_range_inside) ||
×
950
                            uid_range_outside->n_entries != 1 ||
×
UNCOV
951
                            uid_range_outside->entries[0].nr != 0x10000 ||
×
952
                            uid_range_inside->n_entries != 1 ||
×
953
                            uid_range_inside->entries[0].start != 0 ||
×
954
                            uid_range_inside->entries[0].nr != 0x10000)
×
UNCOV
955
                                return sd_varlink_error_invalid_parameter_name(link, "userNamespaceFileDescriptor");
×
956

957
                        start = uid_range_outside->entries[0].start;
×
958
                } else
959
                        start = 0;
960

961
                _cleanup_free_ char *new_uid_map = NULL;
×
UNCOV
962
                switch (p.mode) {
×
963
                case MOUNT_MAP_ROOT:
×
964
                        r = strextendf(&new_uid_map, UID_FMT " " UID_FMT " " UID_FMT,
×
965
                                       peer_uid, start, (uid_t) 1);
966
                        break;
UNCOV
967
                case MOUNT_MAP_FOREIGN:
×
968
                        r = strextendf(&new_uid_map, UID_FMT " " UID_FMT " " UID_FMT,
×
969
                                       (uid_t) FOREIGN_UID_MIN, start, (uid_t) 0x10000);
970
                        break;
UNCOV
971
                default:
×
972
                        assert_not_reached();
×
973
                }
UNCOV
974
                if (r < 0)
×
975
                        return r;
976

977
                _cleanup_close_ int idmap_userns_fd = userns_acquire(new_uid_map, new_uid_map, /* setgroups_deny= */ true);
×
978
                if (idmap_userns_fd < 0)
×
UNCOV
979
                        return log_debug_errno(idmap_userns_fd, "Failed to acquire user namespace for id mapping: %m");
×
980

981
                if (mount_setattr(mount_fd, "", AT_EMPTY_PATH,
×
UNCOV
982
                                  &(struct mount_attr) {
×
983
                                          .attr_set = MOUNT_ATTR_IDMAP,
984
                                          .userns_fd = idmap_userns_fd,
985
                                          .propagation = MS_PRIVATE,
986
                                  }, MOUNT_ATTR_SIZE_VER0) < 0)
987
                        return log_debug_errno(errno, "Failed to enable id mapping: %m");
×
988
        }
989

UNCOV
990
        if (userns_fd >= 0) {
×
991
                r = nsresource_add_mount(userns_fd, mount_fd);
×
992
                if (r < 0)
×
993
                        return r;
994
        }
995

UNCOV
996
        int fd_idx = sd_varlink_push_fd(link, mount_fd);
×
997
        if (fd_idx < 0)
×
998
                return fd_idx;
999

UNCOV
1000
        TAKE_FD(mount_fd);
×
1001

UNCOV
1002
        return sd_varlink_replybo(
×
1003
                        link,
1004
                        SD_JSON_BUILD_PAIR("mountFileDescriptor", SD_JSON_BUILD_INTEGER(fd_idx)));
1005
}
1006

1007
/* Parsed parameters of the MakeDirectory() method call; filled in via the JSON dispatch table. */
typedef struct MakeDirectoryParameters {
        unsigned parent_fd_idx;  /* index of the passed parent directory fd; UINT_MAX while unset */
        const char *name;        /* file name of the directory to create below the parent */
} MakeDirectoryParameters;
1011

1012
static int vl_method_make_directory(
×
1013
                sd_varlink *link,
1014
                sd_json_variant *parameters,
1015
                sd_varlink_method_flags_t flags,
1016
                void *userdata) {
1017

1018
        static const sd_json_dispatch_field dispatch_table[] = {
×
1019
                { "parentFileDescriptor", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint,        offsetof(MakeDirectoryParameters, parent_fd_idx), SD_JSON_MANDATORY },
1020
                { "name",                 SD_JSON_VARIANT_STRING,   json_dispatch_const_filename, offsetof(MakeDirectoryParameters, name),          SD_JSON_MANDATORY },
1021
                VARLINK_DISPATCH_POLKIT_FIELD,
1022
                {}
1023
        };
1024

1025
        MakeDirectoryParameters p = {
×
1026
                .parent_fd_idx = UINT_MAX,
1027
        };
UNCOV
1028
        Hashmap **polkit_registry = ASSERT_PTR(userdata);
×
1029
        int r;
×
1030

1031
        r = sd_varlink_dispatch(link, parameters, dispatch_table, &p);
×
UNCOV
1032
        if (r != 0)
×
1033
                return r;
×
1034

1035
        if (p.parent_fd_idx == UINT_MAX)
×
1036
                return sd_varlink_error_invalid_parameter_name(link, "parentFileDescriptor");
×
1037

1038
        _cleanup_close_ int parent_fd = sd_varlink_peek_dup_fd(link, p.parent_fd_idx);
×
UNCOV
1039
        if (parent_fd < 0)
×
UNCOV
1040
                return log_debug_errno(parent_fd, "Failed to peek parent directory fd from client: %m");
×
1041

UNCOV
1042
        uid_t peer_uid;
×
1043
        r = sd_varlink_get_peer_uid(link, &peer_uid);
×
UNCOV
1044
        if (r < 0)
×
UNCOV
1045
                return log_debug_errno(r, "Failed to get client UID: %m");
×
1046

1047
        struct stat parent_stat;
×
1048
        if (fstat(parent_fd, &parent_stat) < 0)
×
1049
                return r;
1050

UNCOV
1051
        r = stat_verify_directory(&parent_stat);
×
1052
        if (r < 0)
×
1053
                return r;
1054

1055
        int fl = fd_verify_safe_flags_full(parent_fd, O_DIRECTORY);
×
1056
        if (fl < 0)
×
UNCOV
1057
                return log_debug_errno(fl, "Directory file descriptor has unsafe flags set: %m");
×
1058

1059
        _cleanup_free_ char *parent_path = NULL;
×
UNCOV
1060
        (void) fd_get_path(parent_fd, &parent_path);
×
1061

1062
        _cleanup_free_ char *new_path = parent_path ? path_join(parent_path, p.name) : NULL;
×
1063
        log_debug("Asked to make directory: %s", strna(new_path));
×
1064

UNCOV
1065
        const char *polkit_details[] = {
×
1066
                "directory", strna(new_path),
×
1067
                NULL,
1068
        };
1069

1070
        const char *polkit_action;
×
1071
        PolkitFlags polkit_flags;
×
UNCOV
1072
        if (parent_stat.st_uid != peer_uid) {
×
1073
                polkit_action = "io.systemd.mount-file-system.make-directory-untrusted";
1074
                polkit_flags = 0;
1075
        } else {
UNCOV
1076
                polkit_action = "io.systemd.mount-file-system.make-directory";
×
1077
                polkit_flags = POLKIT_DEFAULT_ALLOW;
×
1078
        }
1079

UNCOV
1080
        r = varlink_verify_polkit_async_full(
×
1081
                        link,
1082
                        /* bus= */ NULL,
1083
                        polkit_action,
1084
                        polkit_details,
1085
                        /* good_user= */ UID_INVALID,
1086
                        polkit_flags,
1087
                        polkit_registry);
1088
        if (r <= 0)
×
1089
                return r;
1090

UNCOV
1091
        _cleanup_free_ char *t = NULL;
×
1092
        r = tempfn_random(p.name, "mountfsd", &t);
×
UNCOV
1093
        if (r < 0)
×
1094
                return r;
1095

1096
        _cleanup_close_ int fd = open_mkdir_at(parent_fd, t, O_CLOEXEC, 0700);
×
1097
        if (fd < 0)
×
1098
                return fd;
1099

1100
        r = RET_NERRNO(fchmod(fd, 0700)); /* Set mode explicitly, as paranoia regarding umask games */
×
UNCOV
1101
        if (r < 0)
×
1102
                goto fail;
×
1103

1104
        r = RET_NERRNO(fchown(fd, FOREIGN_UID_BASE, FOREIGN_UID_BASE));
×
1105
        if (r < 0)
×
1106
                goto fail;
×
1107

1108
        r = rename_noreplace(parent_fd, t, parent_fd, p.name);
×
UNCOV
1109
        if (r < 0)
×
UNCOV
1110
                goto fail;
×
1111

1112
        t = mfree(t); /* temporary filename no longer exists */
×
1113

UNCOV
1114
        int fd_idx = sd_varlink_push_fd(link, fd);
×
1115
        if (fd_idx < 0) {
×
1116
                r = fd_idx;
×
1117
                goto fail;
×
1118
        }
1119

UNCOV
1120
        TAKE_FD(fd);
×
1121

UNCOV
1122
        return sd_varlink_replybo(
×
1123
                        link,
1124
                        SD_JSON_BUILD_PAIR("directoryFileDescriptor", SD_JSON_BUILD_INTEGER(fd_idx)));
1125

UNCOV
1126
fail:
×
1127
        (void) unlinkat(parent_fd, t ?: p.name, AT_REMOVEDIR);
×
1128
        return r;
×
1129
}
1130

1131
static int process_connection(sd_varlink_server *server, int _fd) {
        /* Handles one connection fd: attaches the Varlink server to a freshly allocated event loop, adds
         * the connection to the server, runs the loop until it returns and detaches the server again. The
         * passed fd is taken possession of in all cases, i.e. also on failure. */

        _cleanup_close_ int fd = TAKE_FD(_fd);
        _cleanup_(sd_varlink_close_unrefp) sd_varlink *vl = NULL;
        _cleanup_(sd_event_unrefp) sd_event *event = NULL;
        int r;

        r = sd_event_new(&event);
        if (r < 0)
                return r;

        r = sd_varlink_server_attach_event(server, event, 0);
        if (r < 0)
                return log_error_errno(r, "Failed to attach Varlink server to event loop: %m");

        r = sd_varlink_server_add_connection(server, fd, &vl);
        if (r < 0)
                return log_error_errno(r, "Failed to add connection: %m");

        /* From here on the fd must not be closed by us anymore. Take a reference of our own on the
         * connection object, which the cleanup handler drops again when we return. */
        TAKE_FD(fd);
        vl = sd_varlink_ref(vl);

        r = sd_event_loop(event);
        if (r < 0)
                return log_error_errno(r, "Failed to run event loop: %m");

        r = sd_varlink_server_detach_event(server);
        if (r < 0)
                return log_error_errno(r, "Failed to detach Varlink server from event loop: %m");

        return 0;
}
1162

UNCOV
1163
static int run(int argc, char *argv[]) {
×
1164
        usec_t start_time, listen_idle_usec, last_busy_usec = USEC_INFINITY;
×
UNCOV
1165
        _cleanup_(sd_varlink_server_unrefp) sd_varlink_server *server = NULL;
×
UNCOV
1166
        _cleanup_hashmap_free_ Hashmap *polkit_registry = NULL;
×
1167
        _cleanup_(pidref_done) PidRef parent = PIDREF_NULL;
×
1168
        unsigned n_iterations = 0;
×
1169
        int m, listen_fd, r;
×
1170

1171
        log_setup();
×
1172

UNCOV
1173
        m = sd_listen_fds(false);
×
1174
        if (m < 0)
×
1175
                return log_error_errno(m, "Failed to determine number of listening fds: %m");
×
UNCOV
1176
        if (m == 0)
×
1177
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No socket to listen on received.");
×
1178
        if (m > 1)
×
UNCOV
1179
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Worker can only listen on a single socket at a time.");
×
1180

UNCOV
1181
        listen_fd = SD_LISTEN_FDS_START;
×
1182

UNCOV
1183
        r = fd_nonblock(listen_fd, false);
×
1184
        if (r < 0)
×
1185
                return log_error_errno(r, "Failed to turn off non-blocking mode for listening socket: %m");
×
1186

UNCOV
1187
        r = varlink_server_new(&server,
×
1188
                               SD_VARLINK_SERVER_INHERIT_USERDATA|
1189
                               SD_VARLINK_SERVER_ALLOW_FD_PASSING_INPUT|SD_VARLINK_SERVER_ALLOW_FD_PASSING_OUTPUT,
1190
                               &polkit_registry);
1191
        if (r < 0)
×
1192
                return log_error_errno(r, "Failed to allocate server: %m");
×
1193

UNCOV
1194
        r = sd_varlink_server_add_interface(server, &vl_interface_io_systemd_MountFileSystem);
×
UNCOV
1195
        if (r < 0)
×
UNCOV
1196
                return log_error_errno(r, "Failed to add MountFileSystem interface to varlink server: %m");
×
1197

1198
        r = sd_varlink_server_bind_method_many(
×
1199
                        server,
1200
                        "io.systemd.MountFileSystem.MountImage",     vl_method_mount_image,
1201
                        "io.systemd.MountFileSystem.MountDirectory", vl_method_mount_directory,
1202
                        "io.systemd.MountFileSystem.MakeDirectory",  vl_method_make_directory);
UNCOV
1203
        if (r < 0)
×
1204
                return log_error_errno(r, "Failed to bind methods: %m");
×
1205

UNCOV
1206
        r = sd_varlink_server_set_exit_on_idle(server, true);
×
UNCOV
1207
        if (r < 0)
×
UNCOV
1208
                return log_error_errno(r, "Failed to enable exit-on-idle mode: %m");
×
1209

UNCOV
1210
        r = getenv_bool("MOUNTFS_FIXED_WORKER");
×
UNCOV
1211
        if (r < 0)
×
UNCOV
1212
                return log_error_errno(r, "Failed to parse MOUNTFSD_FIXED_WORKER: %m");
×
UNCOV
1213
        listen_idle_usec = r ? USEC_INFINITY : LISTEN_IDLE_USEC;
×
1214

UNCOV
1215
        r = pidref_set_parent(&parent);
×
UNCOV
1216
        if (r < 0)
×
UNCOV
1217
                return log_error_errno(r, "Failed to acquire pidfd of parent process: %m");
×
1218

UNCOV
1219
        start_time = now(CLOCK_MONOTONIC);
×
1220

UNCOV
1221
        for (;;) {
×
UNCOV
1222
                _cleanup_close_ int fd = -EBADF;
×
UNCOV
1223
                usec_t n;
×
1224

1225
                /* Exit the worker in regular intervals, to flush out all memory use */
UNCOV
1226
                if (n_iterations++ > ITERATIONS_MAX) {
×
UNCOV
1227
                        log_debug("Exiting worker, processed %u iterations, that's enough.", n_iterations);
×
1228
                        break;
1229
                }
1230

UNCOV
1231
                n = now(CLOCK_MONOTONIC);
×
UNCOV
1232
                if (n >= usec_add(start_time, RUNTIME_MAX_USEC)) {
×
UNCOV
1233
                        log_debug("Exiting worker, ran for %s, that's enough.",
×
1234
                                  FORMAT_TIMESPAN(usec_sub_unsigned(n, start_time), 0));
UNCOV
1235
                        break;
×
1236
                }
1237

UNCOV
1238
                if (last_busy_usec == USEC_INFINITY)
×
1239
                        last_busy_usec = n;
UNCOV
1240
                else if (listen_idle_usec != USEC_INFINITY && n >= usec_add(last_busy_usec, listen_idle_usec)) {
×
UNCOV
1241
                        log_debug("Exiting worker, been idle for %s.",
×
1242
                                  FORMAT_TIMESPAN(usec_sub_unsigned(n, last_busy_usec), 0));
UNCOV
1243
                        break;
×
1244
                }
1245

UNCOV
1246
                (void) rename_process("systemd-mountwork: waiting...");
×
UNCOV
1247
                fd = RET_NERRNO(accept4(listen_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC));
×
UNCOV
1248
                (void) rename_process("systemd-mountwork: processing...");
×
1249

UNCOV
1250
                if (fd == -EAGAIN)
×
UNCOV
1251
                        continue; /* The listening socket has SO_RECVTIMEO set, hence a timeout is expected
×
1252
                                   * after a while, let's check if it's time to exit though. */
UNCOV
1253
                if (fd == -EINTR)
×
UNCOV
1254
                        continue; /* Might be that somebody attached via strace, let's just continue in that
×
1255
                                   * case */
UNCOV
1256
                if (fd < 0)
×
UNCOV
1257
                        return log_error_errno(fd, "Failed to accept() from listening socket: %m");
×
1258

UNCOV
1259
                if (now(CLOCK_MONOTONIC) <= usec_add(n, PRESSURE_SLEEP_TIME_USEC)) {
×
1260
                        /* We only slept a very short time? If so, let's see if there are more sockets
1261
                         * pending, and if so, let's ask our parent for more workers */
1262

UNCOV
1263
                        r = fd_wait_for_event(listen_fd, POLLIN, 0);
×
UNCOV
1264
                        if (r < 0)
×
UNCOV
1265
                                return log_error_errno(r, "Failed to test for POLLIN on listening socket: %m");
×
1266

UNCOV
1267
                        if (FLAGS_SET(r, POLLIN)) {
×
UNCOV
1268
                                r = pidref_kill(&parent, SIGUSR2);
×
UNCOV
1269
                                if (r == -ESRCH)
×
UNCOV
1270
                                        return log_error_errno(r, "Parent already died?");
×
UNCOV
1271
                                if (r < 0)
×
UNCOV
1272
                                        return log_error_errno(r, "Failed to send SIGUSR2 signal to parent: %m");
×
1273
                        }
1274
                }
1275

UNCOV
1276
                (void) process_connection(server, TAKE_FD(fd));
×
UNCOV
1277
                last_busy_usec = USEC_INFINITY;
×
1278
        }
1279

1280
        return 0;
1281
}
1282

UNCOV
1283
/* Registers run() as the program's main routine. NOTE(review): DEFINE_MAIN_FUNCTION() is presumably
 * provided by "main-func.h" (included at the top of this file) and generates main() — confirm there. */
DEFINE_MAIN_FUNCTION(run);
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc