• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 21230603853

21 Jan 2026 10:57PM UTC coverage: 72.798% (+0.3%) from 72.524%
21230603853

push

github

web-flow
os-release: add a new FANCY_NAME= field to /etc/os-release, similar to PRETTY_NAME, that may carry ansi sequences + more unicode chars (#40367)

It's sometimes useful to include non-ASCII Unicode chars in an OS name, and
give it some ANSI coloring. Since we usually don't want to show that,
introduce a new field for it, and show it at boot and in hostnamectl
only, with safe fallbacks if colors/emojis are not available.

77 of 113 new or added lines in 5 files covered. (68.14%)

2146 existing lines in 53 files now uncovered.

311199 of 427481 relevant lines covered (72.8%)

1155064.07 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/mountfsd/mountwork.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <linux/loop.h>
4
#include <poll.h>
5
#include <stdlib.h>
6
#include <sys/mount.h>
7
#include <unistd.h>
8

9
#include "sd-daemon.h"
10
#include "sd-event.h"
11
#include "sd-varlink.h"
12

13
#include "argv-util.h"
14
#include "bus-polkit.h"
15
#include "chase.h"
16
#include "discover-image.h"
17
#include "dissect-image.h"
18
#include "env-util.h"
19
#include "errno-util.h"
20
#include "escape.h"
21
#include "fd-util.h"
22
#include "fs-util.h"
23
#include "format-util.h"
24
#include "hashmap.h"
25
#include "image-policy.h"
26
#include "io-util.h"
27
#include "iovec-util.h"
28
#include "json-util.h"
29
#include "loop-util.h"
30
#include "main-func.h"
31
#include "memory-util.h"
32
#include "mount-util.h"
33
#include "namespace-util.h"
34
#include "nsresource.h"
35
#include "nulstr-util.h"
36
#include "os-util.h"
37
#include "path-util.h"
38
#include "pidref.h"
39
#include "stat-util.h"
40
#include "string-table.h"
41
#include "string-util.h"
42
#include "strv.h"
43
#include "tmpfile-util.h"
44
#include "time-util.h"
45
#include "uid-classification.h"
46
#include "uid-range.h"
47
#include "user-util.h"
48
#include "varlink-io.systemd.MountFileSystem.h"
49
#include "varlink-util.h"
50

51
/* Tuning constants. NOTE(review): their users lie outside this excerpt; the descriptions below are
 * derived from the names and values only — confirm against the worker main loop. */
#define ITERATIONS_MAX 64U /* presumably an upper bound on loop iterations — TODO confirm */
52
#define RUNTIME_MAX_USEC (5 * USEC_PER_MINUTE) /* 5 minutes, expressed in microseconds */
53
#define PRESSURE_SLEEP_TIME_USEC (50 * USEC_PER_MSEC) /* 50 milliseconds, expressed in microseconds */
54
#define LISTEN_IDLE_USEC (90 * USEC_PER_SEC) /* 90 seconds, expressed in microseconds */
55

56
/* Built-in image policy used by determine_image_policy() for images that are not considered trusted,
 * unless overridden via $SYSTEMD_MOUNTFSD_IMAGE_POLICY_UNTRUSTED. Going by the flag names: the root
 * and /usr partitions must either be signed or be absent, every other partition is ignored. */
static const ImagePolicy image_policy_untrusted = {
57
        .n_policies = 2,
58
        .policies = {
59
                { PARTITION_ROOT,     PARTITION_POLICY_SIGNED|PARTITION_POLICY_ABSENT },
60
                { PARTITION_USR,      PARTITION_POLICY_SIGNED|PARTITION_POLICY_ABSENT },
61
        },
62
        .default_flags = PARTITION_POLICY_IGNORE, /* applies to all designators not listed above */
63
};
64

65
/* JSON dispatch callback that parses an object mapping partition designator names to mount option
 * strings into a freshly allocated MountOptions.
 *
 * userdata points to a MountOptions* slot. A JSON null clears the slot. On success any previous value
 * in the slot is released and replaced by the parsed one (which is NULL for an empty object).
 * Returns 0 on success, or the result of json_log()/json_log_oom() if the input is malformed or memory
 * runs out; in that case the slot is left untouched. */
static int json_dispatch_image_options(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata) {
        _cleanup_(mount_options_free_allp) MountOptions *parsed = NULL;
        MountOptions **slot = ASSERT_PTR(userdata);
        const char *key;
        sd_json_variant *value;
        int r;

        /* Explicit null: drop whatever was configured before */
        if (sd_json_variant_is_null(variant)) {
                *slot = mount_options_free_all(*slot);
                return 0;
        }

        if (!sd_json_variant_is_object(variant))
                return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an object.", strna(name));

        JSON_VARIANT_OBJECT_FOREACH(key, value, variant) {
                PartitionDesignator designator = partition_designator_from_string(key);

                if (designator < 0)
                        return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Invalid partition designator '%s'.", strna(key));

                if (!sd_json_variant_is_string(value))
                        return json_log(value, flags, SYNTHETIC_ERRNO(EINVAL), "Mount option for partition '%s' is not a string.", strna(key));

                /* Allocate lazily, so that an empty object yields NULL rather than an empty structure */
                if (!parsed) {
                        parsed = new0(MountOptions, 1);
                        if (!parsed)
                                return json_log_oom(variant, flags);
                }

                r = free_and_strdup(&parsed->options[designator], sd_json_variant_string(value));
                if (r < 0)
                        return json_log_oom(variant, flags);
        }

        /* Everything validated: only now replace the caller's value */
        mount_options_free_all(*slot);
        *slot = TAKE_PTR(parsed);
        return 0;
}
103

104
/* JSON dispatch callback that parses an image policy string into an ImagePolicy object.
 *
 * userdata points to an ImagePolicy* slot. A JSON null clears the slot. On success the previous value
 * in the slot is released and replaced by the newly parsed policy.
 * Returns 0 on success, or the result of json_log() if the field is not a string or cannot be parsed
 * as an image policy; in that case the slot is left untouched. */
static int json_dispatch_image_policy(const char *name, sd_json_variant *variant, sd_json_dispatch_flags_t flags, void *userdata) {
        _cleanup_(image_policy_freep) ImagePolicy *q = NULL;
        ImagePolicy **p = ASSERT_PTR(userdata); /* already asserts non-NULL, no separate assert() needed */
        int r;

        /* Explicit null: drop whatever was configured before */
        if (sd_json_variant_is_null(variant)) {
                *p = image_policy_free(*p);
                return 0;
        }

        if (!sd_json_variant_is_string(variant))
                return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));

        /* Strict parsing: the policy string is not parsed gracefully */
        r = image_policy_from_string(sd_json_variant_string(variant), /* graceful= */ false, &q);
        if (r < 0)
                return json_log(variant, flags, r, "JSON field '%s' is not a valid image policy.", strna(name));

        image_policy_free(*p);
        *p = TAKE_PTR(q);
        return 0;
}
127

128
/* Parsed parameters of a mount-image Varlink call, filled in by the dispatch table in
 * vl_method_mount_image() and released with mount_image_parameters_done(). The *_fd_idx fields are
 * indexes of file descriptors passed along with the call; UINT_MAX means "not specified". */
typedef struct MountImageParameters {
129
        unsigned image_fd_idx;                  /* "imageFileDescriptor" (mandatory) */
130
        unsigned userns_fd_idx;                 /* "userNamespaceFileDescriptor" */
131
        int read_only;                          /* "readOnly" tristate, initialized to -1 (unset) */
132
        int growfs;                             /* "growFileSystems" tristate, initialized to -1 (unset) */
133
        char *password;                         /* "password"; erased before being freed */
134
        ImagePolicy *image_policy;              /* "imagePolicy", parsed by json_dispatch_image_policy() */
135
        MountOptions *options;                  /* "mountOptions", parsed by json_dispatch_image_options() */
136
        bool relax_extension_release_check;     /* "relaxExtensionReleaseChecks" */
137
        bool verity_sharing;                    /* "veritySharing" */
138
        struct iovec verity_root_hash;          /* "verityRootHash", decoded from hex */
139
        struct iovec verity_root_hash_sig;      /* "verityRootHashSignature", decoded from base64 */
140
        unsigned verity_data_fd_idx;            /* "verityDataFileDescriptor" */
141
} MountImageParameters;
142

143
/* Releases all heap resources owned by a MountImageParameters structure and resets the corresponding
 * fields. The structure itself is not freed. */
static void mount_image_parameters_done(MountImageParameters *p) {
        assert(p);

        /* The password is sensitive, hence wipe it before handing the memory back */
        p->password = erase_and_free(p->password);

        /* Verity data */
        iovec_done(&p->verity_root_hash);
        iovec_done(&p->verity_root_hash_sig);

        /* Policy and per-partition mount options */
        p->image_policy = image_policy_free(p->image_policy);
        p->options = mount_options_free_all(p->options);
}
×
152

153
/* Checks that the image file descriptor passed by the client is acceptable.
 *
 * The fd must refer to a block device or a regular file, must pass fd_verify_safe_flags_full() (with
 * O_NONBLOCK additionally permitted), and must be opened either read-only or read-write. If it was
 * opened read-only, p->read_only is forced to true.
 *
 * Returns 0 on success, -EBADF for any other access mode, or a negative errno from the checks above.
 * NOTE(review): the caller maps -EREMOTEIO to a BadFileDescriptorFlags error; presumably that code
 * originates from fd_verify_safe_flags_full() — confirm. */
static int validate_image_fd(int fd, MountImageParameters *p) {
        struct stat st;
        int flags, access_mode, r;

        assert(fd >= 0);
        assert(p);

        if (fstat(fd, &st) < 0)
                return -errno;

        /* Only support regular files and block devices. Let's use stat_verify_regular() here for the nice
         * error numbers it generates. */
        if (!S_ISBLK(st.st_mode)) {
                r = stat_verify_regular(&st);
                if (r < 0)
                        return r;
        }

        flags = fd_verify_safe_flags_full(fd, O_NONBLOCK);
        if (flags < 0)
                return log_debug_errno(flags, "Image file descriptor has unsafe flags set: %m");

        access_mode = flags & O_ACCMODE_STRICT;

        if (access_mode == O_RDWR)
                return 0;

        if (access_mode != O_RDONLY)
                return -EBADF;

        /* A read-only fd can only ever back a read-only mount */
        p->read_only = true;
        return 0;
}
189

190
static int verify_trusted_image_fd_by_path(int fd) {
×
UNCOV
191
        int r;
×
192

UNCOV
193
        assert(fd >= 0);
×
194

195
        r = secure_getenv_bool("SYSTEMD_MOUNTFSD_TRUSTED_DIRECTORIES");
×
196
        if (r == -ENXIO)  {
×
197
                if (!DEFAULT_MOUNTFSD_TRUSTED_DIRECTORIES) {
×
198
                        log_debug("Trusted directory mechanism disabled at compile time.");
×
UNCOV
199
                        return false;
×
200
                }
201
        } else if (r < 0) {
×
202
                log_debug_errno(r, "Failed to parse $SYSTEMD_MOUNTFSD_TRUSTED_DIRECTORIES environment variable, not trusting any image.");
×
203
                return false;
×
204
        } else if (!r) {
×
205
                log_debug("Trusted directory mechanism disabled via $SYSTEMD_MOUNTFSD_TRUSTED_DIRECTORIES environment variable.");
×
UNCOV
206
                return false;
×
207
        }
208

209
        _cleanup_free_ char *p = NULL;
×
210
        r = fd_get_path(fd, &p);
×
211
        if (r < 0)
×
UNCOV
212
                return log_debug_errno(r, "Failed to get path of passed image file descriptor: %m");
×
213

214
        struct stat sta;
×
215
        if (fstat(fd, &sta) < 0)
×
216
                return log_debug_errno(errno, "Failed to stat() passed image file descriptor: %m");
×
217
        if (!S_ISREG(sta.st_mode)) {
×
218
                log_debug("Image '%s' is not a regular file, hence skipping trusted directory check.", p);
×
UNCOV
219
                return false;
×
220
        }
221

UNCOV
222
        log_debug("Checking if image '%s' is in trusted directories.", p);
×
223

224
        for (ImageClass c = 0; c < _IMAGE_CLASS_MAX; c++)
×
225
                NULSTR_FOREACH(s, image_search_path[c]) {
×
226
                        _cleanup_close_ int dir_fd = -EBADF, inode_fd = -EBADF;
×
227
                        _cleanup_free_ char *q = NULL;
×
228
                        struct stat stb;
×
UNCOV
229
                        const char *e;
×
230

231
                        r = chase(s, NULL, CHASE_SAFE|CHASE_TRIGGER_AUTOFS, &q, &dir_fd);
×
232
                        if (r == -ENOENT)
×
233
                                continue;
×
234
                        if (r < 0) {
×
235
                                log_warning_errno(r, "Failed to resolve search path '%s', ignoring: %m", s);
×
UNCOV
236
                                continue;
×
237
                        }
238

239
                        /* Check that the inode refers to a file immediately inside the image directory,
240
                         * i.e. not the image directory itself, and nothing further down the tree */
241
                        e = path_startswith(p, q);
×
242
                        if (isempty(e))
×
UNCOV
243
                                continue;
×
244

245
                        e += strspn(e, "/");
×
246
                        if (!filename_is_valid(e))
×
UNCOV
247
                                continue;
×
248

249
                        r = chaseat(dir_fd, e, CHASE_SAFE|CHASE_TRIGGER_AUTOFS, NULL, &inode_fd);
×
250
                        if (r < 0)
×
UNCOV
251
                                return log_error_errno(r, "Couldn't verify that specified image '%s' is in search path '%s': %m", p, s);
×
252

253
                        if (fstat(inode_fd, &stb) < 0)
×
UNCOV
254
                                return log_error_errno(errno, "Failed to stat image file '%s/%s': %m", q, e);
×
255

256
                        if (stat_inode_same(&sta, &stb)) {
×
257
                                log_debug("Image '%s' is *in* trusted directories.", p);
×
UNCOV
258
                                return true; /* Yay */
×
259
                        }
260
                }
261

UNCOV
262
        log_debug("Image '%s' is *not* in trusted directories.", p);
×
263
        return false;
264
}
265

UNCOV
266
static int determine_image_policy(
×
267
                int image_fd,
268
                bool trusted,
269
                ImagePolicy *client_policy,
270
                ImagePolicy **ret) {
271

272
        _cleanup_(image_policy_freep) ImagePolicy *envvar_policy = NULL;
×
273
        const ImagePolicy *default_policy;
×
274
        const char *envvar, *e;
×
UNCOV
275
        int r;
×
276

277
        assert(image_fd >= 0);
×
UNCOV
278
        assert(ret);
×
279

UNCOV
280
        if (trusted) {
×
281
                envvar = "SYSTEMD_MOUNTFSD_IMAGE_POLICY_TRUSTED";
282
                default_policy = &image_policy_allow;
283
        } else {
284
                envvar = "SYSTEMD_MOUNTFSD_IMAGE_POLICY_UNTRUSTED";
×
UNCOV
285
                default_policy = &image_policy_untrusted;
×
286
        }
287

288
        e = secure_getenv(envvar);
×
289
        if (e) {
×
290
                r = image_policy_from_string(e, /* graceful= */ false, &envvar_policy);
×
291
                if (r < 0)
×
UNCOV
292
                        return log_error_errno(r, "Failed to parse image policy supplied via $%s: %m", envvar);
×
293

UNCOV
294
                default_policy = envvar_policy;
×
295
        }
296

UNCOV
297
        return image_policy_intersect(default_policy, client_policy, ret);
×
298
}
299

300
/* Validates the user namespace file descriptor passed by the client, if any.
 *
 * A negative *userns_fd means none was passed and is accepted as is. Otherwise the fd must pass
 * fd_verify_safe_flags() and must refer to a user namespace. If it refers to our own user namespace
 * it is closed and *userns_fd is reset, so that the caller proceeds as if none had been specified.
 *
 * Returns 0 if the fd is acceptable, a negative errno on failure, or the result of
 * sd_varlink_error_invalid_parameter_name() if the fd is not a user namespace. */
static int validate_userns(sd_varlink *link, int *userns_fd) {
        int r;

        assert(link);
        assert(userns_fd);

        if (*userns_fd < 0)
                return 0;

        r = fd_verify_safe_flags(*userns_fd);
        if (r < 0)
                return log_debug_errno(r, "User namespace file descriptor has unsafe flags set: %m");

        r = fd_is_namespace(*userns_fd, NAMESPACE_USER);
        if (r < 0)
                return r;
        if (r == 0)
                return sd_varlink_error_invalid_parameter_name(link, "userNamespaceFileDescriptor");

        /* Our own host user namespace? Then close the fd, and handle it as if none was specified. */
        r = is_our_namespace(*userns_fd, NAMESPACE_USER);
        if (r < 0)
                return log_debug_errno(r, "Failed to determine if user namespace provided by client is our own: %m");
        if (r > 0) {
                log_debug("User namespace provided by client is our own.");
                *userns_fd = safe_close(*userns_fd);
        }

        return 0;
}
330

331
static int mount_options_to_polkit_details(const MountOptions *options, char **ret_mount_options_concat) {
×
332
        _cleanup_free_ char *mount_options_concat = NULL;
×
UNCOV
333
        int r;
×
334

UNCOV
335
        assert(ret_mount_options_concat);
×
336

337
        if (!options) {
×
338
                *ret_mount_options_concat = NULL;
×
UNCOV
339
                return 0;
×
340
        }
341

342
        for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
×
UNCOV
343
                _cleanup_free_ char *escaped = NULL;
×
344

345
                if (isempty(options->options[i]))
×
UNCOV
346
                        continue;
×
347

348
                escaped = shell_escape(options->options[i], ":");
×
349
                if (!escaped)
×
UNCOV
350
                        return log_oom_debug();
×
351

UNCOV
352
                r = strextendf_with_separator(
×
353
                                &mount_options_concat,
354
                                ",",
355
                                "%s:%s",
356
                                partition_designator_to_string(i),
357
                                escaped);
UNCOV
358
                if (r < 0)
×
359
                        return r;
360
        }
361

362
        *ret_mount_options_concat = TAKE_PTR(mount_options_concat);
×
UNCOV
363
        return 0;
×
364
}
365

UNCOV
366
static int vl_method_mount_image(
×
367
                sd_varlink *link,
368
                sd_json_variant *parameters,
369
                sd_varlink_method_flags_t flags,
370
                void *userdata) {
371

UNCOV
372
        static const sd_json_dispatch_field dispatch_table[] = {
×
373
                { "imageFileDescriptor",         SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint,        offsetof(MountImageParameters, image_fd_idx),                  SD_JSON_MANDATORY },
374
                { "userNamespaceFileDescriptor", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint,        offsetof(MountImageParameters, userns_fd_idx),                 0 },
375
                { "readOnly",                    SD_JSON_VARIANT_BOOLEAN,  sd_json_dispatch_tristate,    offsetof(MountImageParameters, read_only),                     0 },
376
                { "growFileSystems",             SD_JSON_VARIANT_BOOLEAN,  sd_json_dispatch_tristate,    offsetof(MountImageParameters, growfs),                        0 },
377
                { "password",                    SD_JSON_VARIANT_STRING,   sd_json_dispatch_string,      offsetof(MountImageParameters, password),                      0 },
378
                { "imagePolicy",                 SD_JSON_VARIANT_STRING,   json_dispatch_image_policy,   offsetof(MountImageParameters, image_policy),                  0 },
379
                { "mountOptions",                SD_JSON_VARIANT_OBJECT,   json_dispatch_image_options,  offsetof(MountImageParameters, options),                       0 },
380
                { "relaxExtensionReleaseChecks", SD_JSON_VARIANT_BOOLEAN,  sd_json_dispatch_stdbool,     offsetof(MountImageParameters, relax_extension_release_check), 0 },
381
                { "veritySharing",               SD_JSON_VARIANT_BOOLEAN,  sd_json_dispatch_stdbool,     offsetof(MountImageParameters, verity_sharing),                0 },
382
                { "verityDataFileDescriptor",    SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint,        offsetof(MountImageParameters, verity_data_fd_idx),            0 },
383
                { "verityRootHash",              SD_JSON_VARIANT_STRING,   json_dispatch_unhex_iovec,    offsetof(MountImageParameters, verity_root_hash),              0 },
384
                { "verityRootHashSignature",     SD_JSON_VARIANT_STRING,   json_dispatch_unbase64_iovec, offsetof(MountImageParameters, verity_root_hash_sig),          0 },
385
                VARLINK_DISPATCH_POLKIT_FIELD,
386
                {}
387
        };
388

UNCOV
389
        _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
×
UNCOV
390
        _cleanup_(mount_image_parameters_done) MountImageParameters p = {
×
391
                .image_fd_idx = UINT_MAX,
392
                .userns_fd_idx = UINT_MAX,
393
                .verity_data_fd_idx = UINT_MAX,
394
                .read_only = -1,
395
                .growfs = -1,
396
        };
397
        _cleanup_(dissected_image_unrefp) DissectedImage *di = NULL;
×
398
        _cleanup_(loop_device_unrefp) LoopDevice *loop = NULL;
×
399
        _cleanup_(sd_json_variant_unrefp) sd_json_variant *aj = NULL;
×
400
        _cleanup_close_ int image_fd = -EBADF, userns_fd = -EBADF, verity_data_fd = -EBADF;
×
401
        _cleanup_(image_policy_freep) ImagePolicy *use_policy = NULL;
×
402
        Hashmap **polkit_registry = ASSERT_PTR(userdata);
×
403
        _cleanup_free_ char *ps = NULL;
×
UNCOV
404
        bool image_is_trusted = false;
×
405
        int r;
×
406

UNCOV
407
        assert(link);
×
408
        assert(parameters);
×
409

410
        sd_json_variant_sensitive(parameters); /* might contain passwords */
×
411

UNCOV
412
        r = sd_varlink_dispatch(link, parameters, dispatch_table, &p);
×
UNCOV
413
        if (r != 0)
×
414
                return r;
415

416
        /* Verity data and roothash have to be either both set, or both unset. The sig can be set only if
417
         * the roothash is set. */
418
        if ((p.verity_data_fd_idx != UINT_MAX) != (p.verity_root_hash.iov_len > 0))
×
419
                return sd_varlink_error_invalid_parameter_name(link, "verityDataFileDescriptor");
×
UNCOV
420
        if (p.verity_root_hash_sig.iov_len > 0 && p.verity_root_hash.iov_len == 0)
×
421
                return sd_varlink_error_invalid_parameter_name(link, "verityRootHashSignature");
×
422

423
        if (p.image_fd_idx != UINT_MAX) {
×
424
                image_fd = sd_varlink_peek_dup_fd(link, p.image_fd_idx);
×
UNCOV
425
                if (image_fd < 0)
×
UNCOV
426
                        return log_debug_errno(image_fd, "Failed to peek image fd from client: %m");
×
427
        }
428

429
        if (p.userns_fd_idx != UINT_MAX) {
×
430
                userns_fd = sd_varlink_peek_dup_fd(link, p.userns_fd_idx);
×
UNCOV
431
                if (userns_fd < 0)
×
UNCOV
432
                        return log_debug_errno(userns_fd, "Failed to peek user namespace fd from client: %m");
×
433
        }
434

435
        r = validate_image_fd(image_fd, &p);
×
436
        if (r == -EREMOTEIO)
×
UNCOV
437
                return sd_varlink_errorbo(link, "io.systemd.MountFileSystem.BadFileDescriptorFlags", SD_JSON_BUILD_PAIR_STRING("parameter", "imageFileDescriptor"));
×
UNCOV
438
        if (r < 0)
×
439
                return r;
440

UNCOV
441
        r = validate_userns(link, &userns_fd);
×
UNCOV
442
        if (r != 0)
×
443
                return r;
444

445
        /* Mount options could be used to thwart security measures such as ACLs or SELinux so if they are
446
         * specified don't mark the image as trusted so that it requires additional privileges to use. */
447
        if (!p.options) {
×
UNCOV
448
                r = verify_trusted_image_fd_by_path(image_fd);
×
449
                if (r < 0)
×
450
                        return r;
UNCOV
451
                image_is_trusted = r;
×
452
        }
453

454
        if (p.verity_data_fd_idx != UINT_MAX) {
×
455
                verity_data_fd = sd_varlink_peek_dup_fd(link, p.verity_data_fd_idx);
×
UNCOV
456
                if (verity_data_fd < 0)
×
457
                        return log_debug_errno(verity_data_fd, "Failed to peek verity data fd from client: %m");
×
458

459
                r = fd_verify_safe_flags(verity_data_fd);
×
UNCOV
460
                if (r < 0)
×
461
                        return log_debug_errno(r, "Verity data file descriptor has unsafe flags set: %m");
×
462

UNCOV
463
                verity.data_path = strdup(FORMAT_PROC_FD_PATH(verity_data_fd));
×
UNCOV
464
                if (!verity.data_path)
×
465
                        return -ENOMEM;
466

467
                verity.designator = PARTITION_ROOT;
×
UNCOV
468
                verity.root_hash = TAKE_STRUCT(p.verity_root_hash);
×
UNCOV
469
                verity.root_hash_sig = TAKE_STRUCT(p.verity_root_hash_sig);
×
470
        }
471

472
        /* Let the polkit rule know what mount options the caller tries to use, so that rules can decide
473
         * whether to allow or deny the operation based on what the options are. */
474
        _cleanup_free_ char *mount_options_concat = NULL;
×
UNCOV
475
        r = mount_options_to_polkit_details(p.options, &mount_options_concat);
×
UNCOV
476
        if (r < 0)
×
477
                return r;
478

479
        const char *polkit_details[] = {
×
UNCOV
480
                "read_only", one_zero(p.read_only > 0),
×
UNCOV
481
                !isempty(mount_options_concat) ? "mount_options" : NULL, mount_options_concat,
×
482
                NULL,
483
        };
484

485
        const char *polkit_action, *polkit_untrusted_action;
×
UNCOV
486
        PolkitFlags polkit_flags;
×
UNCOV
487
        if (userns_fd < 0) {
×
488
                /* Mount into the host user namespace */
489
                polkit_action = "io.systemd.mount-file-system.mount-image";
490
                polkit_untrusted_action = "io.systemd.mount-file-system.mount-untrusted-image";
491
                polkit_flags = 0;
492
        } else {
493
                /* Mount into a private user namespace */
UNCOV
494
                polkit_action = "io.systemd.mount-file-system.mount-image-privately";
×
UNCOV
495
                polkit_untrusted_action = "io.systemd.mount-file-system.mount-untrusted-image-privately";
×
496

497
                /* If polkit is not around, let's allow mounting authenticated images by default */
UNCOV
498
                polkit_flags = POLKIT_DEFAULT_ALLOW;
×
499
        }
500

501
        /* Let's definitely acquire the regular action privilege, for mounting properly signed images */
UNCOV
502
        r = varlink_verify_polkit_async_full(
×
503
                        link,
504
                        /* bus= */ NULL,
UNCOV
505
                        p.options ? polkit_untrusted_action : polkit_action, /* Using mount options requires higher privs */
×
506
                        polkit_details,
507
                        /* good_user= */ UID_INVALID,
508
                        polkit_flags,
509
                        polkit_registry);
UNCOV
510
        if (r <= 0)
×
511
                return r;
512

513
        /* Generate the common dissection directory here. We are not going to use it, but the clients might,
514
         * and they likely are unprivileged, hence cannot create it themselves. Hence let's just create it
515
         * here, if it is missing. */
UNCOV
516
        r = get_common_dissect_directory(NULL);
×
UNCOV
517
        if (r < 0)
×
518
                return r;
519

520
        r = loop_device_make(
×
521
                        image_fd,
UNCOV
522
                        p.read_only == 0 ? O_RDONLY : O_RDWR,
×
523
                        0,
524
                        UINT64_MAX,
525
                        UINT32_MAX,
526
                        LO_FLAGS_PARTSCAN,
527
                        LOCK_EX,
528
                        &loop);
UNCOV
529
        if (r < 0)
×
530
                return r;
531

532
        DissectImageFlags dissect_flags =
×
UNCOV
533
                (p.read_only == 0 ? DISSECT_IMAGE_READ_ONLY : 0) |
×
UNCOV
534
                (p.growfs != 0 ? DISSECT_IMAGE_GROWFS : 0) |
×
535
                DISSECT_IMAGE_DISCARD_ANY |
536
                DISSECT_IMAGE_FSCK |
537
                DISSECT_IMAGE_ADD_PARTITION_DEVICES |
×
UNCOV
538
                DISSECT_IMAGE_PIN_PARTITION_DEVICES |
×
UNCOV
539
                (p.verity_sharing ? DISSECT_IMAGE_VERITY_SHARE : 0) |
×
540
                /* Maybe the image is a bare filesystem. Note that this requires privileges, as it is
541
                 * classified by the policy as an 'unprotected' image and will be refused otherwise. */
UNCOV
542
                DISSECT_IMAGE_NO_PARTITION_TABLE |
×
UNCOV
543
                DISSECT_IMAGE_ALLOW_USERSPACE_VERITY |
×
544
                (p.relax_extension_release_check ? DISSECT_IMAGE_RELAX_EXTENSION_CHECK : 0);
×
545

546
        /* Let's see if we have acquired the privilege to mount untrusted images already */
547
        bool polkit_have_untrusted_action =
×
548
                varlink_has_polkit_action(link, polkit_untrusted_action, polkit_details, polkit_registry);
×
549

UNCOV
550
        for (;;) {
×
UNCOV
551
                use_policy = image_policy_free(use_policy);
×
UNCOV
552
                ps = mfree(ps);
×
553

554
                /* We use the image policy for trusted images if either the path is below a trusted
555
                 * directory, or if we have already acquired a PK authentication that tells us that untrusted
556
                 * images are OK */
UNCOV
557
                bool use_trusted_policy =
×
558
                        image_is_trusted ||
559
                        polkit_have_untrusted_action;
560

UNCOV
561
                r = determine_image_policy(
×
562
                                image_fd,
563
                                use_trusted_policy,
564
                                p.image_policy,
565
                                &use_policy);
566
                if (r < 0)
×
567
                        return r;
568

UNCOV
569
                r = image_policy_to_string(use_policy, /* simplify= */ true, &ps);
×
570
                if (r < 0)
×
571
                        return r;
572

UNCOV
573
                log_debug("Using image policy: %s", ps);
×
574

575
                r = dissect_loop_device(
×
576
                                loop,
577
                                &verity,
UNCOV
578
                                p.options,
×
579
                                use_policy,
580
                                /* image_filter= */ NULL,
581
                                dissect_flags,
582
                                &di);
583
                if (r == -ENOPKG)
×
584
                        return sd_varlink_error(link, "io.systemd.MountFileSystem.IncompatibleImage", NULL);
×
585
                if (r == -ENOTUNIQ)
×
586
                        return sd_varlink_error(link, "io.systemd.MountFileSystem.MultipleRootPartitionsFound", NULL);
×
UNCOV
587
                if (r == -ENXIO)
×
UNCOV
588
                        return sd_varlink_error(link, "io.systemd.MountFileSystem.RootPartitionNotFound", NULL);
×
589
                if (r == -ERFKILL) {
×
590
                        /* The image policy refused this, let's retry after trying to get PolicyKit */
591

UNCOV
592
                        if (!polkit_have_untrusted_action) {
×
UNCOV
593
                                log_debug("Denied by image policy. Trying a stronger polkit authentication before continuing.");
×
UNCOV
594
                                r = varlink_verify_polkit_async_full(
×
595
                                                link,
596
                                                /* bus= */ NULL,
597
                                                polkit_untrusted_action,
598
                                                polkit_details,
599
                                                /* good_user= */ UID_INVALID,
600
                                                /* flags= */ 0,                   /* NB: the image cannot be authenticated, hence unless PK is around to allow this anyway, fail! */
601
                                                polkit_registry);
UNCOV
602
                                if (r <= 0 && !ERRNO_IS_NEG_PRIVILEGE(r))
×
603
                                        return r;
604
                                if (r > 0) {
×
605
                                        /* Try again, now that we know the client has enough privileges. */
UNCOV
606
                                        log_debug("Denied by image policy, retrying after polkit authentication.");
×
UNCOV
607
                                        polkit_have_untrusted_action = true;
×
UNCOV
608
                                        continue;
×
609
                                }
610
                        }
611

UNCOV
612
                        return sd_varlink_error(link, "io.systemd.MountFileSystem.DeniedByImagePolicy", NULL);
×
613
                }
UNCOV
614
                if (r < 0)
×
615
                        return r;
616

617
                /* Success */
618
                break;
×
619
        }
620

UNCOV
621
        r = dissected_image_load_verity_sig_partition(
×
622
                        di,
UNCOV
623
                        loop->fd,
×
624
                        &verity);
625
        if (r < 0)
×
626
                return r;
627

628
        r = dissected_image_guess_verity_roothash(
×
629
                        di,
630
                        &verity);
631
        if (r < 0)
×
632
                return r;
633

634
        r = dissected_image_decrypt(
×
635
                        di,
636
                        /* root= */ NULL,
UNCOV
637
                        p.password,
×
638
                        &verity,
639
                        use_policy,
640
                        dissect_flags);
641
        if (r == -ENOKEY) /* new dm-verity userspace returns ENOKEY if the dm-verity signature key is not in
×
642
                           * key chain. That's great. */
UNCOV
643
                return sd_varlink_error(link, "io.systemd.MountFileSystem.KeyNotFound", NULL);
×
UNCOV
644
        if (r == -EBUSY) /* DM kernel subsystem is shit with returning useful errors hence we keep retrying
×
645
                          * under the assumption that some errors are transitional. Which the errors might
646
                          * not actually be. After all retries failed we return EBUSY. Let's turn that into a
647
                          * generic Verity error. It's not very helpful, could mean anything, but at least it
648
                          * gives client a clear idea that this has to do with Verity. */
UNCOV
649
                return sd_varlink_error(link, "io.systemd.MountFileSystem.VerityFailure", NULL);
×
650
        if (r < 0)
×
651
                return r;
652

UNCOV
653
        r = dissected_image_mount(
×
654
                        di,
655
                        /* where= */ NULL,
656
                        /* uid_shift= */ UID_INVALID,
657
                        /* uid_range= */ UID_INVALID,
658
                        userns_fd,
659
                        dissect_flags);
660
        if (r < 0)
×
661
                return r;
662

UNCOV
663
        for (PartitionDesignator d = 0; d < _PARTITION_DESIGNATOR_MAX; d++) {
×
664
                DissectedPartition *pp = di->partitions + d;
×
665
                int fd_idx;
×
666

667
                if (!pp->found)
×
668
                        continue;
×
669

670
                if (pp->fsmount_fd < 0)
×
671
                        continue;
×
672

673
                if (userns_fd >= 0) {
×
UNCOV
674
                        r = nsresource_add_mount(userns_fd, pp->fsmount_fd);
×
UNCOV
675
                        if (r < 0)
×
676
                                return r;
×
677
                }
678

UNCOV
679
                fd_idx = sd_varlink_push_fd(link, pp->fsmount_fd);
×
680
                if (fd_idx < 0)
×
681
                        return fd_idx;
682

683
                TAKE_FD(pp->fsmount_fd);
×
684

685
                const char *m = partition_mountpoint_to_string(d);
×
686
                _cleanup_strv_free_ char **l = NULL;
×
687
                if (!isempty(m)) {
×
UNCOV
688
                        l = strv_split_nulstr(m);
×
UNCOV
689
                        if (!l)
×
690
                                return log_oom_debug();
×
691
                }
692

UNCOV
693
                r = sd_json_variant_append_arraybo(
×
694
                                &aj,
695
                                SD_JSON_BUILD_PAIR_STRING("designator", partition_designator_to_string(d)),
696
                                SD_JSON_BUILD_PAIR_BOOLEAN("writable", pp->rw),
697
                                SD_JSON_BUILD_PAIR_BOOLEAN("growFileSystem", pp->growfs),
698
                                SD_JSON_BUILD_PAIR_CONDITION(pp->partno > 0, "partitionNumber", SD_JSON_BUILD_INTEGER(pp->partno)),
699
                                SD_JSON_BUILD_PAIR_CONDITION(pp->architecture > 0, "architecture", SD_JSON_BUILD_STRING(architecture_to_string(pp->architecture))),
700
                                SD_JSON_BUILD_PAIR_CONDITION(!sd_id128_is_null(pp->uuid), "partitionUuid", SD_JSON_BUILD_UUID(pp->uuid)),
701
                                SD_JSON_BUILD_PAIR_STRING("fileSystemType", dissected_partition_fstype(pp)),
702
                                SD_JSON_BUILD_PAIR_CONDITION(!!pp->label, "partitionLabel", SD_JSON_BUILD_STRING(pp->label)),
703
                                SD_JSON_BUILD_PAIR_UNSIGNED("size", pp->size),
704
                                SD_JSON_BUILD_PAIR_UNSIGNED("offset", pp->offset),
705
                                SD_JSON_BUILD_PAIR_INTEGER("mountFileDescriptor", fd_idx),
706
                                JSON_BUILD_PAIR_STRV_NON_EMPTY("mountPoint", l));
UNCOV
707
                if (r < 0)
×
708
                        return r;
709
        }
710

UNCOV
711
        loop_device_relinquish(loop);
×
712

UNCOV
713
        return sd_varlink_replybo(
×
714
                        link,
715
                        SD_JSON_BUILD_PAIR_VARIANT("partitions", aj),
716
                        SD_JSON_BUILD_PAIR_BOOLEAN("singleFileSystem", di->single_file_system),
717
                        SD_JSON_BUILD_PAIR_STRING("imagePolicy", ps),
718
                        SD_JSON_BUILD_PAIR_UNSIGNED("imageSize", di->image_size),
719
                        SD_JSON_BUILD_PAIR_UNSIGNED("sectorSize", di->sector_size),
720
                        SD_JSON_BUILD_PAIR_CONDITION(!sd_id128_is_null(di->image_uuid), "imageUuid", SD_JSON_BUILD_UUID(di->image_uuid)));
721
}
722

723
/* How UIDs/GIDs of a directory tree shall be mapped when it is mounted for a client. */
typedef enum MountMapMode {
        /* determine automatically from image and caller */
        MOUNT_MAP_AUTO = 0,

        /* map caller's UID to root in namespace (map 1 UID only) */
        MOUNT_MAP_ROOT,

        /* map foreign UID range to base in namespace (map 64K) */
        MOUNT_MAP_FOREIGN,

        /* apply identity mapping (map 64K) */
        MOUNT_MAP_IDENTITY,

        /* Number of valid modes, used to size the string table */
        _MOUNT_MAP_MODE_MAX,

        /* Marker for "no valid mode" */
        _MOUNT_MAP_MODE_INVALID = -EINVAL,
} MountMapMode;
731

732
/* Names of the mapping modes as they appear in the "mode" parameter of the Varlink call. */
static const char *const mount_map_mode_table[_MOUNT_MAP_MODE_MAX] = {
        [MOUNT_MAP_AUTO] = "auto",
        [MOUNT_MAP_ROOT] = "root",
        [MOUNT_MAP_FOREIGN] = "foreign",
        [MOUNT_MAP_IDENTITY] = "identity",
};

/* Provides the file-local mount_map_mode_to_string()/mount_map_mode_from_string() helpers used below. */
DEFINE_PRIVATE_STRING_TABLE_LOOKUP(mount_map_mode, MountMapMode);
×
740

741
typedef struct MountDirectoryParameters {
742
        MountMapMode mode;
743
        unsigned directory_fd_idx;
744
        unsigned userns_fd_idx;
745
        int read_only;
746
} MountDirectoryParameters;
747

748
typedef enum DirectoryOwnership {
749
        DIRECTORY_IS_ROOT_PEER_OWNED,  /* This is returned if the directory is owned by the root user and the peer is root */
750
        DIRECTORY_IS_ROOT_OWNED,       /* This is returned if the directory is owned by the root user (and the peer user is not root) */
751
        DIRECTORY_IS_PEER_OWNED,       /* This is returned if the directory is owned by the peer user (who is not root) */
752
        DIRECTORY_IS_FOREIGN_OWNED,    /* This is returned if the directory is owned by the foreign UID range */
753
        DIRECTORY_IS_OTHERWISE_OWNED,  /* This is returned if the directory is owned by something else */
754
        _DIRECTORY_OWNERSHIP_MAX,
755
        _DIRECTORY_OWNERSHIP_ERRNO_MAX = -ERRNO_MAX, /* Guarantee the whole negative errno range fits */
756
} DirectoryOwnership;
757

UNCOV
758
static MountMapMode default_mount_map_mode(DirectoryOwnership ownership) {
        /* Picks the mapping mode to use when the client did not ask for a specific one, based on who owns
         * the base tree. Returns _MOUNT_MAP_MODE_INVALID for values that are not a known ownership class. */

        /* Map the peer's UID to root in the container */
        if (ownership == DIRECTORY_IS_PEER_OWNED)
                return MOUNT_MAP_ROOT;

        /* Map the foreign UID range to the container's UID range */
        if (ownership == DIRECTORY_IS_FOREIGN_OWNED)
                return MOUNT_MAP_FOREIGN;

        /* Don't map */
        if (ownership == DIRECTORY_IS_ROOT_PEER_OWNED ||
            ownership == DIRECTORY_IS_ROOT_OWNED ||
            ownership == DIRECTORY_IS_OTHERWISE_OWNED)
                return MOUNT_MAP_IDENTITY;

        return _MOUNT_MAP_MODE_INVALID;
}
777

UNCOV
778
/* Defines dispatch_mount_directory_mode(), the JSON dispatch callback referenced for the "mode" field of
 * the MountDirectory call. Presumably it turns the string into a MountMapMode via
 * mount_map_mode_from_string() -- the macro body is not visible here. */
static JSON_DISPATCH_ENUM_DEFINE(dispatch_mount_directory_mode, MountMapMode, mount_map_mode_from_string);
×
779

UNCOV
780
static DirectoryOwnership validate_directory_fd(
×
781
                int fd,
782
                uid_t peer_uid,
783
                uid_t *ret_current_owner_uid) {
784

785
        int r, fl;
×
786

UNCOV
787
        assert(fd >= 0);
×
UNCOV
788
        assert(uid_is_valid(peer_uid));
×
UNCOV
789
        assert(ret_current_owner_uid);
×
790

791
        /* Checks if the specified directory fd looks sane. Returns a DirectoryOwnership that categorizes the
792
         * ownership situation in comparison to the peer's UID.
793
         *
794
         * Note one key difference to image validation (as implemented above): for regular files if the
795
         * client provided us with an open fd it implies the client has access, as well as what kind of
796
         * access (i.e. ro or rw). But for directories this doesn't work the same way, as directories are
797
         * always opened read-only only. Hence we use a different mechanism to validate access to them: we
798
         * check if the directory is owned by the peer UID or by the foreign UID range (in the latter case
799
         * one of the parent directories must be owned by the peer though). */
800

801
        struct stat st;
×
802
        if (fstat(fd, &st) < 0)
×
UNCOV
803
                return log_debug_errno(errno, "Failed to stat() directory fd: %m");
×
804

805
        r = stat_verify_directory(&st);
×
806
        if (r < 0)
×
807
                return r;
808

809
        fl = fd_verify_safe_flags_full(fd, O_DIRECTORY|O_PATH);
×
810
        if (fl < 0)
×
811
                return log_debug_errno(fl, "Directory file descriptor has unsafe flags set: %m");
×
812

813
        if (st.st_uid == 0) {
×
UNCOV
814
                *ret_current_owner_uid = st.st_uid;
×
815
                if (peer_uid == 0) {
×
816
                        log_debug("Directory file descriptor points to root owned directory, who is also the peer.");
×
UNCOV
817
                        return DIRECTORY_IS_ROOT_PEER_OWNED;
×
818
                }
819
                log_debug("Directory file descriptor points to root owned directory.");
×
820
                return DIRECTORY_IS_ROOT_OWNED;
×
821
        }
UNCOV
822
        if (st.st_uid == peer_uid) {
×
UNCOV
823
                log_debug("Directory file descriptor points to peer owned directory.");
×
UNCOV
824
                *ret_current_owner_uid = st.st_uid;
×
UNCOV
825
                return DIRECTORY_IS_PEER_OWNED;
×
826
        }
827

828
        /* For bind mounted directories we check if they are either owned by the client's UID, or by the
829
         * foreign UID set, but in that case the parent directory must be owned by the client's UID, or some
830
         * directory iteratively up the chain */
831

832
        _cleanup_close_ int parent_fd = -EBADF;
×
833
        unsigned n_level;
834
        for (n_level = 0; n_level < 16; n_level++) {
×
835
                /* Stop iteration if we find a directory up the tree that is neither owned by the user, nor is from the foreign UID range */
UNCOV
836
                if (!uid_is_foreign(st.st_uid) || !gid_is_foreign(st.st_gid)) {
×
UNCOV
837
                        log_debug("Directory file descriptor points to directory which itself or its parents is neither owned by foreign UID range nor by the user.");
×
UNCOV
838
                        *ret_current_owner_uid = st.st_uid;
×
839
                        return DIRECTORY_IS_OTHERWISE_OWNED;
×
840
                }
841

842
                /* If the peer is root, then it doesn't matter if we find a parent owned by root, let's shortcut things. */
UNCOV
843
                if (peer_uid == 0) {
×
UNCOV
844
                        log_debug("Directory file descriptor is owned by foreign UID range, and peer is root.");
×
UNCOV
845
                        *ret_current_owner_uid = st.st_uid;
×
846
                        return DIRECTORY_IS_FOREIGN_OWNED;
×
847
                }
848

849
                /* Go one level up */
850
                _cleanup_close_ int new_parent_fd = openat(fd, "..", O_DIRECTORY|O_PATH|O_CLOEXEC);
×
851
                if (new_parent_fd < 0)
×
852
                        return log_debug_errno(errno, "Failed to open parent directory of directory file descriptor: %m");
×
853

UNCOV
854
                struct stat new_st;
×
855
                if (fstat(new_parent_fd, &new_st) < 0)
×
856
                        return log_debug_errno(errno, "Failed to stat parent directory of directory file descriptor: %m");
×
857

858
                /* Safety check to see if we hit the root dir */
UNCOV
859
                if (stat_inode_same(&st, &new_st)) {
×
UNCOV
860
                        log_debug("Directory file descriptor is owned by foreign UID range, but didn't find parent directory that is owned by peer among ancestors.");
×
861
                        *ret_current_owner_uid = st.st_uid;
×
862
                        return DIRECTORY_IS_OTHERWISE_OWNED;
×
863
                }
864

UNCOV
865
                if (new_st.st_uid == peer_uid) { /* Parent inode is owned by the peer. That's good! Everything's fine. */
×
UNCOV
866
                        log_debug("Directory file descriptor is owned by foreign UID range, and ancestor is owned by peer.");
×
867
                        *ret_current_owner_uid = st.st_uid;
×
868
                        return DIRECTORY_IS_FOREIGN_OWNED;
×
869
                }
870

871
                close_and_replace(parent_fd, new_parent_fd);
×
872
                st = new_st;
×
873
        }
874

UNCOV
875
        log_debug("Failed to find peer owned parent directory after %u levels, refusing.", n_level);
×
876
        *ret_current_owner_uid = st.st_uid;
×
UNCOV
877
        return DIRECTORY_IS_OTHERWISE_OWNED;
×
878
}
879

UNCOV
880
static int vl_method_mount_directory(
×
881
                sd_varlink *link,
882
                sd_json_variant *parameters,
883
                sd_varlink_method_flags_t flags,
884
                void *userdata) {
885

UNCOV
886
        static const sd_json_dispatch_field dispatch_table[] = {
×
887
                { "mode",                        SD_JSON_VARIANT_STRING,   dispatch_mount_directory_mode, offsetof(MountDirectoryParameters, mode),             0                 },
888
                { "directoryFileDescriptor",     SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint,         offsetof(MountDirectoryParameters, directory_fd_idx), SD_JSON_MANDATORY },
889
                { "userNamespaceFileDescriptor", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint,         offsetof(MountDirectoryParameters, userns_fd_idx),    0                 },
890
                { "readOnly",                    SD_JSON_VARIANT_BOOLEAN,  sd_json_dispatch_tristate,     offsetof(MountDirectoryParameters, read_only),        0                 },
891
                VARLINK_DISPATCH_POLKIT_FIELD,
892
                {}
893
        };
894

UNCOV
895
        MountDirectoryParameters p = {
×
896
                .mode = MOUNT_MAP_AUTO,
897
                .directory_fd_idx = UINT_MAX,
898
                .userns_fd_idx = UINT_MAX,
899
                .read_only = -1,
900
        };
901
        _cleanup_close_ int directory_fd = -EBADF, userns_fd = -EBADF;
×
902
        Hashmap **polkit_registry = ASSERT_PTR(userdata);
×
UNCOV
903
        int r;
×
904

905
        r = sd_varlink_dispatch(link, parameters, dispatch_table, &p);
×
906
        if (r != 0)
×
907
                return r;
908

909
        if (p.directory_fd_idx == UINT_MAX)
×
910
                return sd_varlink_error_invalid_parameter_name(link, "directoryFileDescriptor");
×
911

912
        directory_fd = sd_varlink_peek_dup_fd(link, p.directory_fd_idx);
×
913
        if (directory_fd < 0)
×
914
                return log_debug_errno(directory_fd, "Failed to peek directory fd from client: %m");
×
915

UNCOV
916
        if (p.userns_fd_idx != UINT_MAX) {
×
UNCOV
917
                userns_fd = sd_varlink_peek_dup_fd(link, p.userns_fd_idx);
×
918
                if (userns_fd < 0)
×
919
                        return log_debug_errno(userns_fd, "Failed to peek user namespace fd from client: %m");
×
920
        }
921

UNCOV
922
        uid_t peer_uid;
×
923
        r = sd_varlink_get_peer_uid(link, &peer_uid);
×
924
        if (r < 0)
×
925
                return log_debug_errno(r, "Failed to get client UID: %m");
×
926

927
        uid_t current_owner_uid;
×
UNCOV
928
        DirectoryOwnership owned_by = validate_directory_fd(directory_fd, peer_uid, &current_owner_uid);
×
UNCOV
929
        if (owned_by == -EREMOTEIO)
×
930
                return sd_varlink_errorbo(link, "io.systemd.MountFileSystem.BadFileDescriptorFlags", SD_JSON_BUILD_PAIR_STRING("parameter", "directoryFileDescriptor"));
×
931
        if (owned_by < 0)
×
932
                return owned_by;
933

UNCOV
934
        r = validate_userns(link, &userns_fd);
×
935
        if (r != 0)
×
936
                return r;
937

938
        /* If no mode is specified, pick sensible default */
UNCOV
939
        if (p.mode <= 0) {
×
940
                p.mode = default_mount_map_mode(owned_by);
×
941
                assert(p.mode > 0);
×
942
        }
943

UNCOV
944
        _cleanup_free_ char *directory_path = NULL;
×
945
        (void) fd_get_path(directory_fd, &directory_path);
×
946

947
        log_debug("Mounting '%s' with mapping mode: %s", strna(directory_path), mount_map_mode_to_string(p.mode));
×
948

UNCOV
949
        const char *polkit_details[] = {
×
UNCOV
950
                "read_only", one_zero(p.read_only > 0),
×
951
                "directory", strna(directory_path),
×
952
                NULL,
953
        };
954

UNCOV
955
        const char *polkit_action, *polkit_untrusted_action;
×
UNCOV
956
        PolkitFlags polkit_flags;
×
UNCOV
957
        if (userns_fd < 0) {
×
958
                /* Mount into the host user namespace */
959
                polkit_action = "io.systemd.mount-file-system.mount-directory";
960
                polkit_untrusted_action = "io.systemd.mount-file-system.mount-untrusted-directory";
961
                polkit_flags = 0;
962
        } else {
963
                /* Mount into a private user namespace */
964
                polkit_action = "io.systemd.mount-file-system.mount-directory-privately";
×
UNCOV
965
                polkit_untrusted_action = "io.systemd.mount-file-system.mount-untrusted-directory-privately";
×
966

967
                /* If polkit is not around, let's allow mounting authenticated images by default */
968
                polkit_flags = POLKIT_DEFAULT_ALLOW;
×
969
        }
970

971
        /* We consider a directory "trusted" if it is owned by the peer or the foreign UID range */
UNCOV
972
        bool trusted_directory = IN_SET(owned_by, DIRECTORY_IS_ROOT_PEER_OWNED, DIRECTORY_IS_PEER_OWNED, DIRECTORY_IS_FOREIGN_OWNED);
×
973

974
        /* Let's definitely acquire the regular action privilege, for mounting properly signed images */
UNCOV
975
        r = varlink_verify_polkit_async_full(
×
976
                        link,
977
                        /* bus= */ NULL,
978
                        trusted_directory ? polkit_action : polkit_untrusted_action,
979
                        polkit_details,
980
                        /* good_user= */ UID_INVALID,
981
                        trusted_directory ? polkit_flags : 0,
982
                        polkit_registry);
UNCOV
983
        if (r <= 0)
×
984
                return r;
985

986
        /* Generate the common dissection directory here. We are not going to use it, but the clients might,
987
         * and they likely are unprivileged, hence cannot create it themselves. Hence let's just create it
988
         * here, if it is missing. */
989
        r = get_common_dissect_directory(NULL);
×
UNCOV
990
        if (r < 0)
×
991
                return r;
992

993
        _cleanup_close_ int mount_fd = open_tree_try_drop_idmap(
×
994
                        directory_fd,
995
                        "",
996
                        OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC|AT_SYMLINK_NOFOLLOW|AT_EMPTY_PATH);
997
        if (mount_fd < 0)
×
998
                return log_debug_errno(errno, "Failed to issue open_tree() of provided directory '%s': %m", strna(directory_path));
×
999

1000
        /* MOUNT_ATTR_IDMAP has possibly been cleared. Let's verify that the underlying data matches our expectations. */
1001
        struct stat unmapped_st;
×
1002
        if (fstat(mount_fd, &unmapped_st) < 0)
×
UNCOV
1003
                return log_debug_errno(errno, "Failed to stat unmapped inode: %m");
×
1004

UNCOV
1005
        r = stat_verify_directory(&unmapped_st);
×
UNCOV
1006
        if (r < 0)
×
1007
                return r;
1008

1009
        /* For now, let's simply refuse things if dropping the idmapping changed anything. For now that
1010
         * should be good enough, because the primary usecase for this (homed) will mount the foreign UID
1011
         * range 1:1. */
UNCOV
1012
        if (unmapped_st.st_uid != current_owner_uid)
×
1013
                return log_debug_errno(SYNTHETIC_ERRNO(EPERM), "Owner UID of mount after clearing ID mapping not the same anymore, refusing.");
×
1014

UNCOV
1015
        if (p.read_only > 0 && mount_setattr(
×
1016
                            mount_fd, "", AT_EMPTY_PATH,
UNCOV
1017
                            &(struct mount_attr) {
×
1018
                                    .attr_set = MOUNT_ATTR_RDONLY,
1019
                            }, MOUNT_ATTR_SIZE_VER0) < 0)
UNCOV
1020
                return log_debug_errno(errno, "Failed to enable read-only mode: %m");
×
1021

1022
        if (p.mode != MOUNT_MAP_IDENTITY) {
×
1023
                uid_t start;
×
1024

1025
                if (userns_fd >= 0) {
×
1026
                        _cleanup_(uid_range_freep) UIDRange *uid_range_outside = NULL, *uid_range_inside = NULL, *gid_range_outside = NULL, *gid_range_inside = NULL;
×
1027
                        r = uid_range_load_userns_by_fd(userns_fd, UID_RANGE_USERNS_OUTSIDE, &uid_range_outside);
×
1028
                        if (r < 0)
×
1029
                                return log_debug_errno(r, "Failed to load outside UID range of provided userns: %m");
×
1030
                        r = uid_range_load_userns_by_fd(userns_fd, UID_RANGE_USERNS_INSIDE, &uid_range_inside);
×
1031
                        if (r < 0)
×
1032
                                return log_debug_errno(r, "Failed to load inside UID range of provided userns: %m");
×
1033
                        r = uid_range_load_userns_by_fd(userns_fd, GID_RANGE_USERNS_OUTSIDE, &gid_range_outside);
×
1034
                        if (r < 0)
×
UNCOV
1035
                                return log_debug_errno(r, "Failed to load outside GID range of provided userns: %m");
×
UNCOV
1036
                        r = uid_range_load_userns_by_fd(userns_fd, GID_RANGE_USERNS_INSIDE, &gid_range_inside);
×
1037
                        if (r < 0)
×
1038
                                return log_debug_errno(r, "Failed to load inside GID range of provided userns: %m");
×
1039

1040
                        /* Be very strict for now */
1041
                        if (!uid_range_equal(uid_range_outside, gid_range_outside) ||
×
1042
                            !uid_range_equal(uid_range_inside, gid_range_inside) ||
×
1043
                            uid_range_outside->n_entries != 1 ||
×
1044
                            uid_range_outside->entries[0].nr != 0x10000 ||
×
UNCOV
1045
                            uid_range_inside->n_entries != 1 ||
×
1046
                            uid_range_inside->entries[0].start != 0 ||
×
UNCOV
1047
                            uid_range_inside->entries[0].nr != 0x10000)
×
UNCOV
1048
                                return sd_varlink_error_invalid_parameter_name(link, "userNamespaceFileDescriptor");
×
1049

1050
                        start = uid_range_outside->entries[0].start;
×
1051
                } else
1052
                        start = 0;
1053

UNCOV
1054
                _cleanup_free_ char *new_uid_map = NULL;
×
UNCOV
1055
                switch (p.mode) {
×
1056
                case MOUNT_MAP_ROOT:
×
1057
                        r = strextendf(&new_uid_map, UID_FMT " " UID_FMT " " UID_FMT,
×
1058
                                       peer_uid, start, (uid_t) 1);
1059
                        break;
1060
                case MOUNT_MAP_FOREIGN:
×
1061
                        r = strextendf(&new_uid_map, UID_FMT " " UID_FMT " " UID_FMT,
×
1062
                                       (uid_t) FOREIGN_UID_MIN, start, (uid_t) 0x10000);
1063
                        break;
UNCOV
1064
                default:
×
UNCOV
1065
                        assert_not_reached();
×
1066
                }
1067
                if (r < 0)
×
1068
                        return r;
1069

1070
                _cleanup_close_ int idmap_userns_fd = userns_acquire(new_uid_map, new_uid_map, /* setgroups_deny= */ true);
×
1071
                if (idmap_userns_fd < 0)
×
UNCOV
1072
                        return log_debug_errno(idmap_userns_fd, "Failed to acquire user namespace for id mapping: %m");
×
1073

UNCOV
1074
                if (mount_setattr(mount_fd, "", AT_EMPTY_PATH,
×
UNCOV
1075
                                  &(struct mount_attr) {
×
1076
                                          .attr_set = MOUNT_ATTR_IDMAP,
1077
                                          .userns_fd = idmap_userns_fd,
1078
                                          .propagation = MS_PRIVATE,
1079
                                  }, MOUNT_ATTR_SIZE_VER0) < 0)
1080
                        return log_debug_errno(errno, "Failed to enable id mapping: %m");
×
1081
        }
1082

UNCOV
1083
        if (userns_fd >= 0) {
×
UNCOV
1084
                r = nsresource_add_mount(userns_fd, mount_fd);
×
1085
                if (r < 0)
×
1086
                        return r;
1087
        }
1088

1089
        int fd_idx = sd_varlink_push_fd(link, mount_fd);
×
UNCOV
1090
        if (fd_idx < 0)
×
1091
                return fd_idx;
1092

UNCOV
1093
        TAKE_FD(mount_fd);
×
1094

UNCOV
1095
        return sd_varlink_replybo(
×
1096
                        link,
1097
                        SD_JSON_BUILD_PAIR_INTEGER("mountFileDescriptor", fd_idx));
1098
}
1099

1100
/* Parsed parameters of the MakeDirectory Varlink call, filled in by the JSON dispatcher. */
typedef struct MakeDirectoryParameters {
        /* Index of the parent directory fd among the fds passed along with the call; UINT_MAX if unset */
        unsigned parent_fd_idx;

        /* File name of the directory to create below the parent */
        const char *name;

        /* Requested access mode; MODE_INVALID if the client did not specify one */
        mode_t mode;
} MakeDirectoryParameters;
1105

UNCOV
1106
static int vl_method_make_directory(
×
1107
                sd_varlink *link,
1108
                sd_json_variant *parameters,
1109
                sd_varlink_method_flags_t flags,
1110
                void *userdata) {
1111

UNCOV
1112
        static const sd_json_dispatch_field dispatch_table[] = {
×
1113
                { "parentFileDescriptor", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint,        offsetof(MakeDirectoryParameters, parent_fd_idx), SD_JSON_MANDATORY },
1114
                { "name",                 SD_JSON_VARIANT_STRING,        json_dispatch_const_filename, offsetof(MakeDirectoryParameters, name),          SD_JSON_MANDATORY },
1115
                { "mode",                 _SD_JSON_VARIANT_TYPE_INVALID, json_dispatch_access_mode,    offsetof(MakeDirectoryParameters, mode),          SD_JSON_STRICT    },
1116
                VARLINK_DISPATCH_POLKIT_FIELD,
1117
                {}
1118
        };
1119

1120
        MakeDirectoryParameters p = {
×
1121
                .parent_fd_idx = UINT_MAX,
1122
                .mode = MODE_INVALID,
1123
        };
1124
        Hashmap **polkit_registry = ASSERT_PTR(userdata);
×
1125
        int r;
×
1126

1127
        r = sd_varlink_dispatch(link, parameters, dispatch_table, &p);
×
1128
        if (r != 0)
×
UNCOV
1129
                return r;
×
1130

UNCOV
1131
        if (p.mode == MODE_INVALID)
×
1132
                p.mode = 0700;
×
1133
        else
UNCOV
1134
                p.mode &= 0775; /* refuse generating world writable dirs */
×
1135

1136
        if (p.parent_fd_idx == UINT_MAX)
×
1137
                return sd_varlink_error_invalid_parameter_name(link, "parentFileDescriptor");
×
1138

1139
        _cleanup_close_ int parent_fd = sd_varlink_peek_dup_fd(link, p.parent_fd_idx);
×
1140
        if (parent_fd < 0)
×
1141
                return log_debug_errno(parent_fd, "Failed to peek parent directory fd from client: %m");
×
1142

UNCOV
1143
        uid_t peer_uid;
×
1144
        r = sd_varlink_get_peer_uid(link, &peer_uid);
×
1145
        if (r < 0)
×
UNCOV
1146
                return log_debug_errno(r, "Failed to get client UID: %m");
×
1147

1148
        struct stat parent_stat;
×
1149
        if (fstat(parent_fd, &parent_stat) < 0)
×
1150
                return r;
1151

1152
        r = stat_verify_directory(&parent_stat);
×
1153
        if (r < 0)
×
1154
                return r;
1155

1156
        int fl = fd_verify_safe_flags_full(parent_fd, O_DIRECTORY);
×
1157
        if (fl < 0)
×
UNCOV
1158
                return log_debug_errno(fl, "Directory file descriptor has unsafe flags set: %m");
×
1159

1160
        _cleanup_free_ char *parent_path = NULL;
×
UNCOV
1161
        (void) fd_get_path(parent_fd, &parent_path);
×
1162

1163
        _cleanup_free_ char *new_path = parent_path ? path_join(parent_path, p.name) : NULL;
×
UNCOV
1164
        log_debug("Asked to make directory: %s", strna(new_path));
×
1165

UNCOV
1166
        const char *polkit_details[] = {
×
1167
                "directory", strna(new_path),
×
1168
                NULL,
1169
        };
1170

UNCOV
1171
        const char *polkit_action;
×
UNCOV
1172
        PolkitFlags polkit_flags;
×
1173
        if (parent_stat.st_uid != peer_uid) {
×
1174
                polkit_action = "io.systemd.mount-file-system.make-directory-untrusted";
1175
                polkit_flags = 0;
1176
        } else {
1177
                polkit_action = "io.systemd.mount-file-system.make-directory";
×
UNCOV
1178
                polkit_flags = POLKIT_DEFAULT_ALLOW;
×
1179
        }
1180

UNCOV
1181
        r = varlink_verify_polkit_async_full(
×
1182
                        link,
1183
                        /* bus= */ NULL,
1184
                        polkit_action,
1185
                        polkit_details,
1186
                        /* good_user= */ UID_INVALID,
1187
                        polkit_flags,
1188
                        polkit_registry);
1189
        if (r <= 0)
×
1190
                return r;
1191

UNCOV
1192
        _cleanup_free_ char *t = NULL;
×
1193
        r = tempfn_random(p.name, "mountfsd", &t);
×
1194
        if (r < 0)
×
1195
                return r;
1196

1197
        _cleanup_close_ int fd = open_mkdir_at(parent_fd, t, O_CLOEXEC, p.mode);
×
1198
        if (fd < 0)
×
1199
                return fd;
1200

1201
        r = RET_NERRNO(fchmod(fd, p.mode)); /* Set mode explicitly, as paranoia regarding umask games */
×
1202
        if (r < 0)
×
1203
                goto fail;
×
1204

1205
        r = RET_NERRNO(fchown(fd, FOREIGN_UID_BASE, FOREIGN_UID_BASE));
×
1206
        if (r < 0)
×
1207
                goto fail;
×
1208

1209
        r = rename_noreplace(parent_fd, t, parent_fd, p.name);
×
UNCOV
1210
        if (r < 0)
×
1211
                goto fail;
×
1212

1213
        t = mfree(t); /* temporary filename no longer exists */
×
1214

UNCOV
1215
        int fd_idx = sd_varlink_push_fd(link, fd);
×
UNCOV
1216
        if (fd_idx < 0) {
×
1217
                r = fd_idx;
×
UNCOV
1218
                goto fail;
×
1219
        }
1220

UNCOV
1221
        TAKE_FD(fd);
×
1222

1223
        return sd_varlink_replybo(
×
1224
                        link,
1225
                        SD_JSON_BUILD_PAIR_INTEGER("directoryFileDescriptor", fd_idx));
1226

UNCOV
1227
fail:
×
1228
        (void) unlinkat(parent_fd, t ?: p.name, AT_REMOVEDIR);
×
1229
        return r;
×
1230
}
1231

1232
/* Serves a single accepted connection: sets up a fresh event loop, attaches the Varlink server to it, adds
 * the connection and runs the loop until it exits. Returns 0 on success, negative errno-style on failure. */
static int process_connection(sd_varlink_server *server, int _fd) {
        _cleanup_close_ int conn_fd = TAKE_FD(_fd); /* always take possession */
        _cleanup_(sd_varlink_close_unrefp) sd_varlink *connection = NULL;
        _cleanup_(sd_event_unrefp) sd_event *loop = NULL;
        int r;

        r = sd_event_new(&loop);
        if (r < 0)
                return r;

        r = sd_varlink_server_attach_event(server, loop, 0);
        if (r < 0)
                return log_error_errno(r, "Failed to attach Varlink server to event loop: %m");

        r = sd_varlink_server_add_connection(server, conn_fd, &connection);
        if (r < 0)
                return log_error_errno(r, "Failed to add connection: %m");

        /* The connection object has taken over the fd, and we keep our own reference to the object */
        TAKE_FD(conn_fd);
        connection = sd_varlink_ref(connection);

        r = sd_event_loop(loop);
        if (r < 0)
                return log_error_errno(r, "Failed to run event loop: %m");

        r = sd_varlink_server_detach_event(server);
        if (r < 0)
                return log_error_errno(r, "Failed to detach Varlink server from event loop: %m");

        return 0;
}
1263

1264
static int run(int argc, char *argv[]) {
×
1265
        usec_t start_time, listen_idle_usec, last_busy_usec = USEC_INFINITY;
×
1266
        _cleanup_(sd_varlink_server_unrefp) sd_varlink_server *server = NULL;
×
UNCOV
1267
        _cleanup_hashmap_free_ Hashmap *polkit_registry = NULL;
×
1268
        _cleanup_(pidref_done) PidRef parent = PIDREF_NULL;
×
UNCOV
1269
        unsigned n_iterations = 0;
×
1270
        int m, listen_fd, r;
×
1271

1272
        log_setup();
×
1273

1274
        m = sd_listen_fds(false);
×
1275
        if (m < 0)
×
1276
                return log_error_errno(m, "Failed to determine number of listening fds: %m");
×
UNCOV
1277
        if (m == 0)
×
1278
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No socket to listen on received.");
×
UNCOV
1279
        if (m > 1)
×
1280
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Worker can only listen on a single socket at a time.");
×
1281

1282
        listen_fd = SD_LISTEN_FDS_START;
×
1283

1284
        r = fd_nonblock(listen_fd, false);
×
UNCOV
1285
        if (r < 0)
×
UNCOV
1286
                return log_error_errno(r, "Failed to turn off non-blocking mode for listening socket: %m");
×
1287

1288
        r = varlink_server_new(&server,
×
1289
                               SD_VARLINK_SERVER_INHERIT_USERDATA|
1290
                               SD_VARLINK_SERVER_ALLOW_FD_PASSING_INPUT|SD_VARLINK_SERVER_ALLOW_FD_PASSING_OUTPUT,
1291
                               &polkit_registry);
1292
        if (r < 0)
×
1293
                return log_error_errno(r, "Failed to allocate server: %m");
×
1294

1295
        r = sd_varlink_server_add_interface(server, &vl_interface_io_systemd_MountFileSystem);
×
UNCOV
1296
        if (r < 0)
×
UNCOV
1297
                return log_error_errno(r, "Failed to add MountFileSystem interface to varlink server: %m");
×
1298

UNCOV
1299
        r = sd_varlink_server_bind_method_many(
×
1300
                        server,
1301
                        "io.systemd.MountFileSystem.MountImage",     vl_method_mount_image,
1302
                        "io.systemd.MountFileSystem.MountDirectory", vl_method_mount_directory,
1303
                        "io.systemd.MountFileSystem.MakeDirectory",  vl_method_make_directory);
1304
        if (r < 0)
×
1305
                return log_error_errno(r, "Failed to bind methods: %m");
×
1306

1307
        r = sd_varlink_server_set_exit_on_idle(server, true);
×
1308
        if (r < 0)
×
1309
                return log_error_errno(r, "Failed to enable exit-on-idle mode: %m");
×
1310

UNCOV
1311
        r = getenv_bool("MOUNTFS_FIXED_WORKER");
×
1312
        if (r < 0)
×
1313
                return log_error_errno(r, "Failed to parse MOUNTFSD_FIXED_WORKER: %m");
×
1314
        listen_idle_usec = r ? USEC_INFINITY : LISTEN_IDLE_USEC;
×
1315

1316
        r = pidref_set_parent(&parent);
×
UNCOV
1317
        if (r < 0)
×
1318
                return log_error_errno(r, "Failed to acquire pidfd of parent process: %m");
×
1319

1320
        start_time = now(CLOCK_MONOTONIC);
×
1321

UNCOV
1322
        for (;;) {
×
1323
                _cleanup_close_ int fd = -EBADF;
×
1324
                usec_t n;
×
1325

1326
                /* Exit the worker in regular intervals, to flush out all memory use */
UNCOV
1327
                if (n_iterations++ > ITERATIONS_MAX) {
×
1328
                        log_debug("Exiting worker, processed %u iterations, that's enough.", n_iterations);
×
1329
                        break;
1330
                }
1331

1332
                n = now(CLOCK_MONOTONIC);
×
UNCOV
1333
                if (n >= usec_add(start_time, RUNTIME_MAX_USEC)) {
×
UNCOV
1334
                        log_debug("Exiting worker, ran for %s, that's enough.",
×
1335
                                  FORMAT_TIMESPAN(usec_sub_unsigned(n, start_time), 0));
UNCOV
1336
                        break;
×
1337
                }
1338

UNCOV
1339
                if (last_busy_usec == USEC_INFINITY)
×
1340
                        last_busy_usec = n;
UNCOV
1341
                else if (listen_idle_usec != USEC_INFINITY && n >= usec_add(last_busy_usec, listen_idle_usec)) {
×
UNCOV
1342
                        log_debug("Exiting worker, been idle for %s.",
×
1343
                                  FORMAT_TIMESPAN(usec_sub_unsigned(n, last_busy_usec), 0));
1344
                        break;
×
1345
                }
1346

1347
                (void) rename_process("systemd-mountwork: waiting...");
×
1348
                fd = RET_NERRNO(accept4(listen_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC));
×
UNCOV
1349
                (void) rename_process("systemd-mountwork: processing...");
×
1350

1351
                if (fd == -EAGAIN)
×
UNCOV
1352
                        continue; /* The listening socket has SO_RECVTIMEO set, hence a timeout is expected
×
1353
                                   * after a while, let's check if it's time to exit though. */
1354
                if (fd == -EINTR)
×
UNCOV
1355
                        continue; /* Might be that somebody attached via strace, let's just continue in that
×
1356
                                   * case */
UNCOV
1357
                if (fd < 0)
×
UNCOV
1358
                        return log_error_errno(fd, "Failed to accept() from listening socket: %m");
×
1359

1360
                if (now(CLOCK_MONOTONIC) <= usec_add(n, PRESSURE_SLEEP_TIME_USEC)) {
×
1361
                        /* We only slept a very short time? If so, let's see if there are more sockets
1362
                         * pending, and if so, let's ask our parent for more workers */
1363

1364
                        r = fd_wait_for_event(listen_fd, POLLIN, 0);
×
1365
                        if (r < 0)
×
1366
                                return log_error_errno(r, "Failed to test for POLLIN on listening socket: %m");
×
1367

1368
                        if (FLAGS_SET(r, POLLIN)) {
×
1369
                                r = pidref_kill(&parent, SIGUSR2);
×
UNCOV
1370
                                if (r == -ESRCH)
×
UNCOV
1371
                                        return log_error_errno(r, "Parent already died?");
×
UNCOV
1372
                                if (r < 0)
×
1373
                                        return log_error_errno(r, "Failed to send SIGUSR2 signal to parent: %m");
×
1374
                        }
1375
                }
1376

UNCOV
1377
                (void) process_connection(server, TAKE_FD(fd));
×
UNCOV
1378
                last_busy_usec = USEC_INFINITY;
×
1379
        }
1380

1381
        return 0;
1382
}
1383

UNCOV
1384
/* Program entry point. NOTE(review): DEFINE_MAIN_FUNCTION() presumably comes from main-func.h and
 * expands to a main() that invokes run() — confirm against that header. */
DEFINE_MAIN_FUNCTION(run);
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc