• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 23927985597

02 Apr 2026 07:45PM UTC coverage: 72.362% (+0.02%) from 72.343%
23927985597

push

github

daandemeyer
ci: Drop base64 encoding in claude review workflow

Doesn't seem to work nearly as well as the previous solution, which
just told claude not to escape stuff.

319121 of 441004 relevant lines covered (72.36%)

1167673.48 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

79.82
/src/shared/dissect-image.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <fnmatch.h>
4
#include <linux/loop.h>
5
#include <sys/file.h>
6
#include <sys/mount.h>
7
#include <unistd.h>
8

9
#if HAVE_OPENSSL
10
#include <openssl/err.h>
11
#include <openssl/pem.h>
12
#include <openssl/x509.h>
13
#endif
14

15
#include "sd-device.h"
16
#include "sd-id128.h"
17
#include "sd-json.h"
18
#include "sd-varlink.h"
19

20
#include "architecture.h"
21
#include "ask-password-api.h"
22
#include "blkid-util.h"
23
#include "blockdev-util.h"
24
#include "btrfs-util.h"
25
#include "chase.h"
26
#include "conf-files.h"
27
#include "constants.h"
28
#include "copy.h"
29
#include "cryptsetup-util.h"
30
#include "device-private.h"
31
#include "devnum-util.h"
32
#include "dissect-image.h"
33
#include "dm-util.h"
34
#include "env-file.h"
35
#include "env-util.h"
36
#include "errno-util.h"
37
#include "extension-util.h"
38
#include "extract-word.h"
39
#include "fd-util.h"
40
#include "fileio.h"
41
#include "format-util.h"
42
#include "fsck-util.h"
43
#include "fstab-util.h"
44
#include "gpt.h"
45
#include "hash-funcs.h"
46
#include "hexdecoct.h"
47
#include "hostname-setup.h"
48
#include "image-policy.h"
49
#include "import-util.h"
50
#include "io-util.h"
51
#include "iovec-util.h"
52
#include "json-util.h"
53
#include "libmount-util.h"
54
#include "loop-util.h"
55
#include "mkdir-label.h"
56
#include "mount-util.h"
57
#include "mountpoint-util.h"
58
#include "namespace-util.h"
59
#include "nulstr-util.h"
60
#include "openssl-util.h"
61
#include "os-util.h"
62
#include "path-util.h"
63
#include "pcrextend-util.h"
64
#include "pidref.h"
65
#include "proc-cmdline.h"
66
#include "process-util.h"
67
#include "resize-fs.h"
68
#include "rm-rf.h"
69
#include "runtime-scope.h"
70
#include "siphash24.h"
71
#include "stat-util.h"
72
#include "stdio-util.h"
73
#include "string-util.h"
74
#include "strv.h"
75
#include "time-util.h"
76
#include "udev-util.h"
77
#include "user-util.h"
78
#include "varlink-util.h"
79
#include "xattr-util.h"
80

81
/* how many times to wait for the device nodes to appear */
82
#define N_DEVICE_NODE_LIST_ATTEMPTS 10
83

84
/* Produces the list of file system types that automatic dissection is willing to mount. If
 * $SYSTEMD_DISSECT_FILE_SYSTEMS is set its colon-separated contents are used, otherwise a built-in
 * default list applies. On success the newly allocated list is stored in *ret_strv and 0 is returned;
 * if no list could be allocated -ENOMEM is returned. */
static int allowed_fstypes(char ***ret_strv) {
        const char *env;
        char **list;

        assert(ret_strv);

        env = secure_getenv("SYSTEMD_DISSECT_FILE_SYSTEMS");
        list = env ? strv_split(env, ":")
                   : strv_new("btrfs",
                              "erofs",
                              "ext4",
                              "f2fs",
                              "squashfs",
                              "vfat",
                              "xfs");
        if (!list)
                return -ENOMEM;

        /* Ownership of the list passes to the caller. */
        *ret_strv = list;
        return 0;
}
107

108
int dissect_fstype_ok(const char *fstype) {
394✔
109
        _cleanup_strv_free_ char **l = NULL;
394✔
110
        int r;
394✔
111

112
        /* When we automatically mount file systems, be a bit conservative by default what we are willing to
113
         * mount, just as an extra safety net to not mount with badly maintained legacy file system
114
         * drivers. */
115

116
        r = allowed_fstypes(&l);
394✔
117
        if (r < 0)
394✔
118
                return r;
119

120
        if (strv_contains(l, fstype))
394✔
121
                return true;
122

123
        log_debug("File system type '%s' is not allowed to be mounted as result of automatic dissection.", fstype);
×
124
        return false;
125
}
126

127
#if HAVE_BLKID
128
/* Looks up the environment variable SYSTEMD_DISSECT_FSTYPE_<DESIGNATOR> (name upper-cased) for the
 * given partition designator and returns its value, or NULL if unset. Invalid designators, verity
 * designators and the swap designator are never looked up and always yield NULL. */
static const char *getenv_fstype(PartitionDesignator d) {

        if (d < 0)
                return NULL;

        if (partition_designator_is_verity(d) || d == PARTITION_SWAP)
                return NULL;

        /* strjoina() is used here, hence this must not be moved into a loop or a helper call argument */
        char *varname = strjoina("SYSTEMD_DISSECT_FSTYPE_", partition_designator_to_string(d));

        return secure_getenv(ascii_strupper(varname));
}
138
#endif
139

140
int probe_sector_size(int fd, uint32_t *ret) {
2,493✔
141

142
        assert(fd >= 0);
2,493✔
143
        assert(ret);
2,493✔
144

145
        ssize_t ssz = gpt_probe(fd, /* ret_header= */ NULL, /* ret_entries= */ NULL, /* ret_n_entries= */ NULL, /* ret_entry_size= */ NULL);
2,493✔
146
        if (ssz == -ENOTUNIQ)
2,493✔
147
                return log_debug_errno(ssz,
×
148
                                       "Detected valid partition table at offsets matching multiple sector sizes, refusing.");
149
        if (ssz < 0)
2,493✔
150
                return ssz;
×
151
        if (ssz == 0) {
2,493✔
152
                log_debug("Couldn't find any partition table to derive sector size of.");
2,308✔
153
                *ret = 512; /* pick the traditional default */
2,308✔
154
                return 0;   /* indicate we didn't find it */
2,308✔
155
        }
156

157
        log_debug("Determined sector size %" PRIu32 " based on discovered partition table.", (uint32_t) ssz);
185✔
158
        *ret = ssz;
185✔
159
        return 1; /* indicate we *did* find it */
185✔
160
}
161

162
/* Just like probe_sector_size(), but if 'fd' refers to a block device the sector size already
 * configured on the device is used rather than probing by contents.
 *
 * Returns 1 if the size was determined (always the case for block devices on success), otherwise
 * whatever probe_sector_size() returns, or a negative errno-style value on failure. */
int probe_sector_size_prefer_ioctl(int fd, uint32_t *ret) {
        struct stat st;
        int r;

        assert(fd >= 0);
        assert(ret);

        if (fstat(fd, &st) < 0)
                return -errno;

        /* Anything that is not a block device is probed by contents */
        if (!S_ISBLK(st.st_mode))
                return probe_sector_size(fd, ret);

        r = blockdev_get_sector_size(fd, ret);
        if (r < 0)
                return r;

        return 1; /* indicate we *did* find it, like probe_sector_size() does */
}
186

187
#if HAVE_BLKID
188
/* Configures the blkid probe 'p' so that superblock probing only considers the file system types
 * returned by allowed_fstypes(), and skips superblocks whose usage is BLKID_USAGE_RAID. Returns 0 on
 * success, a negative errno-style value otherwise. */
static int probe_blkid_filter(blkid_probe p) {
        _cleanup_strv_free_ char **allowed = NULL;
        int r;

        assert(p);

        r = allowed_fstypes(&allowed);
        if (r < 0)
                return r;

        /* errno is reset before each call so that errno_or_else() only reports what the call itself set */
        errno = 0;
        if (sym_blkid_probe_filter_superblocks_type(p, BLKID_FLTR_ONLYIN, allowed) != 0)
                return errno_or_else(EINVAL);

        errno = 0;
        if (sym_blkid_probe_filter_superblocks_usage(p, BLKID_FLTR_NOTIN, BLKID_USAGE_RAID) != 0)
                return errno_or_else(EINVAL);

        return 0;
}
210
#endif
211

212
/* Probes the content type of a device or image region.
 *
 * fd               – file descriptor to probe, or negative to open 'path' instead
 * path             – path of the device; used for opening if 'fd' is negative and for log messages. If
 *                    NULL it is derived from 'fd'. At least one of 'fd' and 'path' must be given.
 * offset, size     – region to probe; size == UINT64_MAX means "everything", size == 0 means "nothing"
 * restrict_fstypes – if true, only file system types permitted by probe_blkid_filter() are considered
 * ret_fstype       – receives a newly allocated string with the detected type, or NULL if none
 *
 * Try to find device content type and return it in *ret_fstype. If nothing is found, 0/NULL will be
 * returned. -EUCLEAN will be returned for ambiguous results, and a different error otherwise. Without
 * libblkid support compiled in, -EOPNOTSUPP is returned. */
int probe_filesystem_full(
                int fd,
                const char *path,
                uint64_t offset,
                uint64_t size,
                bool restrict_fstypes,
                char **ret_fstype) {

#if HAVE_BLKID
        _cleanup_(blkid_free_probep) blkid_probe b = NULL;
        _cleanup_free_ char *path_by_fd = NULL;
        _cleanup_close_ int fd_close = -EBADF;
        /* Must start out as NULL: the lookup below may fail without touching it, and we test it afterwards */
        const char *fstype = NULL;
        int r;

        assert(fd >= 0 || path);
        assert(ret_fstype);

        r = dlopen_libblkid();
        if (r < 0)
                return r;

        if (fd < 0) {
                fd_close = open(path, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
                if (fd_close < 0)
                        return -errno;

                fd = fd_close;
        }

        if (!path) {
                r = fd_get_path(fd, &path_by_fd);
                if (r < 0)
                        return r;

                path = path_by_fd;
        }

        if (size == 0) /* empty size? nothing found! */
                goto not_found;

        b = sym_blkid_new_probe();
        if (!b)
                return -ENOMEM;

        if (restrict_fstypes) {
                r = probe_blkid_filter(b);
                if (r < 0)
                        return r;
        }

        /* The Linux kernel maintains separate block device caches for main ("whole") and partition block
         * devices, which means making a change to one might not be reflected immediately when reading via
         * the other. That's massively confusing when mixing accesses to such devices. Let's address this in
         * a limited way: when probing a file system that is not at the beginning of the block device we
         * apparently probe a partition via the main block device, and in that case let's first flush the
         * main block device cache, so that we get the data that the per-partition block device last
         * sync'ed on.
         *
         * This only works under the assumption that any tools that write to the partition block devices
         * issue an syncfs()/fsync() on the device after making changes. Typically file system formatting
         * tools that write a superblock onto a partition block device do that, however. */
        if (offset != 0)
                if (ioctl(fd, BLKFLSBUF, 0) < 0)
                        log_debug_errno(errno, "Failed to flush block device cache, ignoring: %m");

        errno = 0;
        r = sym_blkid_probe_set_device(
                        b,
                        fd,
                        offset,
                        size == UINT64_MAX ? 0 : size); /* when blkid sees size=0 it understands "everything". We prefer using UINT64_MAX for that */
        if (r != 0)
                return errno_or_else(ENOMEM);

        sym_blkid_probe_enable_superblocks(b, 1);
        sym_blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);

        errno = 0;
        r = sym_blkid_do_safeprobe(b);
        if (r == _BLKID_SAFEPROBE_NOT_FOUND)
                goto not_found;
        if (r == _BLKID_SAFEPROBE_AMBIGUOUS)
                return log_debug_errno(SYNTHETIC_ERRNO(EUCLEAN),
                                       "Results ambiguous for partition %s", path);
        if (r == _BLKID_SAFEPROBE_ERROR)
                return log_debug_errno(errno_or_else(EIO), "Failed to probe partition %s: %m", path);

        assert(r == _BLKID_SAFEPROBE_FOUND);

        /* The return value is ignored on purpose: a failed lookup simply leaves 'fstype' at NULL, which is
         * treated as "nothing detected" below. */
        (void) sym_blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
        if (fstype) {
                log_debug("Probed fstype '%s' on partition %s.", fstype, path);
                return strdup_to_full(ret_fstype, fstype);
        }

not_found:
        log_debug("No type detected on partition %s", path);
        *ret_fstype = NULL;
        return 0;
#else
        return -EOPNOTSUPP;
#endif
}
320

321
#if HAVE_BLKID
322
static int image_policy_may_use(
2,899✔
323
                const ImagePolicy *policy,
324
                PartitionDesignator designator) {
325

326
        PartitionPolicyFlags f;
2,899✔
327

328
        /* For each partition we find in the partition table do a first check if it may exist at all given
329
         * the policy, or if it shall be ignored. */
330

331
        f = image_policy_get_exhaustively(policy, designator);
2,899✔
332
        if (f < 0)
2,899✔
333
                return f;
334

335
        if ((f & _PARTITION_POLICY_USE_MASK) == PARTITION_POLICY_ABSENT)
2,899✔
336
                /* only flag set in policy is "absent"? then this partition may not exist at all */
337
                return log_debug_errno(
4✔
338
                                SYNTHETIC_ERRNO(ERFKILL),
339
                                "Partition of designator '%s' exists, but not allowed by policy, refusing.",
340
                                partition_designator_to_string(designator));
341
        if ((f & _PARTITION_POLICY_USE_MASK & ~PARTITION_POLICY_ABSENT) == PARTITION_POLICY_UNUSED) {
2,895✔
342
                /* only "unused" or "unused" + "absent" are set? then don't use it */
343
                log_debug("Partition of designator '%s' exists, and policy dictates to ignore it, doing so.",
33✔
344
                          partition_designator_to_string(designator));
345
                return false; /* ignore! */
33✔
346
        }
347

348
        return true; /* use! */
349
}
350

351
static int image_policy_check_protection(
5,801✔
352
                const ImagePolicy *policy,
353
                PartitionDesignator designator,
354
                PartitionPolicyFlags found_flags) {
355

356
        PartitionPolicyFlags policy_flags;
5,801✔
357

358
        /* Checks if the flags in the policy for the designated partition overlap the flags of what we found */
359

360
        if (found_flags < 0)
5,801✔
361
                return found_flags;
362

363
        policy_flags = image_policy_get_exhaustively(policy, designator);
5,801✔
364
        if (policy_flags < 0)
5,801✔
365
                return policy_flags;
366

367
        if ((found_flags & policy_flags) == 0) {
5,801✔
368
                _cleanup_free_ char *found_flags_string = NULL, *policy_flags_string = NULL;
6✔
369

370
                (void) partition_policy_flags_to_string(found_flags, /* simplify= */ true, &found_flags_string);
6✔
371
                (void) partition_policy_flags_to_string(policy_flags, /* simplify= */ true, &policy_flags_string);
6✔
372

373
                return log_debug_errno(SYNTHETIC_ERRNO(ERFKILL), "Partition %s discovered with policy '%s' but '%s' was required, refusing.",
6✔
374
                                       partition_designator_to_string(designator),
375
                                       strnull(found_flags_string), strnull(policy_flags_string));
376
        }
377

378
        return 0;
379
}
380

381
/* internal LUKS2 header defines */
382
#define LUKS2_FIXED_HDR_SIZE UINT64_C(0x1000)
383
#define LUKS2_MAGIC "LUKS\xba\xbe"
384

385
/* Matches the beginning of 'struct luks2_hdr_disk' from cryptsetup */
386
struct luks_header_incomplete {
387
                char luks_magic[sizeof(LUKS2_MAGIC) - 1];
388
                be16_t version;
389
                be64_t hdr_len;
390
};
391

392
/* Holds what we extract from the 'integrity' object of a segment in the LUKS JSON header. For now
 * that is just the 'type' string, which is the only field that is extracted/checked. */
struct luks_integrity_data {
        char *type; /* filled in via sd_json_dispatch_const_string() */
};
396

397
/* JSON dispatch callback for the 'integrity' object of a LUKS2 segment: requires a string field
 * 'type' and stores it in the struct luks_integrity_data passed as 'userdata'. Returns whatever
 * sd_json_dispatch() returns. */
static int integrity_information(const char *name, sd_json_variant *v, sd_json_dispatch_flags_t flags, void *userdata) {
        static const sd_json_dispatch_field integrity_fields[] = {
                { "type", SD_JSON_VARIANT_STRING, sd_json_dispatch_const_string, offsetof(struct luks_integrity_data, type), SD_JSON_MANDATORY },
                {}
        };

        return sd_json_dispatch(v, integrity_fields, flags, userdata);
}
405

406
/* cryptsetup needs a loop device to work with a partition which has offset/size but
407
 * dissect may be running unprivileged. Implement a minimal custom LUKS header parser
408
 * checking integrity protection information. */
409
static int partition_is_luks2_integrity(int part_fd, uint64_t offset, uint64_t size) {
5✔
410
        _cleanup_(sd_json_variant_unrefp) sd_json_variant *v = NULL;
5✔
411
        _cleanup_free_ char *json = NULL;
5✔
412
        sd_json_variant *w;
5✔
413
        const char *key;
5✔
414
        struct luks_header_incomplete header;
5✔
415
        ssize_t sz, json_len;
5✔
416
        int r;
5✔
417

418
        assert(part_fd >= 0);
5✔
419

420
        if (size < LUKS2_FIXED_HDR_SIZE) {
5✔
421
                log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Partition is too small to contain a LUKS header.");
×
422
                return 0;
×
423
        }
424

425
        sz = pread(part_fd, &header, sizeof(header), offset);
5✔
426
        if (sz < 0)
5✔
427
                return log_error_errno(errno, "Failed to read LUKS header.");
×
428
        if (sz != sizeof(header))
5✔
429
                return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read LUKS header.");
×
430

431
        if (memcmp(header.luks_magic, LUKS2_MAGIC, sizeof(header.luks_magic)) != 0)
5✔
432
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Partition's magic is not LUKS.");
×
433

434
        if (be16toh(header.version) != 2)
5✔
435
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unsupported LUKS header version: %" PRIu16 ".", be16toh(header.version));
×
436

437
        if (be64toh(header.hdr_len) > size)
5✔
438
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "LUKS header length exceeds partition size.");
×
439

440
        if (be64toh(header.hdr_len) <= LUKS2_FIXED_HDR_SIZE || offset > UINT64_MAX - be64toh(header.hdr_len))
5✔
441
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid LUKS header length: %" PRIu64 ".", be64toh(header.hdr_len));
×
442

443
        json_len = be64toh(header.hdr_len) - LUKS2_FIXED_HDR_SIZE;
5✔
444
        json = malloc(json_len + 1);
5✔
445
        if (!json)
5✔
446
                return -ENOMEM;
447

448
        sz = pread(part_fd, json, json_len, offset + LUKS2_FIXED_HDR_SIZE);
5✔
449
        if (sz < 0)
5✔
450
                return log_error_errno(errno, "Failed to read LUKS JSON header.");
×
451
        if (sz != json_len)
5✔
452
                return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read LUKS JSON header.");
×
453
        json[sz] = '\0';
5✔
454

455
        r = sd_json_parse(json, /* flags= */ 0, &v, /* reterr_line= */ NULL, /* reterr_column= */ NULL);
5✔
456
        if (r < 0)
5✔
457
                return log_error_errno(r, "Failed to parse LUKS JSON header.");
×
458

459
        v = sd_json_variant_by_key(v, "segments");
5✔
460
        if (!v || !sd_json_variant_is_object(v)) {
5✔
461
                log_debug("LUKS JSON header lacks 'segments' information, assuming no integrity.");
×
462
                return 0;
×
463
        }
464

465
        /* Verify that all segments have integrity protection */
466
        JSON_VARIANT_OBJECT_FOREACH(key, w, v) {
6✔
467
                struct luks_integrity_data data = {};
5✔
468

469
                static const sd_json_dispatch_field dispatch_segment[] = {
5✔
470
                        { "integrity", SD_JSON_VARIANT_OBJECT, integrity_information, 0, SD_JSON_MANDATORY },
471
                        {}
472
                };
473

474
                r = sd_json_dispatch(w, dispatch_segment, SD_JSON_ALLOW_EXTENSIONS, &data);
5✔
475
                if (r < 0) {
5✔
476
                        log_debug("Failed to get integrity information from LUKS JSON for segment %s, assuming no integrity.", key);
4✔
477
                        return 0;
4✔
478
                }
479

480
                /* We don't require a particular integrity algorithm, everything but 'none' (which shouldn't
481
                 * be there in the first place but is theoretically possible) works. */
482
                if (streq(data.type, "none"))
1✔
483
                        return 0;
484
        }
485

486
        return 1;
1✔
487
}
488

489
static int image_policy_check_partition_flags(
2,836✔
490
                const ImagePolicy *policy,
491
                PartitionDesignator designator,
492
                uint64_t gpt_flags) {
493

494
        PartitionPolicyFlags policy_flags;
2,836✔
495
        bool b;
2,836✔
496

497
        /* Checks if the partition flags in the policy match reality */
498

499
        policy_flags = image_policy_get_exhaustively(policy, designator);
2,836✔
500
        if (policy_flags < 0)
2,836✔
501
                return policy_flags;
502

503
        b = FLAGS_SET(gpt_flags, SD_GPT_FLAG_READ_ONLY);
2,836✔
504
        if ((policy_flags & _PARTITION_POLICY_READ_ONLY_MASK) == (b ? PARTITION_POLICY_READ_ONLY_OFF : PARTITION_POLICY_READ_ONLY_ON))
5,512✔
505
                return log_debug_errno(SYNTHETIC_ERRNO(ERFKILL), "Partition %s has 'read-only' flag incorrectly set (must be %s, is %s), refusing.",
×
506
                                       partition_designator_to_string(designator),
507
                                       one_zero(!b), one_zero(b));
508

509
        b = FLAGS_SET(gpt_flags, SD_GPT_FLAG_GROWFS);
2,836✔
510
        if ((policy_flags & _PARTITION_POLICY_GROWFS_MASK) == (b ? PARTITION_POLICY_GROWFS_OFF : PARTITION_POLICY_GROWFS_ON))
5,548✔
511
                return log_debug_errno(SYNTHETIC_ERRNO(ERFKILL), "Partition %s has 'growfs' flag incorrectly set (must be %s, is %s), refusing.",
×
512
                                       partition_designator_to_string(designator),
513
                                       one_zero(!b), one_zero(b));
514

515
        return 0;
516
}
517

518
static int dissected_image_probe_filesystems(
226✔
519
                DissectedImage *m,
520
                int fd,
521
                const ImagePolicy *policy) {
522

523
        int r;
226✔
524

525
        assert(m);
226✔
526

527
        /* Fill in file system types if we don't know them yet. */
528

529
        for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
3,164✔
530
                DissectedPartition *p = m->partitions + i;
2,938✔
531
                PartitionPolicyFlags found_flags;
2,938✔
532

533
                if (!p->found)
2,938✔
534
                        continue;
2,391✔
535

536
                if (!p->fstype) {
547✔
537
                        /* If we have an fd referring to the partition block device, use that. Otherwise go
538
                         * via the whole block device or backing regular file, and read via offset. */
539
                        if (p->mount_node_fd >= 0)
238✔
540
                                r = probe_filesystem_full(p->mount_node_fd, p->node, 0, UINT64_MAX, /* bool restrict_fstypes= */ true, &p->fstype);
115✔
541
                        else
542
                                r = probe_filesystem_full(fd, p->node, p->offset, p->size, /* bool restrict_fstypes= */ true, &p->fstype);
123✔
543
                        if (r < 0)
238✔
544
                                return r;
545
                }
546

547
                if (streq_ptr(p->fstype, "crypto_LUKS")) {
547✔
548
                        m->encrypted = true;
5✔
549

550
                        if (p->mount_node_fd >= 0)
5✔
551
                                r = partition_is_luks2_integrity(p->mount_node_fd, /* offset= */ 0, /* size= */ UINT64_MAX);
3✔
552
                        else
553
                                r = partition_is_luks2_integrity(fd, p->offset, p->size);
2✔
554
                        if (r < 0)
5✔
555
                                return r;
556

557
                        /* found this one, it's definitely encrypted + with or without integrity checking */
558
                        found_flags = PARTITION_POLICY_UNUSED|(r > 0 ? PARTITION_POLICY_ENCRYPTEDWITHINTEGRITY : PARTITION_POLICY_ENCRYPTED);
5✔
559
                } else
560
                        /* found it, but it's definitely not encrypted, hence mask the encrypted flag, but
561
                         * set all other ways that indicate "present". */
562
                        found_flags = PARTITION_POLICY_UNUSED|PARTITION_POLICY_UNPROTECTED|PARTITION_POLICY_VERITY|PARTITION_POLICY_SIGNED;
563

564
                if (p->fstype && fstype_is_ro(p->fstype))
547✔
565
                        p->rw = false;
146✔
566

567
                if (!p->rw)
547✔
568
                        p->growfs = false;
230✔
569

570
                /* We might have learnt more about the file system now (i.e. whether it is encrypted or not),
571
                 * hence we need to validate this against policy again, to see if the policy still matches
572
                 * with this new information. Note that image_policy_check_protection() will check for
573
                 * overlap between what's allowed in the policy and what we pass as 'found_policy' here. In
574
                 * the unencrypted case we thus might pass an overly unspecific mask here (i.e. unprotected
575
                 * OR verity OR signed), but that's fine since the earlier policy check already checked more
576
                 * specific which of those three cases where OK. Keep in mind that this function here only
577
                 * looks at specific partitions (and thus can only deduce encryption or not) but not the
578
                 * overall partition table (and thus cannot deduce verity or not). The earlier dissection
579
                 * checks already did the relevant checks that look at the whole partition table, and
580
                 * enforced policy there as needed. */
581
                r = image_policy_check_protection(policy, i, found_flags);
547✔
582
                if (r < 0)
547✔
583
                        return r;
584
        }
585

586
        return 0;
587
}
588

589
static void check_partition_flags(
585✔
590
                const char *node,
591
                unsigned long long pflags,
592
                unsigned long long supported) {
593

594
        assert(node);
585✔
595

596
        /* Mask away all flags supported by this partition's type and the three flags the UEFI spec defines generically */
597
        pflags &= ~(supported |
585✔
598
                    SD_GPT_FLAG_REQUIRED_PARTITION |
599
                    SD_GPT_FLAG_NO_BLOCK_IO_PROTOCOL |
585✔
600
                    SD_GPT_FLAG_LEGACY_BIOS_BOOTABLE);
601

602
        if (pflags == 0)
585✔
603
                return;
604

605
        /* If there are other bits set, then log about it, to make things discoverable */
606
        for (unsigned i = 0; i < sizeof(pflags) * 8; i++) {
×
607
                unsigned long long bit = 1ULL << i;
×
608
                if (!FLAGS_SET(pflags, bit))
×
609
                        continue;
×
610

611
                log_debug("Unexpected partition flag %llu set on %s!", bit, node);
×
612
        }
613
}
614
#endif
615

616
int dissected_image_name_from_path(const char *path, char **ret) {
2,666✔
617
        int r;
2,666✔
618

619
        assert(path);
2,666✔
620
        assert(ret);
2,666✔
621

622
        _cleanup_free_ char *filename = NULL;
2,666✔
623
        r = path_extract_filename(path, &filename);
2,666✔
624
        if (r < 0)
2,666✔
625
                return r;
626

627
        _cleanup_free_ char *name = NULL;
2,666✔
628
        r = raw_strip_suffixes(filename, &name);
2,666✔
629
        if (r < 0)
2,666✔
630
                return r;
631

632
        if (!image_name_is_valid(name)) {
2,666✔
633
                log_debug("Image name %s is not valid, ignoring.", strna(name));
1✔
634
                *ret = NULL;
1✔
635
                return 0;
1✔
636
        }
637

638
        *ret = TAKE_PTR(name);
2,665✔
639
        return 1;
2,665✔
640
}
641

642
#if HAVE_BLKID
643
/* Allocates a fresh DissectedImage with all partition slots reset to DISSECTED_PARTITION_NULL. If
 * 'path' is given, the image name is derived from it via dissected_image_name_from_path(); otherwise
 * the name is left unset. On success the object is stored in *ret and 0 is returned, otherwise a
 * negative errno-style value. */
static int dissected_image_new(const char *path, DissectedImage **ret) {
        _cleanup_(dissected_image_unrefp) DissectedImage *image = NULL;
        _cleanup_free_ char *image_name = NULL;
        int r;

        assert(ret);

        /* Resolve the name first, so that a failure here happens before anything is allocated */
        if (path) {
                r = dissected_image_name_from_path(path, &image_name);
                if (r < 0)
                        return r;
        }

        image = new(DissectedImage, 1);
        if (!image)
                return -ENOMEM;

        *image = (DissectedImage) {
                .has_init_system = -1,
                .image_name = TAKE_PTR(image_name),
        };

        for (PartitionDesignator d = 0; d < _PARTITION_DESIGNATOR_MAX; d++)
                image->partitions[d] = DISSECTED_PARTITION_NULL;

        *ret = TAKE_PTR(image);
        return 0;
}
671
#endif
672

673
/* Releases everything owned by the partition entry 'p' (its strings and its two file descriptors)
 * and resets the entry to DISSECTED_PARTITION_NULL, so that it no longer refers to released
 * resources. */
static void dissected_partition_done(DissectedPartition *p) {
        assert(p);

        /* File descriptors */
        safe_close(p->mount_node_fd);
        safe_close(p->fsmount_fd);

        /* Strings */
        free(p->fstype);
        free(p->decrypted_fstype);
        free(p->node);
        free(p->decrypted_node);
        free(p->label);
        free(p->mount_options);

        /* Overwrite all the now stale pointers/fds in one go */
        *p = DISSECTED_PARTITION_NULL;
}
33,814✔
687

688
static int acquire_sig_for_roothash(
89✔
689
                int fd,
690
                uint64_t partition_offset,
691
                uint64_t partition_size,
692
                struct iovec *ret_root_hash,
693
                struct iovec *ret_root_hash_sig) {
694

695
        int r;
89✔
696

697
        assert(fd >= 0);
89✔
698

699
        if (partition_offset == UINT64_MAX || partition_size == UINT64_MAX)
89✔
700
                return -EINVAL;
89✔
701

702
        if (partition_size > 4*1024*1024) /* Signature data cannot possible be larger than 4M, refuse that */
89✔
703
                return log_debug_errno(SYNTHETIC_ERRNO(EFBIG), "Verity signature partition is larger than 4M, refusing.");
×
704

705
        _cleanup_free_ char *buf = new(char, partition_size+1);
178✔
706
        if (!buf)
89✔
707
                return -ENOMEM;
708

709
        ssize_t n = pread(fd, buf, partition_size, partition_offset);
89✔
710
        if (n < 0)
89✔
711
                return -ENOMEM;
712
        if ((uint64_t) n != partition_size)
89✔
713
                return -EIO;
714

715
        const char *e = memchr(buf, 0, partition_size);
89✔
716
        if (e) {
89✔
717
                /* If we found a NUL byte then the rest of the data must be NUL too */
718
                if (!memeqzero(e, partition_size - (e - buf)))
89✔
719
                        return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Signature data contains embedded NUL byte.");
×
720
        } else
721
                buf[partition_size] = 0;
×
722

723
        _cleanup_(sd_json_variant_unrefp) sd_json_variant *v = NULL;
89✔
724
        r = sd_json_parse(buf, 0, &v, /* reterr_line= */ NULL, /* reterr_column= */ NULL);
89✔
725
        if (r < 0)
89✔
726
                return log_debug_errno(r, "Failed to parse signature JSON data: %m");
×
727

728
        sd_json_variant *rh = sd_json_variant_by_key(v, "rootHash");
89✔
729
        if (!rh)
89✔
730
                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Signature JSON object lacks 'rootHash' field.");
×
731

732
        _cleanup_(iovec_done) struct iovec root_hash = {};
89✔
733
        r = sd_json_variant_unhex(rh, &root_hash.iov_base, &root_hash.iov_len);
89✔
734
        if (r < 0)
89✔
735
                return log_debug_errno(r, "Failed to parse root hash field: %m");
×
736

737
        sd_json_variant *sig = sd_json_variant_by_key(v, "signature");
89✔
738
        if (!sig)
89✔
739
                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Signature JSON object lacks 'signature' field.");
×
740

741
        _cleanup_(iovec_done) struct iovec root_hash_sig = {};
89✔
742
        r = sd_json_variant_unbase64(sig, &root_hash_sig.iov_base, &root_hash_sig.iov_len);
89✔
743
        if (r < 0)
89✔
744
                return log_debug_errno(r, "Failed to parse signature field: %m");
×
745

746
        if (ret_root_hash)
89✔
747
                *ret_root_hash = TAKE_STRUCT(root_hash);
89✔
748

749
        if (ret_root_hash_sig)
89✔
750
                *ret_root_hash_sig = TAKE_STRUCT(root_hash_sig);
65✔
751

752
        return 0;
753
}
754

755
#if HAVE_BLKID
756
static int diskseq_should_be_used(
3,581✔
757
                const char *whole_devname,
758
                uint64_t diskseq,
759
                DissectImageFlags flags) {
760

761
        int r;
3,581✔
762

763
        assert(whole_devname);
3,581✔
764

765
        /* No diskseq. We cannot use by-diskseq symlink. */
766
        if (diskseq == 0)
3,581✔
767
                return false;
3,581✔
768

769
        /* Do not use by-diskseq link unless DISSECT_IMAGE_DISKSEQ_DEVNODE flag is explicitly set. */
770
        if (!FLAGS_SET(flags, DISSECT_IMAGE_DISKSEQ_DEVNODE))
3,473✔
771
                return false;
772

773
        _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
3,581✔
774
        r = sd_device_new_from_devname(&dev, whole_devname);
×
775
        if (r < 0)
×
776
                return r;
777

778
        /* When ID_IGNORE_DISKSEQ udev property is set, the by-diskseq symlink will not be created. */
779
        r = device_get_property_bool(dev, "ID_IGNORE_DISKSEQ");
×
780
        if (r >= 0)
×
781
                return !r; /* If explicitly specified, use it. */
×
782
        if (r != -ENOENT)
×
783
                return r;
×
784

785
        return true;
786
}
787

788
static int make_partition_devname(
3,581✔
789
                const char *whole_devname,
790
                uint64_t diskseq,
791
                int nr,
792
                DissectImageFlags flags,
793
                char **ret) {
794

795
        _cleanup_free_ char *s = NULL;
3,581✔
796
        int r;
3,581✔
797

798
        assert(whole_devname);
3,581✔
799
        assert(nr != 0); /* zero is not a valid partition nr */
3,581✔
800
        assert(ret);
3,581✔
801

802
        r = diskseq_should_be_used(whole_devname, diskseq, flags);
3,581✔
803
        if (r < 0)
3,581✔
804
                log_debug_errno(r, "Failed to determine if diskseq should be used for %s, assuming no, ignoring: %m", whole_devname);
×
805
        if (r <= 0) {
3,581✔
806
                /* Given a whole block device node name (e.g. /dev/sda or /dev/loop7) generate a partition
807
                 * device name (e.g. /dev/sda7 or /dev/loop7p5). The rule the kernel uses is simple: if whole
808
                 * block device node name ends in a digit, then suffix a 'p', followed by the partition
809
                 * number. Otherwise, just suffix the partition number without any 'p'. */
810

811
                if (nr < 0) { /* whole disk? */
3,581✔
812
                        s = strdup(whole_devname);
2,289✔
813
                        if (!s)
2,289✔
814
                                return -ENOMEM;
815
                } else {
816
                        r = partition_node_of(whole_devname, nr, &s);
1,292✔
817
                        if (r < 0)
1,292✔
818
                                return r;
819
                }
820
        } else {
821
                if (nr < 0) /* whole disk? */
×
822
                        r = asprintf(&s, "/dev/disk/by-diskseq/%" PRIu64, diskseq);
×
823
                else
824
                        r = asprintf(&s, "/dev/disk/by-diskseq/%" PRIu64 "-part%i", diskseq, nr);
×
825
                if (r < 0)
×
826
                        return -ENOMEM;
827
        }
828

829
        *ret = TAKE_PTR(s);
3,581✔
830
        return 0;
3,581✔
831
}
832

833
static int open_partition(
2,608✔
834
                const char *node,
835
                bool is_partition,
836
                const LoopDevice *loop) {
837

838
        _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
2,608✔
839
        _cleanup_close_ int fd = -EBADF;
2,608✔
840
        dev_t devnum;
2,608✔
841
        int r;
2,608✔
842

843
        assert(node);
2,608✔
844
        assert(loop || !is_partition);
2,608✔
845

846
        fd = open(node, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
2,608✔
847
        if (fd < 0)
2,608✔
848
                return -errno;
×
849

850
        if (!loop)
2,608✔
851
                return TAKE_FD(fd);
2,608✔
852

853
        /* Check if the block device is a child of (or equivalent to) the originally provided one. */
854
        r = block_device_new_from_fd(fd, is_partition ? BLOCK_DEVICE_LOOKUP_WHOLE_DISK : 0, &dev);
2,608✔
855
        if (r < 0)
2,608✔
856
                return r;
857

858
        r = sd_device_get_devnum(dev, &devnum);
2,608✔
859
        if (r < 0)
2,608✔
860
                return r;
861

862
        if (loop->devno != devnum)
2,608✔
863
                return -ENXIO;
864

865
        /* Also check diskseq. */
866
        if (loop->diskseq != 0) {
2,608✔
867
                uint64_t diskseq;
2,608✔
868

869
                r = fd_get_diskseq(fd, &diskseq);
2,608✔
870
                if (r < 0)
2,608✔
871
                        return r;
×
872

873
                if (loop->diskseq != diskseq)
2,608✔
874
                        return -ENXIO;
875
        }
876

877
        log_debug("Opened %s (fd=%i, whole_block_devnum=" DEVNUM_FORMAT_STR ", diskseq=%" PRIu64 ").",
2,608✔
878
                  node, fd, DEVNUM_FORMAT_VAL(loop->devno), loop->diskseq);
879
        return TAKE_FD(fd);
880
}
881

882
/* Compares two architectures by preference: the native architecture ranks above everything else, and
 * (where defined) the secondary architecture ranks above all remaining ones.
 *
 * Returns 1 if 'a' is preferred over 'b', -1 if 'b' is preferred over 'a', and 0 if they are identical or
 * neither of them is preferred. */
static int compare_arch(Architecture a, Architecture b) {
        Architecture native;

        if (a == b)
                return 0;

        native = native_architecture();

        if (a == native)
                return 1;
        if (b == native)
                return -1;

#ifdef ARCHITECTURE_SECONDARY
        if (a == ARCHITECTURE_SECONDARY)
                return 1;
        if (b == ARCHITECTURE_SECONDARY)
                return -1;
#endif

        return 0;
}
902

903
/* Checks whether a partition with the specified designator and label passes the image filter.
 *
 * Returns true if there's nothing to match against (negative designator, no filter, or no pattern
 * configured for this designator), or if the label (a NULL label is matched as the empty string) matches
 * the pattern configured for the designator. Returns false otherwise. */
static bool image_filter_test(const ImageFilter *filter, PartitionDesignator d, const char *label) {
        const char *pattern;

        assert(d < _PARTITION_DESIGNATOR_MAX);

        /* For unspecified designators we have no filter expression */
        if (d < 0)
                return true;

        if (!filter)
                return true;

        pattern = filter->pattern[d];
        if (!pattern)
                return true;

        return fnmatch(pattern, strempty(label), FNM_NOESCAPE) == 0;
}
914

915
static int dissect_image_from_unpartitioned(
2,290✔
916
                const char *devname,
917
                uint64_t diskseq,
918
                const sd_id128_t *uuid,
919
                bool encrypted,
920
                const VeritySettings *verity,
921
                const MountOptions *mount_options,
922
                const ImagePolicy *policy,
923
                const ImageFilter *filter,
924
                int *mount_node_fd, /* taken over on success */
925
                char **fstype, /* taken over on success */
926
                DissectedImage *m,
927
                DissectImageFlags flags,
928
                PartitionPolicyFlags found_flags) {
929

930
        _cleanup_free_ char *n = NULL, *o = NULL;
2,290✔
931
        const char *options = NULL;
2,290✔
932
        int r;
2,290✔
933

934
        assert(devname);
2,290✔
935
        assert(m);
2,290✔
936
        assert(fstype);
2,290✔
937

938
        if (!image_filter_test(filter, PARTITION_ROOT, /* label= */ NULL)) /* do a filter check with an empty partition label */
2,290✔
939
                return -ECOMM;
940

941
        r = image_policy_may_use(policy, PARTITION_ROOT);
2,290✔
942
        if (r < 0)
2,290✔
943
                return r;
944
        if (r == 0) /* policy says ignore this, so we ignore it */
2,290✔
945
                return -ENOPKG;
946

947
        r = image_policy_check_protection(policy, PARTITION_ROOT, found_flags);
2,290✔
948
        if (r < 0)
2,290✔
949
                return r;
950

951
        r = image_policy_check_partition_flags(policy, PARTITION_ROOT, 0); /* we have no gpt partition flags, hence check against all bits off */
2,289✔
952
        if (r < 0)
2,289✔
953
                return r;
954

955
        r = make_partition_devname(devname, diskseq, /* nr= */ -1, flags, &n);
2,289✔
956
        if (r < 0)
2,289✔
957
                return r;
958

959
        m->single_file_system = true;
2,289✔
960
        m->encrypted = encrypted;
2,289✔
961

962
        m->has_verity = verity && verity->data_path;
2,289✔
963
        m->verity_ready = verity_settings_data_covers(verity, PARTITION_ROOT);
2,289✔
964

965
        m->has_verity_sig = false; /* signature not embedded, must be specified */
2,289✔
966
        m->verity_sig_ready = m->verity_ready && iovec_is_set(&verity->root_hash);
2,289✔
967

968
        if (uuid)
2,289✔
969
                m->image_uuid = *uuid;
2,289✔
970

971
        options = mount_options_from_designator(mount_options, PARTITION_ROOT);
2,289✔
972
        if (options) {
2,289✔
973
                o = strdup(options);
3✔
974
                if (!o)
3✔
975
                        return -ENOMEM;
976
        }
977

978
        m->partitions[PARTITION_ROOT] = (DissectedPartition) {
4,578✔
979
                .found = true,
980
                .rw = !m->verity_ready && !fstype_is_ro(*fstype),
2,289✔
981
                .partno = -1,
982
                .architecture = _ARCHITECTURE_INVALID,
983
                .fstype = TAKE_PTR(*fstype),
2,289✔
984
                .node = TAKE_PTR(n),
2,289✔
985
                .mount_options = TAKE_PTR(o),
2,289✔
986
                .mount_node_fd = mount_node_fd ? TAKE_FD(*mount_node_fd) : -EBADF,
2,289✔
987
                .size = UINT64_MAX,
988
                .fsmount_fd = -EBADF,
989
        };
990

991
        return 0;
2,289✔
992
}
993

994
static int dissect_image(
2,598✔
995
                DissectedImage *m,
996
                int fd,
997
                const char *devname,
998
                const VeritySettings *verity,
999
                const MountOptions *mount_options,
1000
                const ImagePolicy *policy,
1001
                const ImageFilter *filter,
1002
                DissectImageFlags flags) {
1003

1004
        sd_id128_t root_uuid = SD_ID128_NULL, root_verity_uuid = SD_ID128_NULL;
2,598✔
1005
        sd_id128_t usr_uuid = SD_ID128_NULL, usr_verity_uuid = SD_ID128_NULL;
2,598✔
1006
        bool is_gpt, is_mbr, multiple_generic = false,
2,598✔
1007
                generic_rw = false,  /* initialize to appease gcc */
2,598✔
1008
                generic_growfs = false;
2,598✔
1009
        _cleanup_(blkid_free_probep) blkid_probe b = NULL;
×
1010
        _cleanup_free_ char *generic_node = NULL;
2,598✔
1011
        sd_id128_t generic_uuid = SD_ID128_NULL;
2,598✔
1012
        const char *pttype = NULL;
2,598✔
1013
        blkid_partlist pl;
2,598✔
1014
        int r, generic_nr = -1, n_partitions;
2,598✔
1015

1016
        assert(m);
2,598✔
1017
        assert(fd >= 0);
2,598✔
1018
        assert(devname);
2,598✔
1019
        assert(!verity || verity->designator < 0 || IN_SET(verity->designator, PARTITION_ROOT, PARTITION_USR));
2,598✔
1020
        assert(!verity || iovec_is_valid(&verity->root_hash));
2,598✔
1021
        assert(!verity || iovec_is_valid(&verity->root_hash_sig));
2,597✔
1022
        assert(!verity || iovec_is_set(&verity->root_hash) || !iovec_is_set(&verity->root_hash_sig));
2,597✔
1023
        assert(!((flags & DISSECT_IMAGE_GPT_ONLY) && (flags & DISSECT_IMAGE_NO_PARTITION_TABLE)));
2,598✔
1024
        assert(m->sector_size > 0);
2,598✔
1025

1026
        /* Probes a disk image, and returns information about what it found in *ret.
1027
         *
1028
         * Returns -ENOPKG if no suitable partition table or file system could be found.
1029
         * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found.
1030
         * Returns -ENXIO if we couldn't find any partition suitable as root or /usr partition
1031
         * Returns -ENOTUNIQ if we only found multiple generic partitions and thus don't know what to do with that
1032
         * Returns -ERFKILL if image doesn't match image policy
1033
         * Returns -EBADR if verity data was provided externally for an image that has a GPT partition table (i.e. is not just a naked fs)
1034
         * Returns -EPROTONOSUPPORT if DISSECT_IMAGE_ADD_PARTITION_DEVICES is set but the block device does not have partition logic enabled
1035
         * Returns -ENOMSG if we didn't find a single usable partition (and DISSECT_IMAGE_REFUSE_EMPTY is set)
1036
         * Returns -EUCLEAN if some file system had an ambiguous file system superblock signature
1037
         */
1038

1039
        uint64_t diskseq = m->loop ? m->loop->diskseq : 0;
2,598✔
1040

1041
        if (verity && iovec_is_set(&verity->root_hash)) {
2,598✔
1042
                sd_id128_t fsuuid, vuuid;
1,685✔
1043

1044
                /* If a root hash is supplied, then we use the root partition that has a UUID that match the
1045
                 * first 128-bit of the root hash. And we use the verity partition that has a UUID that match
1046
                 * the final 128-bit. */
1047

1048
                if (verity->root_hash.iov_len < sizeof(sd_id128_t))
1,685✔
1049
                        return -EINVAL;
×
1050

1051
                memcpy(&fsuuid, verity->root_hash.iov_base, sizeof(sd_id128_t));
1,685✔
1052
                memcpy(&vuuid, (const uint8_t*) verity->root_hash.iov_base + verity->root_hash.iov_len - sizeof(sd_id128_t), sizeof(sd_id128_t));
1,685✔
1053

1054
                if (sd_id128_is_null(fsuuid))
1,685✔
1055
                        return -EINVAL;
×
1056
                if (sd_id128_is_null(vuuid))
1,685✔
1057
                        return -EINVAL;
×
1058

1059
                /* If the verity data declares it's for the /usr partition, then search for that, in all
1060
                 * other cases assume it's for the root partition. */
1061
                if (verity->designator == PARTITION_USR) {
1,685✔
1062
                        usr_uuid = fsuuid;
3✔
1063
                        usr_verity_uuid = vuuid;
3✔
1064
                } else {
1065
                        root_uuid = fsuuid;
1,682✔
1066
                        root_verity_uuid = vuuid;
1,682✔
1067
                }
1068
        }
1069

1070
        r = dlopen_libblkid();
2,598✔
1071
        if (r < 0)
2,598✔
1072
                return r;
1073

1074
        b = sym_blkid_new_probe();
2,598✔
1075
        if (!b)
2,598✔
1076
                return -ENOMEM;
1077

1078
        r = probe_blkid_filter(b);
2,598✔
1079
        if (r < 0)
2,598✔
1080
                return r;
1081

1082
        errno = 0;
2,598✔
1083
        r = sym_blkid_probe_set_device(b, fd, 0, 0);
2,598✔
1084
        if (r != 0)
2,598✔
1085
                return errno_or_else(ENOMEM);
×
1086

1087
        errno = 0;
2,598✔
1088
        r = sym_blkid_probe_set_sectorsize(b, m->sector_size);
2,598✔
1089
        if (r != 0)
2,598✔
1090
                return errno_or_else(EIO);
×
1091

1092
        if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) {
2,598✔
1093
                /* Look for file system superblocks, unless we only shall look for GPT partition tables */
1094
                sym_blkid_probe_enable_superblocks(b, 1);
2,491✔
1095
                sym_blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE|BLKID_SUBLKS_UUID);
2,491✔
1096
        }
1097

1098
        sym_blkid_probe_enable_partitions(b, 1);
2,598✔
1099
        sym_blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
2,598✔
1100

1101
        errno = 0;
2,598✔
1102
        r = sym_blkid_do_safeprobe(b);
2,598✔
1103
        if (r == _BLKID_SAFEPROBE_ERROR)
2,598✔
1104
                return errno_or_else(EIO);
×
1105
        if (IN_SET(r, _BLKID_SAFEPROBE_AMBIGUOUS, _BLKID_SAFEPROBE_NOT_FOUND))
2,598✔
1106
                return log_debug_errno(SYNTHETIC_ERRNO(ENOPKG), "Failed to identify any partition table.");
×
1107

1108
        assert(r == _BLKID_SAFEPROBE_FOUND);
2,598✔
1109

1110
        if ((!(flags & DISSECT_IMAGE_GPT_ONLY) &&
2,598✔
1111
            (flags & DISSECT_IMAGE_GENERIC_ROOT)) ||
2,491✔
1112
            (flags & DISSECT_IMAGE_NO_PARTITION_TABLE)) {
263✔
1113
                _cleanup_free_ char *root_fstype_string = NULL;
2,418✔
1114
                const char *usage = NULL;
2,418✔
1115
                bool encrypted;
2,418✔
1116

1117
                r = partition_policy_determine_fstype(policy, PARTITION_ROOT, &encrypted, &root_fstype_string);
2,418✔
1118
                if (r < 0)
2,418✔
1119
                        return r;
1120

1121
                /* If flags permit this, also allow using non-partitioned single-filesystem images */
1122

1123
                (void) sym_blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
2,418✔
1124
                if (STRPTR_IN_SET(usage, "filesystem", "crypto")) {
2,418✔
1125
                        _cleanup_free_ char *t = NULL;
2,290✔
1126
                        const char *fstype = NULL;
2,290✔
1127
                        _cleanup_close_ int mount_node_fd = -EBADF;
2,290✔
1128
                        sd_id128_t uuid = SD_ID128_NULL;
2,290✔
1129
                        PartitionPolicyFlags found_flags;
2,290✔
1130

1131
                        /* OK, we have found a file system, that's our root partition then. */
1132

1133
                        if (!root_fstype_string) {
2,290✔
1134
                                (void) sym_blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
2,290✔
1135

1136
                                /* blkid will return FAT's serial number as UUID, hence it is quite possible that
1137
                                * parsing this will fail. We'll ignore the ID, since it's just too short to be
1138
                                * useful as true identifier. */
1139
                                (void) blkid_probe_lookup_value_id128(b, "UUID", &uuid);
2,290✔
1140
                        } else
1141
                                /* The policy fstype flags translate to the literal fstype name of each filesystem. */
1142
                                fstype = root_fstype_string;
×
1143

1144
                        encrypted = encrypted || streq_ptr(fstype, "crypto_LUKS");
2,290✔
1145

1146
                        if (verity_settings_data_covers(verity, PARTITION_ROOT))
2,290✔
1147
                                found_flags = iovec_is_set(&verity->root_hash_sig) ? PARTITION_POLICY_SIGNED : PARTITION_POLICY_VERITY;
3,318✔
1148
                        else if (encrypted) {
631✔
1149
                                r = partition_is_luks2_integrity(fd, /* offset= */ 0, /* size= */ UINT64_MAX);
×
1150
                                if (r < 0)
×
1151
                                        return r;
1152

1153
                                found_flags = r > 0 ? PARTITION_POLICY_ENCRYPTEDWITHINTEGRITY : PARTITION_POLICY_ENCRYPTED;
×
1154
                        } else
1155
                                found_flags = PARTITION_POLICY_UNPROTECTED;
1156

1157
                        if (FLAGS_SET(flags, DISSECT_IMAGE_PIN_PARTITION_DEVICES)) {
2,290✔
1158
                                mount_node_fd = open_partition(devname, /* is_partition= */ false, m->loop);
2,290✔
1159
                                if (mount_node_fd < 0)
2,290✔
1160
                                        return mount_node_fd;
1161
                        }
1162

1163
                        if (fstype) {
2,290✔
1164
                                t = strdup(fstype);
2,290✔
1165
                                if (!t)
2,290✔
1166
                                        return -ENOMEM;
1167
                        }
1168

1169
                        return dissect_image_from_unpartitioned(
2,290✔
1170
                                        devname,
1171
                                        diskseq,
1172
                                        &uuid,
1173
                                        encrypted,
1174
                                        verity,
1175
                                        mount_options,
1176
                                        policy,
1177
                                        filter,
1178
                                        &mount_node_fd,
1179
                                        &t,
1180
                                        m,
1181
                                        flags,
1182
                                        found_flags);
1183
                }
1184
        }
1185

1186
        (void) sym_blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
308✔
1187
        if (!pttype)
308✔
1188
                return -ENOPKG;
1189

1190
        is_gpt = streq_ptr(pttype, "gpt");
240✔
1191
        is_mbr = streq_ptr(pttype, "dos");
240✔
1192

1193
        if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr))
240✔
1194
                return -ENOPKG;
1195

1196
        /* We support external verity data partitions only if the image has no partition table */
1197
        if (verity && verity->data_path)
240✔
1198
                return -EBADR;
1199

1200
        if (FLAGS_SET(flags, DISSECT_IMAGE_ADD_PARTITION_DEVICES)) {
240✔
1201
                /* Safety check: refuse block devices that carry a partition table but for which the kernel doesn't
1202
                 * do partition scanning. */
1203
                r = blockdev_partscan_enabled_fd(fd);
111✔
1204
                if (r < 0)
111✔
1205
                        return r;
1206
                if (r == 0)
111✔
1207
                        return -EPROTONOSUPPORT;
1208
        }
1209

1210
        (void) blkid_probe_lookup_value_id128(b, "PTUUID", &m->image_uuid);
240✔
1211

1212
        errno = 0;
240✔
1213
        pl = sym_blkid_probe_get_partitions(b);
240✔
1214
        if (!pl)
240✔
1215
                return errno_or_else(ENOMEM);
×
1216

1217
        errno = 0;
240✔
1218
        n_partitions = sym_blkid_partlist_numof_partitions(pl);
240✔
1219
        if (n_partitions < 0)
240✔
1220
                return errno_or_else(EIO);
×
1221

1222
        for (int i = 0; i < n_partitions; i++) {
956✔
1223
                _cleanup_free_ char *node = NULL;
720✔
1224
                unsigned long long pflags;
720✔
1225
                blkid_loff_t start, size;
720✔
1226
                blkid_partition pp;
720✔
1227
                int nr;
720✔
1228

1229
                errno = 0;
720✔
1230
                pp = sym_blkid_partlist_get_partition(pl, i);
720✔
1231
                if (!pp)
720✔
1232
                        return errno_or_else(EIO);
×
1233

1234
                pflags = sym_blkid_partition_get_flags(pp);
720✔
1235

1236
                errno = 0;
720✔
1237
                nr = sym_blkid_partition_get_partno(pp);
720✔
1238
                if (nr < 0)
720✔
1239
                        return errno_or_else(EIO);
×
1240

1241
                errno = 0;
720✔
1242
                start = sym_blkid_partition_get_start(pp);
720✔
1243
                if (start < 0)
720✔
1244
                        return errno_or_else(EIO);
×
1245

1246
                assert((uint64_t) start < UINT64_MAX/512);
720✔
1247

1248
                errno = 0;
720✔
1249
                size = sym_blkid_partition_get_size(pp);
720✔
1250
                if (size < 0)
720✔
1251
                        return errno_or_else(EIO);
×
1252

1253
                assert((uint64_t) size < UINT64_MAX/512);
720✔
1254

1255
                /* While probing we need the non-diskseq device node name to access the thing, hence mask off
1256
                 * DISSECT_IMAGE_DISKSEQ_DEVNODE. */
1257
                r = make_partition_devname(devname, diskseq, nr, flags & ~DISSECT_IMAGE_DISKSEQ_DEVNODE, &node);
720✔
1258
                if (r < 0)
720✔
1259
                        return r;
1260

1261
                /* So here's the thing: after the main ("whole") block device popped up it might take a while
1262
                 * before the kernel fully probed the partition table. Waiting for that to finish is icky in
1263
                 * userspace. So here's what we do instead. We issue the BLKPG_ADD_PARTITION ioctl to add the
1264
                 * partition ourselves, racing against the kernel. Good thing is: if this call fails with
1265
                 * EBUSY then the kernel was quicker than us, and that's totally OK, the outcome is good for
1266
                 * us: the device node will exist. If OTOH our call was successful we won the race. Which is
1267
                 * also good as the outcome is the same: the partition block device exists, and we can use
1268
                 * it.
1269
                 *
1270
                 * Kernel returns EBUSY if there's already a partition by that number or an overlapping
1271
                 * partition already existent. */
1272

1273
                if (FLAGS_SET(flags, DISSECT_IMAGE_ADD_PARTITION_DEVICES)) {
720✔
1274
                        r = block_device_add_partition(fd, node, nr, (uint64_t) start * 512, (uint64_t) size * 512);
338✔
1275
                        if (r < 0) {
338✔
1276
                                if (r != -EBUSY)
338✔
1277
                                        return log_debug_errno(r, "BLKPG_ADD_PARTITION failed: %m");
×
1278

1279
                                log_debug_errno(r, "Kernel was quicker than us in adding partition %i.", nr);
338✔
1280
                        } else
1281
                                log_debug("We were quicker than kernel in adding partition %i.", nr);
×
1282
                }
1283

1284
                if (is_gpt) {
720✔
1285
                        const char *label;
720✔
1286
                        sd_id128_t type_id, id;
720✔
1287
                        GptPartitionType type;
720✔
1288
                        bool rw = true, growfs = false;
720✔
1289

1290
                        r = blkid_partition_get_uuid_id128(pp, &id);
720✔
1291
                        if (r < 0) {
720✔
1292
                                log_debug_errno(r, "Failed to read partition UUID, ignoring: %m");
×
1293
                                continue;
34✔
1294
                        }
1295

1296
                        r = blkid_partition_get_type_id128(pp, &type_id);
720✔
1297
                        if (r < 0) {
720✔
1298
                                log_debug_errno(r, "Failed to read partition type UUID, ignoring: %m");
×
1299
                                continue;
×
1300
                        }
1301

1302
                        type = gpt_partition_type_from_uuid(type_id);
720✔
1303

1304
                        label = sym_blkid_partition_get_name(pp); /* libblkid returns NULL here if empty */
720✔
1305

1306
                        /* systemd-sysupdate expects empty partitions to be marked with an "_empty" label, hence ignore them here. */
1307
                        if (streq_ptr(label, "_empty"))
720✔
1308
                                continue;
×
1309

1310
                        if (!image_filter_test(filter, type.designator, label))
720✔
1311
                                continue;
×
1312

1313
                        log_debug("Dissecting %s partition with label %s and UUID %s.",
824✔
1314
                                  strna(partition_designator_to_string(type.designator)), strna(label), SD_ID128_TO_UUID_STRING(id));
1315

1316
                        /* Note that we don't check the SD_GPT_FLAG_NO_AUTO flag for the ESP, as it is
1317
                         * not defined there. We instead check the SD_GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as
1318
                         * recommended by the UEFI spec (See "12.3.3 Number and Location of System
1319
                         * Partitions"). */
1320
                        if (FLAGS_SET(pflags, SD_GPT_FLAG_NO_AUTO) && type.designator != PARTITION_ESP) {
720✔
1321
                                log_debug("Partition has 'no auto' flag set, ignoring.");
×
1322
                                continue;
×
1323
                        }
1324

1325
                        if (!verity && partition_designator_is_verity(type.designator)) {
720✔
1326
                                log_debug("Partition is a verity hash or verity signature partition but no verity was requested, ignoring.");
×
1327
                                continue;
×
1328
                        }
1329

1330
                        PartitionDesignator vd = partition_verity_to_data(type.designator);
720✔
1331
                        if (verity && verity->designator >= 0 && vd >= 0 && vd != verity->designator) {
720✔
1332
                                log_debug("Partition is a %s partition but verity was only requested for the %s partition, ignoring.",
×
1333
                                          partition_designator_to_string(type.designator),
1334
                                          partition_designator_to_string(verity->designator));
1335
                                continue;
×
1336
                        }
1337

1338
                        const char *fstype = getenv_fstype(type.designator);
720✔
1339

1340
                        if (IN_SET(type.designator,
720✔
1341
                                   PARTITION_HOME,
1342
                                   PARTITION_SRV,
1343
                                   PARTITION_XBOOTLDR,
1344
                                   PARTITION_TMP)) {
1345

1346
                                check_partition_flags(node, pflags,
33✔
1347
                                                      SD_GPT_FLAG_NO_AUTO | SD_GPT_FLAG_READ_ONLY | SD_GPT_FLAG_GROWFS);
1348

1349
                                rw = !(pflags & SD_GPT_FLAG_READ_ONLY);
33✔
1350
                                growfs = FLAGS_SET(pflags, SD_GPT_FLAG_GROWFS);
33✔
1351

1352
                                /* XBOOTLDR cannot be integrity protected (since firmware needs to access
1353
                                 * it), hence be restrictive with the fs choice when dissecting. */
1354
                                if (type.designator == PARTITION_XBOOTLDR && !fstype)
33✔
1355
                                        fstype = "vfat";
1✔
1356

1357
                        } else if (type.designator == PARTITION_ESP) {
687✔
1358

1359
                                if (FLAGS_SET(pflags, SD_GPT_FLAG_NO_BLOCK_IO_PROTOCOL)) {
135✔
1360
                                        log_debug("ESP Partition has 'no block io' flag set, ignoring.");
×
1361
                                        continue;
×
1362
                                }
1363

1364
                                /* Effectively the ESP has to be VFAT, let's enforce this */
1365
                                if (!fstype)
135✔
1366
                                        fstype = "vfat";
135✔
1367

1368
                        } else if (IN_SET(type.designator, PARTITION_ROOT, PARTITION_USR)) {
552✔
1369
                                sd_id128_t expected_uuid = type.designator == PARTITION_ROOT ? root_uuid : usr_uuid;
231✔
1370

1371
                                check_partition_flags(node, pflags,
231✔
1372
                                                      SD_GPT_FLAG_NO_AUTO | SD_GPT_FLAG_READ_ONLY | SD_GPT_FLAG_GROWFS);
1373

1374
                                if (!sd_id128_is_null(expected_uuid) && !sd_id128_equal(expected_uuid, id)) {
462✔
1375
                                        log_debug("Partition UUID '%s' does not match expected UUID '%s' derived from %s verity hash, ignoring.",
×
1376
                                                  SD_ID128_TO_UUID_STRING(id),
1377
                                                  SD_ID128_TO_UUID_STRING(expected_uuid),
1378
                                                  partition_designator_to_string(type.designator));
1379
                                        continue;
×
1380
                                }
1381

1382
                                rw = !(pflags & SD_GPT_FLAG_READ_ONLY);
231✔
1383
                                growfs = FLAGS_SET(pflags, SD_GPT_FLAG_GROWFS);
231✔
1384

1385
                        } else if (IN_SET(type.designator, PARTITION_ROOT_VERITY, PARTITION_USR_VERITY)) {
321✔
1386
                                sd_id128_t expected_uuid = type.designator == PARTITION_ROOT_VERITY ? root_verity_uuid : usr_verity_uuid;
91✔
1387

1388
                                check_partition_flags(node, pflags,
91✔
1389
                                                      SD_GPT_FLAG_NO_AUTO | SD_GPT_FLAG_READ_ONLY);
1390

1391
                                m->has_verity = true;
91✔
1392

1393
                                if (!sd_id128_is_null(expected_uuid) && !sd_id128_equal(expected_uuid, id)) {
182✔
1394
                                        log_debug("Partition UUID '%s' does not match expected UUID '%s' derived from %s verity hash, ignoring.",
×
1395
                                                  SD_ID128_TO_UUID_STRING(id),
1396
                                                  SD_ID128_TO_UUID_STRING(expected_uuid),
1397
                                                  partition_designator_to_string(partition_verity_to_data(type.designator)));
1398
                                        continue;
×
1399
                                }
1400

1401
                                fstype = "DM_verity_hash";
91✔
1402
                                rw = false;
91✔
1403

1404
                        } else if (IN_SET(type.designator, PARTITION_ROOT_VERITY_SIG, PARTITION_USR_VERITY_SIG)) {
230✔
1405
                                if (verity && iovec_is_set(&verity->root_hash)) {
90✔
1406
                                        _cleanup_(iovec_done) struct iovec root_hash = {};
24✔
1407

1408
                                        r = acquire_sig_for_roothash(
48✔
1409
                                                        fd,
1410
                                                        start * 512,
24✔
1411
                                                        size * 512,
24✔
1412
                                                        &root_hash,
1413
                                                        /* ret_root_hash_sig= */ NULL);
1414
                                        if (r < 0)
24✔
1415
                                                return r;
×
1416
                                        if (iovec_memcmp(&verity->root_hash, &root_hash) != 0) {
24✔
1417
                                                if (DEBUG_LOGGING) {
×
1418
                                                        _cleanup_free_ char *found = NULL, *expected = NULL;
×
1419

1420
                                                        found = hexmem(root_hash.iov_base, root_hash.iov_len);
×
1421
                                                        expected = hexmem(verity->root_hash.iov_base, verity->root_hash.iov_len);
×
1422

1423
                                                        log_debug("Verity root hash in signature JSON data (%s) doesn't match configured hash (%s).", strna(found), strna(expected));
×
1424
                                                }
1425
                                                continue;
×
1426
                                        }
1427
                                }
1428

1429
                                check_partition_flags(node, pflags,
90✔
1430
                                                      SD_GPT_FLAG_NO_AUTO | SD_GPT_FLAG_READ_ONLY);
1431

1432
                                m->has_verity_sig = true;
90✔
1433
                                fstype = "verity_hash_signature";
90✔
1434
                                rw = false;
90✔
1435

1436
                        } else if (type.designator == PARTITION_SWAP) {
140✔
1437

1438
                                check_partition_flags(node, pflags, SD_GPT_FLAG_NO_AUTO);
3✔
1439

1440
                                /* Note: we don't set fstype = "swap" here, because we still need to probe if
1441
                                 * it might be encrypted (i.e. fstype "crypt_LUKS") or unencrypted
1442
                                 * (i.e. fstype "swap"), and the only way to figure that out is via fstype
1443
                                 * probing. */
1444

1445
                        /* We don't have a designator for SD_GPT_LINUX_GENERIC so check the UUID instead. */
1446
                        } else if (sd_id128_equal(type.uuid, SD_GPT_LINUX_GENERIC)) {
137✔
1447

1448
                                if (!image_filter_test(filter, PARTITION_ROOT, label))
136✔
1449
                                        continue;
×
1450

1451
                                check_partition_flags(node, pflags,
136✔
1452
                                                      SD_GPT_FLAG_NO_AUTO | SD_GPT_FLAG_READ_ONLY | SD_GPT_FLAG_GROWFS);
1453

1454
                                if (generic_node)
136✔
1455
                                        multiple_generic = true;
1456
                                else {
1457
                                        generic_nr = nr;
134✔
1458
                                        generic_rw = !(pflags & SD_GPT_FLAG_READ_ONLY);
134✔
1459
                                        generic_growfs = FLAGS_SET(pflags, SD_GPT_FLAG_GROWFS);
134✔
1460
                                        generic_uuid = id;
134✔
1461
                                        generic_node = TAKE_PTR(node);
134✔
1462
                                }
1463

1464
                        } else if (type.designator == PARTITION_VAR) {
1✔
1465

1466
                                check_partition_flags(node, pflags,
1✔
1467
                                                      SD_GPT_FLAG_NO_AUTO | SD_GPT_FLAG_READ_ONLY | SD_GPT_FLAG_GROWFS);
1468

1469
                                if (!FLAGS_SET(flags, DISSECT_IMAGE_RELAX_VAR_CHECK)) {
1✔
1470
                                        sd_id128_t var_uuid;
1✔
1471

1472
                                        /* For /var we insist that the uuid of the partition matches the
1473
                                         * HMAC-SHA256 of the /var GPT partition type uuid, keyed by machine
1474
                                         * ID. Why? Unlike the other partitions /var is inherently
1475
                                         * installation specific, hence we need to be careful not to mount it
1476
                                         * in the wrong installation. By hashing the partition UUID from
1477
                                         * /etc/machine-id we can securely bind the partition to the
1478
                                         * installation. */
1479

1480
                                        r = sd_id128_get_machine_app_specific(SD_GPT_VAR, &var_uuid);
1✔
1481
                                        if (r < 0)
1✔
1482
                                                return r;
×
1483

1484
                                        if (!sd_id128_equal(var_uuid, id)) {
1✔
1485
                                                log_debug("Found a /var/ partition, but its UUID didn't match our expectations "
1✔
1486
                                                          "(found: " SD_ID128_UUID_FORMAT_STR ", expected: " SD_ID128_UUID_FORMAT_STR "), ignoring.",
1487
                                                          SD_ID128_FORMAT_VAL(id), SD_ID128_FORMAT_VAL(var_uuid));
1488
                                                continue;
1✔
1489
                                        }
1490
                                }
1491

1492
                                rw = !(pflags & SD_GPT_FLAG_READ_ONLY);
×
1493
                                growfs = FLAGS_SET(pflags, SD_GPT_FLAG_GROWFS);
×
1494
                        }
1495

1496
                        if (type.designator != _PARTITION_DESIGNATOR_INVALID) {
719✔
1497
                                _cleanup_free_ char *t = NULL, *o = NULL, *l = NULL, *n = NULL;
583✔
1498
                                _cleanup_close_ int mount_node_fd = -EBADF;
583✔
1499
                                const char *options = NULL;
583✔
1500

1501
                                r = image_policy_may_use(policy, type.designator);
583✔
1502
                                if (r < 0)
583✔
1503
                                        return r;
1504
                                if (r == 0) {
579✔
1505
                                        /* Policy says: ignore; Remember this fact, so that we later can distinguish between "found but ignored" and "not found at all" */
1506

1507
                                        if (!m->partitions[type.designator].found)
33✔
1508
                                                m->partitions[type.designator].ignored = true;
33✔
1509

1510
                                        continue;
33✔
1511
                                }
1512

1513
                                if (m->partitions[type.designator].found) {
546✔
1514
                                        int c;
14✔
1515

1516
                                        /* For most partition types the first one we see wins. Except for the
1517
                                         * rootfs and /usr, where we do a version compare of the label, and
1518
                                         * let the newest version win. This permits a simple A/B versioning
1519
                                         * scheme in OS images. */
1520

1521
                                        c = compare_arch(type.arch, m->partitions[type.designator].architecture);
14✔
1522
                                        if (c < 0) /* the arch we already found is better than the one we found now */
14✔
1523
                                                continue;
×
1524
                                        if (c == 0 && /* same arch? then go by version in label */
28✔
1525
                                            (!partition_designator_is_versioned(type.designator) ||
28✔
1526
                                             strverscmp_improved(label, m->partitions[type.designator].label) <= 0))
14✔
1527
                                                continue;
×
1528

1529
                                        dissected_partition_done(m->partitions + type.designator);
14✔
1530
                                }
1531

1532
                                if (FLAGS_SET(flags, DISSECT_IMAGE_PIN_PARTITION_DEVICES) &&
546✔
1533
                                    type.designator != PARTITION_SWAP) {
1534
                                        mount_node_fd = open_partition(node, /* is_partition= */ true, m->loop);
292✔
1535
                                        if (mount_node_fd < 0)
292✔
1536
                                                return mount_node_fd;
1537
                                }
1538

1539
                                r = make_partition_devname(devname, diskseq, nr, flags, &n);
546✔
1540
                                if (r < 0)
546✔
1541
                                        return r;
1542

1543
                                /* Local override via env var or designator type wins */
1544
                                if (fstype) {
546✔
1545
                                        t = strdup(fstype);
313✔
1546
                                        if (!t)
313✔
1547
                                                return -ENOMEM;
1548
                                } else {
1549
                                        r = partition_policy_determine_fstype(policy, type.designator, /* ret_encrypted= */ NULL, &t);
233✔
1550
                                        if (r < 0)
233✔
1551
                                                return r;
1552
                                }
1553

1554
                                if (label) {
546✔
1555
                                        l = strdup(label);
541✔
1556
                                        if (!l)
541✔
1557
                                                return -ENOMEM;
1558
                                }
1559

1560
                                options = mount_options_from_designator(mount_options, type.designator);
546✔
1561
                                if (options) {
546✔
1562
                                        o = strdup(options);
1✔
1563
                                        if (!o)
1✔
1564
                                                return -ENOMEM;
1565
                                }
1566

1567
                                m->partitions[type.designator] = (DissectedPartition) {
546✔
1568
                                        .found = true,
1569
                                        .partno = nr,
1570
                                        .rw = rw,
1571
                                        .growfs = growfs,
1572
                                        .architecture = type.arch,
546✔
1573
                                        .node = TAKE_PTR(n),
546✔
1574
                                        .fstype = TAKE_PTR(t),
546✔
1575
                                        .label = TAKE_PTR(l),
546✔
1576
                                        .uuid = id,
1577
                                        .mount_options = TAKE_PTR(o),
546✔
1578
                                        .mount_node_fd = TAKE_FD(mount_node_fd),
546✔
1579
                                        .offset = (uint64_t) start * 512,
546✔
1580
                                        .size = (uint64_t) size * 512,
546✔
1581
                                        .gpt_flags = pflags,
1582
                                        .fsmount_fd = -EBADF,
1583
                                };
1584
                        }
1585

1586
                } else if (is_mbr) {
×
1587

1588
                        switch (sym_blkid_partition_get_type(pp)) {
×
1589

1590
                        case 0x83: /* Linux partition */
×
1591

1592
                                if (pflags != 0x80) /* Bootable flag */
×
1593
                                        continue;
×
1594

1595
                                if (!image_filter_test(filter, PARTITION_ROOT, /* label= */ NULL))
×
1596
                                        continue;
×
1597

1598
                                if (generic_node)
×
1599
                                        multiple_generic = true;
1600
                                else {
1601
                                        generic_nr = nr;
×
1602
                                        generic_rw = true;
×
1603
                                        generic_growfs = false;
×
1604
                                        generic_node = TAKE_PTR(node);
×
1605
                                }
1606

1607
                                break;
1608

1609
                        case 0xEA: { /* Boot Loader Spec extended $BOOT partition */
×
1610
                                _cleanup_close_ int mount_node_fd = -EBADF;
×
1611
                                _cleanup_free_ char *o = NULL, *n = NULL;
×
1612
                                sd_id128_t id = SD_ID128_NULL;
×
1613
                                const char *options = NULL;
×
1614

1615
                                if (!image_filter_test(filter, PARTITION_XBOOTLDR, /* label= */ NULL))
×
1616
                                        continue;
×
1617

1618
                                r = image_policy_may_use(policy, PARTITION_XBOOTLDR);
×
1619
                                if (r < 0)
×
1620
                                        return r;
1621
                                if (r == 0) { /* policy says: ignore */
×
1622
                                        if (!m->partitions[PARTITION_XBOOTLDR].found)
×
1623
                                                m->partitions[PARTITION_XBOOTLDR].ignored = true;
×
1624

1625
                                        continue;
×
1626
                                }
1627

1628
                                /* First one wins */
1629
                                if (m->partitions[PARTITION_XBOOTLDR].found)
×
1630
                                        continue;
×
1631

1632
                                if (FLAGS_SET(flags, DISSECT_IMAGE_PIN_PARTITION_DEVICES)) {
×
1633
                                        mount_node_fd = open_partition(node, /* is_partition= */ true, m->loop);
×
1634
                                        if (mount_node_fd < 0)
×
1635
                                                return mount_node_fd;
1636
                                }
1637

1638
                                (void) blkid_partition_get_uuid_id128(pp, &id);
×
1639

1640
                                r = make_partition_devname(devname, diskseq, nr, flags, &n);
×
1641
                                if (r < 0)
×
1642
                                        return r;
1643

1644
                                options = mount_options_from_designator(mount_options, PARTITION_XBOOTLDR);
×
1645
                                if (options) {
×
1646
                                        o = strdup(options);
×
1647
                                        if (!o)
×
1648
                                                return -ENOMEM;
1649
                                }
1650

1651
                                m->partitions[PARTITION_XBOOTLDR] = (DissectedPartition) {
×
1652
                                        .found = true,
1653
                                        .partno = nr,
1654
                                        .rw = true,
1655
                                        .growfs = false,
1656
                                        .architecture = _ARCHITECTURE_INVALID,
1657
                                        .node = TAKE_PTR(n),
×
1658
                                        .uuid = id,
1659
                                        .mount_options = TAKE_PTR(o),
×
1660
                                        .mount_node_fd = TAKE_FD(mount_node_fd),
×
1661
                                        .offset = (uint64_t) start * 512,
×
1662
                                        .size = (uint64_t) size * 512,
×
1663
                                        .fsmount_fd = -EBADF,
1664
                                };
1665

1666
                                break;
×
1667
                        }}
1668
                }
1669
        }
1670

1671
        /* Verity found but no matching data partition? Something is off, refuse. */
1672
        FOREACH_ELEMENT(dd, ((const PartitionDesignator[]) { PARTITION_ROOT, PARTITION_USR })) {
705✔
1673
                PartitionDesignator dv = partition_verity_hash_of(*dd);
471✔
1674
                PartitionDesignator ds = partition_verity_sig_of(*dd);
471✔
1675

1676
                /* Hint to help static analyzers */
1677
                assert(dv >= 0);
471✔
1678
                assert(ds >= 0);
471✔
1679

1680
                if (!m->partitions[*dd].found && (m->partitions[dv].found || m->partitions[ds].found))
471✔
1681
                        return log_debug_errno(
2✔
1682
                                        SYNTHETIC_ERRNO(EADDRNOTAVAIL),
1683
                                        "Found %s verity hash partition without matching %s data partition.",
1684
                                        partition_designator_to_string(*dd),
1685
                                        partition_designator_to_string(*dd));
1686

1687
                if (m->partitions[ds].found && !m->partitions[dv].found)
471✔
1688
                        return log_debug_errno(
2✔
1689
                                        SYNTHETIC_ERRNO(EADDRNOTAVAIL),
1690
                                        "Found %s verity signature partition without matching %s verity hash partition.",
1691
                                        partition_designator_to_string(*dd),
1692
                                        partition_designator_to_string(*dd));
1693
        }
1694

1695
        /* If root and /usr are combined then insist that the architecture matches */
1696
        if (m->partitions[PARTITION_ROOT].found &&
234✔
1697
            m->partitions[PARTITION_USR].found &&
3✔
1698
            (m->partitions[PARTITION_ROOT].architecture >= 0 &&
3✔
1699
             m->partitions[PARTITION_USR].architecture >= 0 &&
3✔
1700
             m->partitions[PARTITION_ROOT].architecture != m->partitions[PARTITION_USR].architecture))
1701
                return log_debug_errno(SYNTHETIC_ERRNO(EREMOTE),
×
1702
                                       "Found root and usr partitions with different architectures (%s vs %s).",
1703
                                       architecture_to_string(m->partitions[PARTITION_ROOT].architecture),
1704
                                       architecture_to_string(m->partitions[PARTITION_USR].architecture));
1705

1706
        if (!m->partitions[PARTITION_ROOT].found &&
234✔
1707
            !m->partitions[PARTITION_USR].found &&
37✔
1708
            (flags & DISSECT_IMAGE_GENERIC_ROOT) &&
29✔
1709
            (!verity || !iovec_is_set(&verity->root_hash) || verity->designator != PARTITION_USR)) {
30✔
1710

1711
                /* OK, we found nothing usable, then check if there's a single generic partition, and use
1712
                 * that. If the root hash was set however, then we won't fall back to a generic node, because
1713
                 * the root hash decides. */
1714

1715
                /* If we didn't find a properly marked root partition, but we did find a single suitable
1716
                 * generic Linux partition, then use this as root partition, if the caller asked for it. */
1717
                if (multiple_generic)
29✔
1718
                        return -ENOTUNIQ;
1719

1720
                /* If we didn't find a generic node, then we can't fix this up either */
1721
                if (generic_node) {
29✔
1722
                        r = image_policy_may_use(policy, PARTITION_ROOT);
26✔
1723
                        if (r < 0)
26✔
1724
                                return r;
1725
                        if (r == 0)
26✔
1726
                                /* Policy says: ignore; remember that we did */
1727
                                m->partitions[PARTITION_ROOT].ignored = true;
×
1728
                        else {
1729
                                _cleanup_close_ int mount_node_fd = -EBADF;
26✔
1730
                                _cleanup_free_ char *o = NULL, *n = NULL;
×
1731
                                const char *options;
26✔
1732

1733
                                if (FLAGS_SET(flags, DISSECT_IMAGE_PIN_PARTITION_DEVICES)) {
26✔
1734
                                        mount_node_fd = open_partition(generic_node, /* is_partition= */ true, m->loop);
26✔
1735
                                        if (mount_node_fd < 0)
26✔
1736
                                                return mount_node_fd;
1737
                                }
1738

1739
                                r = make_partition_devname(devname, diskseq, generic_nr, flags, &n);
26✔
1740
                                if (r < 0)
26✔
1741
                                        return r;
1742

1743
                                options = mount_options_from_designator(mount_options, PARTITION_ROOT);
26✔
1744
                                if (options) {
26✔
1745
                                        o = strdup(options);
×
1746
                                        if (!o)
×
1747
                                                return -ENOMEM;
1748
                                }
1749

1750
                                assert(generic_nr >= 0);
26✔
1751
                                m->partitions[PARTITION_ROOT] = (DissectedPartition) {
26✔
1752
                                        .found = true,
1753
                                        .rw = generic_rw,
1754
                                        .growfs = generic_growfs,
1755
                                        .partno = generic_nr,
1756
                                        .architecture = _ARCHITECTURE_INVALID,
1757
                                        .node = TAKE_PTR(n),
26✔
1758
                                        .uuid = generic_uuid,
1759
                                        .mount_options = TAKE_PTR(o),
26✔
1760
                                        .mount_node_fd = TAKE_FD(mount_node_fd),
26✔
1761
                                        .offset = UINT64_MAX,
1762
                                        .size = UINT64_MAX,
1763
                                        .fsmount_fd = -EBADF,
1764
                                };
1765
                        }
1766
                }
1767
        }
1768

1769
        /* Check that we have a root fs, if we are told to require one. /usr alone is fine too, but only if the appropriate flag for that is set as well. */
1770
        if (FLAGS_SET(flags, DISSECT_IMAGE_REQUIRE_ROOT) &&
234✔
1771
            !(m->partitions[PARTITION_ROOT].found || (m->partitions[PARTITION_USR].found && FLAGS_SET(flags, DISSECT_IMAGE_USR_NO_ROOT))))
59✔
1772
                return log_debug_errno(SYNTHETIC_ERRNO(ENXIO), "Root or usr partition requested but found neither.");
1✔
1773

1774
        if (m->partitions[PARTITION_ROOT_VERITY].found) {
233✔
1775
                /* We only support one verity partition per image, i.e. can't do for both /usr and root fs */
1776
                if (m->partitions[PARTITION_USR_VERITY].found)
72✔
1777
                        return log_debug_errno(SYNTHETIC_ERRNO(ENOTUNIQ), "Found both root and usr verity enabled partitions which is not supported.");
×
1778

1779
                /* We don't support verity enabled root with a split out /usr. Neither with nor without
1780
                 * verity there. (Note that we do support verity-less root with verity-full /usr, though.) */
1781
                if (m->partitions[PARTITION_USR].found)
72✔
1782
                        return log_debug_errno(SYNTHETIC_ERRNO(EADDRNOTAVAIL), "Found verity enabled root partition with split usr partition which is not supported.");
×
1783
        }
1784

1785
        if (verity) {
233✔
1786
                /* If a verity designator is specified, then insist that the matching partition exists */
1787
                if (verity->designator >= 0 && !m->partitions[verity->designator].found)
232✔
1788
                        return log_debug_errno(
×
1789
                                SYNTHETIC_ERRNO(EADDRNOTAVAIL),
1790
                                "Explicit %s verity designator was specified but did not find %s partition.",
1791
                                partition_designator_to_string(verity->designator),
1792
                                partition_designator_to_string(verity->designator));
1793

1794
                if (iovec_is_set(&verity->root_hash)) {
1795
                        /* If we have an explicit root hash and found the partitions for it, then we are ready to use
1796
                         * Verity, set things up for it */
1797

1798
                        PartitionDesignator d = verity->designator < 0 || verity->designator == PARTITION_ROOT
23✔
1799
                                ? PARTITION_ROOT : PARTITION_USR;
23✔
1800
                        PartitionDesignator dv = partition_verity_hash_of(d);
23✔
1801
                        assert(dv >= 0);
23✔
1802

1803
                        if (!m->partitions[d].found)
23✔
1804
                                return log_debug_errno(
×
1805
                                                SYNTHETIC_ERRNO(EADDRNOTAVAIL),
1806
                                                "Verity enabled %s partition was requested but did not find a %s data partition.",
1807
                                                partition_designator_to_string(d),
1808
                                                partition_designator_to_string(d));
1809

1810
                        if (!m->partitions[dv].found)
23✔
1811
                                return log_debug_errno(
1✔
1812
                                                SYNTHETIC_ERRNO(EADDRNOTAVAIL),
1813
                                                "Verity enabled %s partition was requested but did not find a %s verity hash partition.",
1814
                                                partition_designator_to_string(d),
1815
                                                partition_designator_to_string(d));
1816

1817
                        /* If we found a verity setup, then the data partition is necessarily read-only. */
1818
                        m->partitions[d].rw = false;
22✔
1819

1820
                        m->verity_ready = true;
22✔
1821

1822
                        if (iovec_is_set(&verity->root_hash_sig))
22✔
1823
                                m->verity_sig_ready = true;
×
1824
                }
1825
        }
1826

1827
        bool any = false;
1828

1829
        /* After we discovered all partitions let's see if the verity requirements match the policy. (Note:
1830
         * we don't check encryption requirements here, because we haven't probed the file system yet, hence
1831
         * don't know if this is encrypted or not) */
1832
        for (PartitionDesignator di = 0; di < _PARTITION_DESIGNATOR_MAX; di++) {
3,191✔
1833
                any = any || m->partitions[di].found;
2,964✔
1834

1835
                /* Determine the verity protection level for this partition. */
1836
                PartitionPolicyFlags found_flags;
2,964✔
1837
                if (m->partitions[di].found) {
2,964✔
1838
                        found_flags = PARTITION_POLICY_ENCRYPTED|PARTITION_POLICY_ENCRYPTEDWITHINTEGRITY|PARTITION_POLICY_UNPROTECTED|PARTITION_POLICY_UNUSED;
551✔
1839

1840
                        PartitionDesignator vi = partition_verity_hash_of(di);
551✔
1841
                        if (vi >= 0 && m->partitions[vi].found) {
551✔
1842
                                found_flags |= PARTITION_POLICY_VERITY;
80✔
1843

1844
                                PartitionDesignator si = partition_verity_sig_of(di);
80✔
1845
                                if (si >= 0 && m->partitions[si].found)
80✔
1846
                                        found_flags |= PARTITION_POLICY_SIGNED;
69✔
1847
                        }
1848
                } else
1849
                        found_flags = m->partitions[di].ignored ? PARTITION_POLICY_UNUSED : PARTITION_POLICY_ABSENT;
2,413✔
1850

1851
                if (DEBUG_LOGGING) {
2,964✔
1852
                        _cleanup_free_ char *s = NULL;
2,483✔
1853
                        (void) partition_policy_flags_to_string(found_flags, /* simplify= */ false, &s);
2,483✔
1854
                        log_debug("Found for designator %s: %s.", partition_designator_to_string(di), strna(s));
2,483✔
1855
                }
1856

1857
                r = image_policy_check_protection(policy, di, found_flags);
2,964✔
1858
                if (r < 0)
2,964✔
1859
                        return r;
1860

1861
                if (m->partitions[di].found) {
2,959✔
1862
                        r = image_policy_check_partition_flags(policy, di, m->partitions[di].gpt_flags);
547✔
1863
                        if (r < 0)
547✔
1864
                                return r;
1865
                }
1866
        }
1867

1868
        if (!any && !FLAGS_SET(flags, DISSECT_IMAGE_ALLOW_EMPTY))
227✔
1869
                return -ENOMSG;
1870

1871
        r = dissected_image_probe_filesystems(m, fd, policy);
226✔
1872
        if (r < 0)
226✔
1873
                return r;
×
1874

1875
        return 0;
1876
}
1877
#endif
1878

1879
int dissected_image_new_from_existing_verity(
100✔
1880
                const char *src,
1881
                const VeritySettings *verity,
1882
                const MountOptions *options,
1883
                const ImagePolicy *image_policy,
1884
                const ImageFilter *image_filter,
1885
                RuntimeScope runtime_scope,
1886
                DissectImageFlags dissect_image_flags,
1887
                DissectedImage **ret) {
1888

1889
        /* Look for an already set up dm-verity device with a single filesystem, according to our naming
1890
         * scheme and image policy, and if it is pinned by filesystem type set up the image directly. */
1891

1892
#if HAVE_BLKID
1893
        _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
×
1894
        _cleanup_free_ char *node = NULL, *root_hash_encoded = NULL, *root_fstype_string = NULL;
100✔
1895
        _cleanup_close_ int mount_node_fd = -EBADF;
100✔
1896
        PartitionPolicyFlags found_flags;
100✔
1897
        bool encrypted = false;
100✔
1898
        int r;
100✔
1899

1900
        assert(!verity || verity->designator < 0 || IN_SET(verity->designator, PARTITION_ROOT, PARTITION_USR));
100✔
1901
        assert(!verity || iovec_is_valid(&verity->root_hash));
100✔
1902
        assert(!verity || iovec_is_valid(&verity->root_hash_sig));
100✔
1903
        assert(!verity || iovec_is_set(&verity->root_hash) || !iovec_is_set(&verity->root_hash_sig));
100✔
1904
        assert(ret);
100✔
1905

1906
        /* Shortcut: this deals only with verity images and requires a policy, and only for system services */
1907
        if (runtime_scope != RUNTIME_SCOPE_SYSTEM ||
100✔
1908
            !FLAGS_SET(dissect_image_flags, DISSECT_IMAGE_VERITY_SHARE) ||
94✔
1909
            !image_policy ||
94✔
1910
            !verity ||
94✔
1911
            !verity->data_path ||
75✔
1912
            (verity->designator >= 0 && verity->designator != PARTITION_ROOT) ||
14✔
1913
            !iovec_is_set(&verity->root_hash))
114✔
1914
                return -ENOPKG;
1915

1916
        /* The policy fstype flags translate to the literal fstype name of each filesystem.
1917
         * Input must be a single filesystem image, if the policy specifies more than one, we need to dissect */
1918
        r = partition_policy_determine_fstype(image_policy, PARTITION_ROOT, &encrypted, &root_fstype_string);
14✔
1919
        if (r < 0)
14✔
1920
                return r;
1921
        if (!root_fstype_string)
14✔
1922
                return -ENOPKG;
1923

1924
        if (verity_settings_data_covers(verity, PARTITION_ROOT))
×
1925
                found_flags = iovec_is_set(&verity->root_hash_sig) ? PARTITION_POLICY_SIGNED : PARTITION_POLICY_VERITY;
×
1926
        else
1927
                found_flags = encrypted ? PARTITION_POLICY_ENCRYPTED : PARTITION_POLICY_UNPROTECTED;
×
1928

1929
        root_hash_encoded = hexmem(verity->root_hash.iov_base, verity->root_hash.iov_len);
×
1930
        if (!root_hash_encoded)
×
1931
                return -ENOMEM;
1932

1933
        node = strjoin("/dev/mapper/", root_hash_encoded, "-verity");
×
1934
        if (!node)
×
1935
                return -ENOMEM;
1936

1937
        r = dissected_image_new(src, &dissected_image);
×
1938
        if (r < 0)
×
1939
                return r;
1940

1941
        mount_node_fd = open_partition(node, /* is_partition= */ false, /* loop= */ NULL);
×
1942
        if (mount_node_fd < 0)
×
1943
                return mount_node_fd;
1944

1945
        r = dissect_image_from_unpartitioned(
×
1946
                        node,
1947
                        /* diskseq= */ 0,
1948
                        /* uuid= */ NULL,
1949
                        encrypted,
1950
                        verity,
1951
                        options,
1952
                        image_policy,
1953
                        image_filter,
1954
                        &mount_node_fd,
1955
                        &root_fstype_string,
1956
                        dissected_image,
1957
                        dissect_image_flags,
1958
                        found_flags);
1959
        if (r < 0)
×
1960
                return r;
1961

1962
        *ret = TAKE_PTR(dissected_image);
×
1963

1964
        return 0;
×
1965
#else
1966
        return -EOPNOTSUPP;
1967
#endif
1968
}
1969

1970
int dissect_image_file(
24✔
1971
                const char *path,
1972
                const VeritySettings *verity,
1973
                const MountOptions *mount_options,
1974
                const ImagePolicy *image_policy,
1975
                const ImageFilter *image_filter,
1976
                DissectImageFlags flags,
1977
                DissectedImage **ret) {
1978

1979
#if HAVE_BLKID
1980
        _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
×
1981
        _cleanup_close_ int fd = -EBADF;
24✔
1982
        struct stat st;
24✔
1983
        int r;
24✔
1984

1985
        assert(path);
24✔
1986

1987
        fd = open(path, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
24✔
1988
        if (fd < 0)
24✔
1989
                return -errno;
×
1990

1991
        if (fstat(fd, &st) < 0)
24✔
1992
                return -errno;
×
1993

1994
        r = stat_verify_regular(&st);
24✔
1995
        if (r < 0)
24✔
1996
                return r;
1997

1998
        r = dissected_image_new(path, &m);
24✔
1999
        if (r < 0)
24✔
2000
                return r;
2001

2002
        m->image_size = st.st_size;
24✔
2003

2004
        r = probe_sector_size(fd, &m->sector_size);
24✔
2005
        if (r < 0)
24✔
2006
                return r;
2007

2008
        r = dissect_image(m, fd, path, verity, mount_options, image_policy, image_filter, flags);
24✔
2009
        if (r < 0)
24✔
2010
                return r;
2011

2012
        if (ret)
14✔
2013
                *ret = TAKE_PTR(m);
1✔
2014
        return 0;
2015
#else
2016
        return -EOPNOTSUPP;
2017
#endif
2018
}
2019

2020
int dissect_log_error(int log_level, int r, const char *name, const VeritySettings *verity) {
174✔
2021
        assert(log_level >= 0 && log_level <= LOG_DEBUG);
174✔
2022
        assert(name);
174✔
2023

2024
        switch (r) {
174✔
2025

2026
        case 0 ... INT_MAX: /* success! */
2027
                return r;
2028

2029
        case -EOPNOTSUPP:
2030
                return log_full_errno(log_level, r, "Dissecting images is not supported, compiled without blkid support.");
×
2031

2032
        case -ENOPKG:
2033
                return log_full_errno(log_level, r, "%s: Couldn't identify a suitable partition table or file system.", name);
×
2034

2035
        case -ENOMEDIUM:
2036
                return log_full_errno(log_level, r, "%s: The image does not pass os-release/extension-release validation.", name);
×
2037

2038
        case -EADDRNOTAVAIL:
2039
                return log_full_errno(log_level, r, "%s: No root/usr partition for specified root/usr hash found.", name);
2✔
2040

2041
        case -EREMOTE:
2042
                return log_full_errno(log_level, r, "%s: Found root and usr partitions with different architectures", name);
×
2043

2044
        case -ENOTUNIQ:
2045
                return log_full_errno(log_level, r, "%s: Multiple suitable root partitions found in image.", name);
×
2046

2047
        case -ENXIO:
2048
                return log_full_errno(log_level, r, "%s: No suitable root partition found in image.", name);
×
2049

2050
        case -EPROTONOSUPPORT:
2051
                return log_full_errno(log_level, r, "Device '%s' is a loopback block device with partition scanning turned off, please turn it on.", name);
×
2052

2053
        case -ENOTBLK:
2054
                return log_full_errno(log_level, r, "%s: Image is not a block device.", name);
×
2055

2056
        case -EBADR:
2057
                return log_full_errno(log_level, r,
×
2058
                                      "Combining partitioned images (such as '%s') with external Verity data (such as '%s') not supported. "
2059
                                      "(Consider setting $SYSTEMD_DISSECT_VERITY_SIDECAR=0 to disable automatic discovery of external Verity data.)",
2060
                                      name, strna(verity ? verity->data_path : NULL));
2061

2062
        case -ERFKILL:
2063
                return log_full_errno(log_level, r, "%s: Image does not match image policy.", name);
8✔
2064

2065
        case -ENOMSG:
2066
                return log_full_errno(log_level, r, "%s: No suitable partitions found.", name);
1✔
2067

2068
        case -EUCLEAN:
2069
                return log_full_errno(log_level, r, "%s: Partition with ambiguous file system superblock signature found.", name);
×
2070

2071
        default:
2072
                return log_full_errno(log_level, r, "%s: Cannot dissect image: %m", name);
×
2073
        }
2074
}
2075

2076
int dissect_image_file_and_warn(
23✔
2077
                const char *path,
2078
                const VeritySettings *verity,
2079
                const MountOptions *mount_options,
2080
                const ImagePolicy *image_policy,
2081
                const ImageFilter *image_filter,
2082
                DissectImageFlags flags,
2083
                DissectedImage **ret) {
2084

2085
        return dissect_log_error(
23✔
2086
                        LOG_ERR,
2087
                        dissect_image_file(path, verity, mount_options, image_policy, image_filter, flags, ret),
2088
                        path,
2089
                        verity);
2090
}
2091

2092
/* Releases the file descriptors and the loop device reference held by the image, while leaving the rest of
 * the object untouched. A NULL 'm' is accepted and is a no-op. */
void dissected_image_close(DissectedImage *m) {
        if (!m)
                return;

        /* Per-partition fds first: both the block device node fd and the fsmount fd. */
        for (PartitionDesignator d = 0; d < _PARTITION_DESIGNATOR_MAX; d++) {
                DissectedPartition *part = m->partitions + d;

                part->mount_node_fd = safe_close(part->mount_node_fd);
                part->fsmount_fd = safe_close(part->fsmount_fd);
        }

        m->loop = loop_device_unref(m->loop);
}
2105

2106
/* Destroys a DissectedImage and everything it owns. Accepts NULL. Always returns NULL so that callers can
 * write "m = dissected_image_unref(m)". The teardown order below matters. */
DissectedImage* dissected_image_unref(DissectedImage *m) {
        if (!m)
                return NULL;

        /* Step 1: release the per-partition state. */
        FOREACH_ARRAY(part, m->partitions, _PARTITION_DESIGNATOR_MAX)
                dissected_partition_done(part);

        /* Step 2: drop the decrypted image. Has to come after step 1, since releasing the DecryptedImage
         * may try to deactivate partitions. */
        decrypted_image_unref(m->decrypted_image);

        /* Step 3: drop the loop device. Has to come after steps 1 and 2, since releasing the LoopDevice
         * may try to remove existing partitions on the loopback block device. */
        loop_device_unref(m->loop);

        /* Finally, the plain heap data. */
        free(m->image_name);
        free(m->hostname);
        strv_free(m->machine_info);
        strv_free(m->os_release);
        strv_free(m->initrd_release);
        strv_free(m->confext_release);
        strv_free(m->sysext_release);

        return mfree(m);
}
2132

2133
/* Checks whether the block device node at 'path' is a loopback block device, or a partition of one.
 *
 * Returns > 0 if so, 0 if not, -ENOTBLK if 'path' is not a block device node, or another negative
 * errno-style error if stat()/access() fail unexpectedly. */
static int is_loop_device(const char *path) {
        char s[SYS_BLOCK_PATH_MAX("/../loop/")];
        struct stat st;

        assert(path);

        if (stat(path, &st) < 0)
                return -errno;

        if (!S_ISBLK(st.st_mode))
                return -ENOTBLK;

        /* NB: we must look up the device the node *represents* (st_rdev), not the device of the file system
         * the node is stored on (st_dev, typically devtmpfs), as only the former has an entry below
         * /sys/dev/block/. */
        xsprintf_sys_block_path(s, "/loop/", st.st_rdev);
        if (access(s, F_OK) < 0) {
                if (errno != ENOENT)
                        return -errno;

                /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */
                xsprintf_sys_block_path(s, "/../loop/", st.st_rdev);
                if (access(s, F_OK) < 0)
                        return errno == ENOENT ? false : -errno;
        }

        return true;
}
2158

2159
static int run_fsck(int node_fd, const char *fstype) {
15✔
2160
        int r, exit_status;
15✔
2161

2162
        assert(node_fd >= 0);
15✔
2163
        assert(fstype);
15✔
2164

2165
        r = fsck_exists_for_fstype(fstype);
15✔
2166
        if (r < 0) {
15✔
2167
                log_debug_errno(r, "Couldn't determine whether fsck for %s exists, proceeding anyway.", fstype);
×
2168
                return 0;
15✔
2169
        }
2170
        if (r == 0) {
15✔
2171
                log_debug("Not checking partition %s, as fsck for %s does not exist.", FORMAT_PROC_FD_PATH(node_fd), fstype);
×
2172
                return 0;
×
2173
        }
2174

2175
        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
15✔
2176
        r = pidref_safe_fork_full(
15✔
2177
                        "(fsck)",
2178
                        NULL,
2179
                        &node_fd, 1, /* Leave the node fd open */
2180
                        FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG_SIGTERM|FORK_REARRANGE_STDIO|FORK_CLOEXEC_OFF,
2181
                        &pidref);
2182
        if (r < 0)
30✔
2183
                return log_debug_errno(r, "Failed to fork off fsck: %m");
×
2184
        if (r == 0) {
30✔
2185
                /* Child */
2186
                execlp("fsck", "fsck", "-aT", FORMAT_PROC_FD_PATH(node_fd), NULL);
15✔
2187
                log_open();
15✔
2188
                log_debug_errno(errno, "Failed to execl() fsck: %m");
×
2189
                _exit(FSCK_OPERATIONAL_ERROR);
×
2190
        }
2191

2192
        exit_status = pidref_wait_for_terminate_and_check("fsck", &pidref, 0);
15✔
2193
        if (exit_status < 0)
15✔
2194
                return log_debug_errno(exit_status, "Failed to fork off fsck: %m");
×
2195

2196
        if ((exit_status & ~FSCK_ERROR_CORRECTED) != FSCK_SUCCESS) {
15✔
2197
                log_debug("fsck failed with exit status %i.", exit_status);
×
2198

2199
                if ((exit_status & (FSCK_SYSTEM_SHOULD_REBOOT|FSCK_ERRORS_LEFT_UNCORRECTED)) != 0)
×
2200
                        return log_debug_errno(SYNTHETIC_ERRNO(EUCLEAN), "File system is corrupted, refusing.");
×
2201

2202
                log_debug("Ignoring fsck error.");
×
2203
        }
2204

2205
        return 0;
2206
}
2207

2208
static int fs_grow(const char *node_path, int mount_fd, const char *mount_path) {
12✔
2209
        _cleanup_close_ int _mount_fd = -EBADF, node_fd = -EBADF;
12✔
2210
        uint64_t size, newsize;
12✔
2211
        const char *id;
12✔
2212
        int r;
12✔
2213

2214
        assert(node_path);
12✔
2215
        assert(mount_fd >= 0 || mount_path);
12✔
2216

2217
        node_fd = open(node_path, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
12✔
2218
        if (node_fd < 0)
12✔
2219
                return log_debug_errno(errno, "Failed to open node device %s: %m", node_path);
×
2220

2221
        r = blockdev_get_device_size(node_fd, &size);
12✔
2222
        if (r < 0)
12✔
2223
                return log_debug_errno(r, "Failed to get block device size of %s: %m", node_path);
×
2224

2225
        if (mount_fd < 0) {
12✔
2226
                assert(mount_path);
12✔
2227

2228
                _mount_fd = open(mount_path, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
12✔
2229
                if (_mount_fd < 0)
12✔
2230
                        return log_debug_errno(errno, "Failed to open mounted file system %s: %m", mount_path);
×
2231

2232
                mount_fd = _mount_fd;
2233
        } else {
2234
                mount_fd = fd_reopen_condition(mount_fd, O_RDONLY|O_DIRECTORY|O_CLOEXEC, O_RDONLY|O_DIRECTORY|O_CLOEXEC, &_mount_fd);
×
2235
                if (mount_fd < 0)
×
2236
                        return log_debug_errno(errno, "Failed to reopen mount node: %m");
×
2237
        }
2238

2239
        id = mount_path ?: node_path;
12✔
2240

2241
        log_debug("Resizing \"%s\" to %"PRIu64" bytes...", id, size);
12✔
2242
        r = resize_fs(mount_fd, size, &newsize);
12✔
2243
        if (r < 0)
12✔
2244
                return log_debug_errno(r, "Failed to resize \"%s\" to %"PRIu64" bytes: %m", id, size);
6✔
2245

2246
        if (newsize == size)
6✔
2247
                log_debug("Successfully resized \"%s\" to %s bytes.",
6✔
2248
                          id, FORMAT_BYTES(newsize));
2249
        else {
2250
                assert(newsize < size);
×
2251
                log_debug("Successfully resized \"%s\" to %s bytes (%"PRIu64" bytes lost due to blocksize).",
×
2252
                          id, FORMAT_BYTES(newsize), size - newsize);
2253
        }
2254

2255
        return 0;
2256
}
2257

2258
int partition_pick_mount_options(
399✔
2259
                PartitionDesignator d,
2260
                const char *fstype,
2261
                bool rw,
2262
                bool discard,
2263
                char **ret_options,
2264
                unsigned long *ret_ms_flags) {
2265

2266
        _cleanup_free_ char *options = NULL;
399✔
2267

2268
        assert(ret_options);
399✔
2269

2270
        /* Selects a baseline of bind mount flags, that should always apply.
2271
         *
2272
         * Firstly, we set MS_NODEV universally on all mounts, since we don't want to allow device nodes outside of /dev/.
2273
         *
2274
         * On /var/tmp/ we'll also set MS_NOSUID, same as we set for /tmp/ on the host.
2275
         *
2276
         * On the ESP and XBOOTLDR partitions we'll also disable symlinks, and execution. These file systems
2277
         * are generally untrusted (i.e. not encrypted or authenticated), and typically VFAT hence we should
2278
         * be as restrictive as possible, and this shouldn't hurt, since the functionality is not available
2279
         * there anyway. */
2280

2281
        unsigned long flags = MS_NODEV;
399✔
2282

2283
        if (!rw)
399✔
2284
                flags |= MS_RDONLY;
254✔
2285

2286
        switch (d) {
399✔
2287

2288
        case PARTITION_ESP:
55✔
2289
        case PARTITION_XBOOTLDR:
2290
                flags |= MS_NOSUID|MS_NOEXEC|MS_NOSYMFOLLOW;
55✔
2291

2292
                /* The ESP might contain a pre-boot random seed. Let's make this unaccessible to regular
2293
                 * userspace. ESP/XBOOTLDR is almost certainly VFAT, hence if we don't know assume it is. */
2294
                if (!fstype || fstype_can_fmask_dmask(fstype))
55✔
2295
                        if (!strextend_with_separator(&options, ",", "fmask=0177,dmask=0077"))
29✔
2296
                                return -ENOMEM;
2297
                break;
2298

2299
        case PARTITION_TMP:
×
2300
                flags |= MS_NOSUID;
×
2301
                break;
×
2302

2303
        default:
399✔
2304
                ;
399✔
2305
        }
2306

2307
        /* So, when you request MS_RDONLY from ext4, then this means nothing. It happily still writes to the
2308
         * backing storage. What's worse, the BLKRO[GS]ET flag and (in case of loopback devices)
2309
         * LO_FLAGS_READ_ONLY don't mean anything, they affect userspace accesses only, and write accesses
2310
         * from the upper file system still get propagated through to the underlying file system,
2311
         * unrestricted. To actually get ext4/xfs/btrfs to stop writing to the device we need to specify
2312
         * "norecovery" as mount option, in addition to MS_RDONLY. Yes, this sucks, since it means we need to
2313
         * carry a per file system table here.
2314
         *
2315
         * Note that this means that we might not be able to mount corrupted file systems as read-only
2316
         * anymore (since in some cases the kernel implementations will refuse mounting when corrupted,
2317
         * read-only and "norecovery" is specified). But I think for the case of automatically determined
2318
         * mount options for loopback devices this is the right choice, since otherwise using the same
2319
         * loopback file twice even in read-only mode, is going to fail badly sooner or later. The use case of
2320
         * making reuse of the immutable images "just work" is more relevant to us than having read-only
2321
         * access that actually modifies stuff work on such image files. Or to say this differently: if
2322
         * people want their file systems to be fixed up they should just open them in writable mode, where
2323
         * all these problems don't exist. */
2324
        if (!rw && fstype) {
399✔
2325
                const char *option = fstype_norecovery_option(fstype);
254✔
2326

2327
                if (option && !strextend_with_separator(&options, ",", option))
254✔
2328
                        return -ENOMEM;
2329
        }
2330

2331
        if (discard && fstype && fstype_can_discard(fstype))
399✔
2332
                if (!strextend_with_separator(&options, ",", "discard"))
5✔
2333
                        return -ENOMEM;
2334

2335
        if (!ret_ms_flags) /* Fold flags into option string if ret_flags specified as NULL */
399✔
2336
                if (!strextend_with_separator(&options, ",",
25✔
2337
                                              FLAGS_SET(flags, MS_RDONLY) ? "ro" : "rw",
2338
                                              FLAGS_SET(flags, MS_NODEV) ? "nodev" : "dev",
2339
                                              FLAGS_SET(flags, MS_NOSUID) ? "nosuid" : "suid",
2340
                                              FLAGS_SET(flags, MS_NOEXEC) ? "noexec" : "exec",
2341
                                              FLAGS_SET(flags, MS_NOSYMFOLLOW) ? "nosymfollow" : NULL))
2342
                        /* NB: we suppress 'symfollow' here, since it's the default, and old /bin/mount might not know it */
2343
                        return -ENOMEM;
2344

2345
        if (ret_ms_flags)
394✔
2346
                *ret_ms_flags = flags;
394✔
2347

2348
        *ret_options = TAKE_PTR(options);
399✔
2349
        return 0;
399✔
2350
}
2351

2352
/* Returns true if the given UID shift/range pair describes a mapping other than the identity mapping over
 * the full 32-bit range. An invalid uid_shift means "no mapping requested" and yields false. */
static bool need_user_mapping(uid_t uid_shift, uid_t uid_range) {
        return uid_is_valid(uid_shift) &&
                (uid_shift != 0 || uid_range != UINT32_MAX);
}
2359

2360
static int mount_partition(
2,155✔
2361
                PartitionDesignator d,
2362
                DissectedPartition *m,
2363
                const char *where,
2364
                const char *directory,
2365
                uid_t uid_shift,
2366
                uid_t uid_range,
2367
                int userns_fd,
2368
                DissectImageFlags flags) {
2369

2370
        _cleanup_free_ char *chased = NULL, *options = NULL;
2,155✔
2371
        const char *p = NULL, *node, *fstype = NULL;
2,155✔
2372
        bool rw, discard, grow;
2,155✔
2373
        unsigned long ms_flags;
2,155✔
2374
        int r;
2,155✔
2375

2376
        assert(m);
2,155✔
2377

2378
        if (!m->found)
2,155✔
2379
                return 0;
2380

2381
        /* Check the various combinations when we can't do anything anymore */
2382
        if (m->fsmount_fd < 0 && m->mount_node_fd < 0)
463✔
2383
                return 0;
2384
        if (m->fsmount_fd >= 0 && !where)
463✔
2385
                return 0;
2386
        if (!where && m->mount_node_fd < 0)
394✔
2387
                return 0;
2388

2389
        if (m->fsmount_fd < 0) {
463✔
2390
                fstype = dissected_partition_fstype(m);
394✔
2391
                if (!fstype)
394✔
2392
                        return -EAFNOSUPPORT;
2393

2394
                /* We are looking at an encrypted partition? This either means stacked encryption, or the
2395
                 * caller didn't call dissected_image_decrypt() beforehand. Let's return a recognizable error
2396
                 * for this case. */
2397
                if (streq(fstype, "crypto_LUKS"))
394✔
2398
                        return -EUNATCH;
2399

2400
                r = dissect_fstype_ok(fstype);
394✔
2401
                if (r < 0)
394✔
2402
                        return r;
2403
                if (!r)
394✔
2404
                        return -EIDRM; /* Recognizable error */
2405
        }
2406

2407
        node = m->mount_node_fd < 0 ? NULL : FORMAT_PROC_FD_PATH(m->mount_node_fd);
463✔
2408
        rw = m->rw && !(flags & DISSECT_IMAGE_MOUNT_READ_ONLY);
463✔
2409

2410
        discard = ((flags & DISSECT_IMAGE_DISCARD) ||
463✔
2411
                   ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && (m->node && is_loop_device(m->node) > 0)));
462✔
2412

2413
        grow = rw && m->growfs && FLAGS_SET(flags, DISSECT_IMAGE_GROWFS);
463✔
2414

2415
        if (FLAGS_SET(flags, DISSECT_IMAGE_FSCK) && rw && m->mount_node_fd >= 0 && m->fsmount_fd < 0) {
463✔
2416
                r = run_fsck(m->mount_node_fd, fstype);
15✔
2417
                if (r < 0)
15✔
2418
                        return r;
2419
        }
2420

2421
        if (where) {
463✔
2422
                if (directory) {
461✔
2423
                        /* Automatically create missing mount points inside the image, if necessary. */
2424
                        r = mkdir_p_root(where, directory, uid_shift, (gid_t) uid_shift, 0755);
66✔
2425
                        if (r < 0 && r != -EROFS)
66✔
2426
                                return r;
2427

2428
                        r = chase(directory, where, CHASE_PREFIX_ROOT, &chased, NULL);
66✔
2429
                        if (r < 0)
66✔
2430
                                return r;
2431

2432
                        p = chased;
66✔
2433
                } else {
2434
                        /* Create top-level mount if missing – but only if this is asked for. This won't modify the
2435
                         * image (as the branch above does) but the host hierarchy, and the created directory might
2436
                         * survive our mount in the host hierarchy hence. */
2437
                        if (FLAGS_SET(flags, DISSECT_IMAGE_MKDIR)) {
395✔
2438
                                r = mkdir_p(where, 0755);
14✔
2439
                                if (r < 0)
14✔
2440
                                        return r;
2441
                        }
2442

2443
                        p = where;
2444
                }
2445
        }
2446

2447
        if (m->fsmount_fd < 0) {
463✔
2448
                r = partition_pick_mount_options(d, fstype, rw, discard, &options, &ms_flags);
394✔
2449
                if (r < 0)
394✔
2450
                        return r;
2451

2452
                if (need_user_mapping(uid_shift, uid_range) && fstype_can_uid_gid(fstype)) {
394✔
2453
                        _cleanup_free_ char *uid_option = NULL;
×
2454

2455
                        if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
×
2456
                                return -ENOMEM;
2457

2458
                        if (!strextend_with_separator(&options, ",", uid_option))
×
2459
                                return -ENOMEM;
2460

2461
                        userns_fd = -EBADF; /* Not needed */
×
2462
                }
2463

2464
                if (!isempty(m->mount_options))
394✔
2465
                        if (!strextend_with_separator(&options, ",", m->mount_options))
4✔
2466
                                return -ENOMEM;
2467
        }
2468

2469
        if (p) {
463✔
2470
                if (m->fsmount_fd >= 0) {
461✔
2471
                        /* Case #1: Attach existing fsmount fd to the file system */
2472

2473
                        r = mount_exchange_graceful(
138✔
2474
                                        m->fsmount_fd,
2475
                                        p,
2476
                                        FLAGS_SET(flags, DISSECT_IMAGE_TRY_ATOMIC_MOUNT_EXCHANGE));
69✔
2477
                        if (r < 0)
69✔
2478
                                return log_debug_errno(r, "Failed to mount image on '%s': %m", p);
×
2479

2480
                } else {
2481
                        assert(node);
392✔
2482

2483
                        /* Case #2: Mount directly into place */
2484
                        r = mount_nofollow_verbose(LOG_DEBUG, node, p, fstype, ms_flags, options);
392✔
2485
                        if (r < 0)
392✔
2486
                                return r;
2487

2488
                        if (grow)
392✔
2489
                                (void) fs_grow(node, -EBADF, p);
12✔
2490

2491
                        if (userns_fd >= 0) {
392✔
2492
                                r = remount_idmap_fd(STRV_MAKE(p), userns_fd, /* extra_mount_attr_set= */ 0);
×
2493
                                if (r < 0)
×
2494
                                        return r;
×
2495
                        }
2496
                }
2497
        } else {
2498
                assert(node);
2✔
2499

2500
                /* Case #3: Create fsmount fd */
2501

2502
                m->fsmount_fd = make_fsmount(LOG_DEBUG, node, fstype, ms_flags, options, userns_fd);
2✔
2503
                if (m->fsmount_fd < 0)
2✔
2504
                        return m->fsmount_fd;
2505

2506
                if (grow)
2✔
2507
                        (void) fs_grow(node, m->fsmount_fd, NULL);
×
2508
        }
2509

2510
        return 1;
2511
}
2512

2513
/* Mounts a tmpfs as root file system on 'where'. Used for images that contain /usr/ but no rootfs.
 *
 * If a user mapping is requested, the tmpfs is mounted with matching uid=/gid= options. Returns 1 on
 * success, negative errno-style error on failure. */
static int mount_root_tmpfs(const char *where, uid_t uid_shift, uid_t uid_range, DissectImageFlags flags) {
        _cleanup_free_ char *tmpfs_opts = NULL;
        int r;

        assert(where);

        if (FLAGS_SET(flags, DISSECT_IMAGE_MKDIR)) {
                r = mkdir_p(where, 0755);
                if (r < 0)
                        return r;
        }

        if (need_user_mapping(uid_shift, uid_range) &&
            asprintf(&tmpfs_opts, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
                return -ENOMEM;

        r = mount_nofollow_verbose(LOG_DEBUG, "rootfs", where, "tmpfs", MS_NODEV, tmpfs_opts);
        if (r < 0)
                return r;

        return 1;
}
2538

2539
static int mount_point_is_available(const char *where, const char *path, bool missing_ok) {
318✔
2540
        _cleanup_free_ char *p = NULL;
318✔
2541
        int r;
318✔
2542

2543
        /* Check whether <path> is suitable as a mountpoint, i.e. is an empty directory
2544
         * or does not exist at all (when missing_ok). */
2545

2546
        r = chase(path, where, CHASE_PREFIX_ROOT, &p, NULL);
318✔
2547
        if (r == -ENOENT)
318✔
2548
                return missing_ok;
153✔
2549
        if (r < 0)
165✔
2550
                return log_debug_errno(r, "Failed to chase \"%s\": %m", path);
×
2551

2552
        r = dir_is_empty(p, /* ignore_hidden_or_backup= */ false);
165✔
2553
        if (r == -ENOTDIR)
165✔
2554
                return false;
2555
        if (r < 0)
165✔
2556
                return log_debug_errno(r, "Failed to check directory \"%s\": %m", p);
×
2557
        return r > 0;
165✔
2558
}
2559

2560
int dissected_image_mount(
406✔
2561
                DissectedImage *m,
2562
                const char *where,
2563
                uid_t uid_shift,
2564
                uid_t uid_range,
2565
                int userns_fd,
2566
                DissectImageFlags flags) {
2567

2568
        _cleanup_close_ int my_userns_fd = -EBADF;
406✔
2569
        int r;
406✔
2570

2571
        assert(m);
406✔
2572

2573
        if (FLAGS_SET(flags, DISSECT_IMAGE_FOREIGN_UID)) /* For image based mounts we currently require an identity mapping */
406✔
2574
                return -EOPNOTSUPP;
2575

2576
        /* If 'where' is NULL then we'll use the new mount API to create fsmount() fds for the mounts and
2577
         * store them in DissectedPartition.fsmount_fd.
2578
         *
2579
         * If 'where' is not NULL then we'll either mount the partitions to the right places ourselves,
2580
         * or use DissectedPartition.fsmount_fd and bind it to the right places.
2581
         *
2582
         * This allows splitting the setting up the superblocks and the binding to file systems paths into
2583
         * two distinct and differently privileged components: one that gets the fsmount fds, and the other
2584
         * that then applies them.
2585
         *
2586
         * Returns:
2587
         *
2588
         *  -ENXIO        → No root partition found
2589
         *  -EMEDIUMTYPE  → DISSECT_IMAGE_VALIDATE_OS set but no os-release/extension-release file found
2590
         *  -EUNATCH      → Encrypted partition found for which no dm-crypt was set up yet
2591
         *  -EUCLEAN      → fsck for file system failed
2592
         *  -EBUSY        → File system already mounted/used elsewhere (kernel)
2593
         *  -EAFNOSUPPORT → File system type not supported or not known
2594
         *  -EIDRM        → File system is not among allowlisted "common" file systems
2595
         */
2596

2597
        if (!where && (flags & (DISSECT_IMAGE_VALIDATE_OS|DISSECT_IMAGE_VALIDATE_OS_EXT)) != 0)
406✔
2598
                return -EOPNOTSUPP; /* for now, not supported */
2599

2600
        if (!(m->partitions[PARTITION_ROOT].found ||
406✔
2601
              (m->partitions[PARTITION_USR].found && FLAGS_SET(flags, DISSECT_IMAGE_USR_NO_ROOT))))
3✔
2602
                return -ENXIO; /* Require a root fs or at least a /usr/ fs (the latter is subject to a flag of its own) */
2603

2604
        if (userns_fd < 0 && need_user_mapping(uid_shift, uid_range) && FLAGS_SET(flags, DISSECT_IMAGE_MOUNT_IDMAPPED)) {
406✔
2605

2606
                my_userns_fd = make_userns(uid_shift, uid_range, UID_INVALID, UID_INVALID, REMOUNT_IDMAPPING_HOST_ROOT);
2✔
2607
                if (my_userns_fd < 0)
2✔
2608
                        return my_userns_fd;
2609

2610
                userns_fd = my_userns_fd;
2611
        }
2612

2613
        if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {
406✔
2614

2615
                /* First mount the root fs. If there's none we use a tmpfs. */
2616
                if (m->partitions[PARTITION_ROOT].found) {
400✔
2617
                        r = mount_partition(PARTITION_ROOT, m->partitions + PARTITION_ROOT, where, NULL, uid_shift, uid_range, userns_fd, flags);
397✔
2618
                        if (r < 0)
397✔
2619
                                return r;
2620

2621
                } else if (where) {
3✔
2622
                        r = mount_root_tmpfs(where, uid_shift, uid_range, flags);
3✔
2623
                        if (r < 0)
3✔
2624
                                return r;
2625
                }
2626

2627
                /* For us mounting root always means mounting /usr as well */
2628
                r = mount_partition(PARTITION_USR, m->partitions + PARTITION_USR, where, "/usr", uid_shift, uid_range, userns_fd, flags);
400✔
2629
                if (r < 0)
400✔
2630
                        return r;
2631
        }
2632

2633
        if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0 &&
400✔
2634
            (flags & (DISSECT_IMAGE_VALIDATE_OS|DISSECT_IMAGE_VALIDATE_OS_EXT)) != 0) {
400✔
2635
                /* If either one of the validation flags are set, ensure that the image qualifies as
2636
                 * one or the other (or both). */
2637
                bool ok = false;
199✔
2638

2639
                assert(where);
199✔
2640

2641
                if (FLAGS_SET(flags, DISSECT_IMAGE_VALIDATE_OS)) {
199✔
2642
                        log_debug("Checking if '%s' is an OS tree", where);
108✔
2643

2644
                        r = path_is_os_tree(where);
108✔
2645
                        if (r < 0)
108✔
2646
                                return log_debug_errno(r, "Failed to check is '%s' is an OS tree: %m", where);
×
2647
                        if (r > 0) {
108✔
2648
                                log_debug("Successfully identified '%s' as an OS tree", where);
69✔
2649
                                ok = true;
2650
                        }
2651
                }
2652
                if (!ok && FLAGS_SET(flags, DISSECT_IMAGE_VALIDATE_OS_EXT) && m->image_name) {
130✔
2653
                        log_debug("Checking if '%s' is an extension tree", where);
130✔
2654

2655
                        r = extension_has_forbidden_content(where);
130✔
2656
                        if (r < 0)
130✔
2657
                                return log_debug_errno(r, "Failed to check if '%s' contains content forbidden for an extension image: %m", where);
×
2658
                        if (r == 0) {
130✔
2659
                                ImageClass class = IMAGE_SYSEXT;
130✔
2660
                                r = path_is_extension_tree(class, where, m->image_name, FLAGS_SET(flags, DISSECT_IMAGE_RELAX_EXTENSION_CHECK));
130✔
2661
                                if (r == 0) {
130✔
2662
                                        class = IMAGE_CONFEXT;
10✔
2663
                                        r = path_is_extension_tree(class, where, m->image_name, FLAGS_SET(flags, DISSECT_IMAGE_RELAX_EXTENSION_CHECK));
10✔
2664
                                }
2665
                                if (r < 0)
130✔
2666
                                        return log_debug_errno(r, "Failed to check if '%s' is an extension tree: %m", where);
×
2667
                                if (r > 0) {
130✔
2668
                                        log_debug("Successfully identified '%s' as a %s extension tree", where, image_class_to_string(class));
130✔
2669
                                        ok = true;
2670
                                }
2671
                        }
2672
                }
2673

2674
                if (!ok)
×
2675
                        return -ENOMEDIUM;
×
2676
        }
2677

2678
        if (flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY)
406✔
2679
                return 0;
2680

2681
        r = mount_partition(PARTITION_HOME, m->partitions + PARTITION_HOME, where, "/home", uid_shift, uid_range, userns_fd, flags);
288✔
2682
        if (r < 0)
288✔
2683
                return r;
2684

2685
        r = mount_partition(PARTITION_SRV, m->partitions + PARTITION_SRV, where, "/srv", uid_shift, uid_range, userns_fd, flags);
288✔
2686
        if (r < 0)
288✔
2687
                return r;
2688

2689
        r = mount_partition(PARTITION_VAR, m->partitions + PARTITION_VAR, where, "/var", uid_shift, uid_range, userns_fd, flags);
288✔
2690
        if (r < 0)
288✔
2691
                return r;
2692

2693
        r = mount_partition(PARTITION_TMP, m->partitions + PARTITION_TMP, where, "/var/tmp", uid_shift, uid_range, userns_fd, flags);
288✔
2694
        if (r < 0)
288✔
2695
                return r;
2696

2697
        int slash_boot_is_available = 0;
288✔
2698
        if (where) {
288✔
2699
                r = slash_boot_is_available = mount_point_is_available(where, "/boot", /* missing_ok= */ true);
286✔
2700
                if (r < 0)
286✔
2701
                        return r;
2702
        }
2703
        if (!where || slash_boot_is_available) {
288✔
2704
                r = mount_partition(PARTITION_XBOOTLDR, m->partitions + PARTITION_XBOOTLDR, where, "/boot", uid_shift, uid_range, userns_fd, flags);
177✔
2705
                if (r < 0)
177✔
2706
                        return r;
2707
                slash_boot_is_available = !r;
177✔
2708
        }
2709

2710
        if (m->partitions[PARTITION_ESP].found) {
288✔
2711
                const char *esp_path = NULL;
29✔
2712

2713
                if (where) {
29✔
2714
                        /* Mount the ESP to /boot/ if it exists and is empty and we didn't already mount the
2715
                         * XBOOTLDR partition into it. Otherwise, use /efi instead, but only if it exists
2716
                         * and is empty. */
2717

2718
                        if (slash_boot_is_available) {
29✔
2719
                                r = mount_point_is_available(where, "/boot", /* missing_ok= */ false);
3✔
2720
                                if (r < 0)
3✔
2721
                                        return r;
2722
                                if (r > 0)
3✔
2723
                                        esp_path = "/boot";
2724
                        }
2725

2726
                        if (!esp_path) {
2727
                                r = mount_point_is_available(where, "/efi", /* missing_ok= */ true);
29✔
2728
                                if (r < 0)
29✔
2729
                                        return r;
2730
                                if (r > 0)
29✔
2731
                                        esp_path = "/efi";
29✔
2732
                        }
2733
                }
2734

2735
                /* OK, let's mount the ESP now (possibly creating the dir if missing) */
2736
                r = mount_partition(PARTITION_ESP, m->partitions + PARTITION_ESP, where, esp_path, uid_shift, uid_range, userns_fd, flags);
29✔
2737
                if (r < 0)
29✔
2738
                        return r;
×
2739
        }
2740

2741
        return 0;
2742
}
2743

2744
int dissected_image_mount_and_warn(
118✔
2745
                DissectedImage *m,
2746
                const char *where,
2747
                uid_t uid_shift,
2748
                uid_t uid_range,
2749
                int userns_fd,
2750
                DissectImageFlags flags) {
2751

2752
        int r;
118✔
2753

2754
        assert(m);
118✔
2755

2756
        r = dissected_image_mount(m, where, uid_shift, uid_range, userns_fd, flags);
118✔
2757
        if (r == -ENXIO)
118✔
2758
                return log_error_errno(r, "Failed to mount image: No root file system found in image.");
×
2759
        if (r == -EMEDIUMTYPE)
118✔
2760
                return log_error_errno(r, "Failed to mount image: No suitable os-release/extension-release file in image found.");
×
2761
        if (r == -EUNATCH)
118✔
2762
                return log_error_errno(r, "Failed to mount image: Encrypted file system discovered, but decryption not requested.");
×
2763
        if (r == -EUCLEAN)
118✔
2764
                return log_error_errno(r, "Failed to mount image: File system check on image failed.");
×
2765
        if (r == -EBUSY)
118✔
2766
                return log_error_errno(r, "Failed to mount image: File system already mounted elsewhere.");
×
2767
        if (r == -EAFNOSUPPORT)
118✔
2768
                return log_error_errno(r, "Failed to mount image: File system type not supported or not known.");
×
2769
        if (r == -EIDRM)
118✔
2770
                return log_error_errno(r, "Failed to mount image: File system is too uncommon, refused.");
×
2771
        if (r < 0)
118✔
2772
                return log_error_errno(r, "Failed to mount image: %m");
×
2773

2774
        return r;
2775
}
2776

2777
/* Declared unconditionally, so that DecryptedImage below can point to it even in builds where the
 * structure itself is not defined. */
typedef struct DecryptedPartition DecryptedPartition;

#if HAVE_LIBCRYPTSETUP
/* One device-mapper volume that was activated for a partition of an image. */
struct DecryptedPartition {
        struct crypt_device *device; /* libcryptsetup handle, released with sym_crypt_free() */
        char *name;                  /* device-mapper name of the volume, owned by this structure */
        bool relinquished;           /* if true, the volume is not deactivated when the image is freed */
};
#endif

/* Reference-counted collection of the volumes activated for an image. */
struct DecryptedImage {
        unsigned n_ref;                /* reference counter, starts out at 1 */
        DecryptedPartition *decrypted; /* array of activated volumes */
        size_t n_decrypted;            /* number of valid entries in 'decrypted' */
};
2792

2793
/* Releases a DecryptedImage object and everything it owns. Always returns NULL.
 *
 * For every volume that has both a device handle and a name and was not marked as relinquished, the
 * matching /dev/mapper node is first dropped from btrfs' device list and the volume is then deactivated
 * in deferred mode. Failures of either step are logged at debug level and otherwise ignored.
 *
 * When built without libcryptsetup this is a no-op. */
static DecryptedImage* decrypted_image_free(DecryptedImage *d) {
#if HAVE_LIBCRYPTSETUP
        int r;

        if (!d)
                return NULL;

        for (size_t i = 0; i < d->n_decrypted; i++) {
                DecryptedPartition *p = d->decrypted + i;

                if (p->device && p->name && !p->relinquished) {
                        _cleanup_free_ char *node = NULL;

                        node = path_join("/dev/mapper", p->name);
                        if (node) {
                                r = btrfs_forget_device(node);
                                if (r < 0 && r != -ENOENT)
                                        log_debug_errno(r, "Failed to forget btrfs device %s, ignoring: %m", node);
                        } else
                                log_oom_debug();

                        /* Let's deactivate lazily, as the dm volume may be already/still used by other processes. */
                        r = sym_crypt_deactivate_by_name(p->device, p->name, CRYPT_DEACTIVATE_DEFERRED);
                        if (r < 0)
                                log_debug_errno(r, "Failed to deactivate encrypted partition %s, ignoring: %m", p->name);
                }

                if (p->device)
                        sym_crypt_free(p->device);
                free(p->name);
        }

        free(d->decrypted);
        free(d);
#endif
        return NULL;
}

/* NOTE(review): presumably generates decrypted_image_ref()/decrypted_image_unref() on top of the n_ref
 * counter, invoking decrypted_image_free() when the last reference is dropped. Confirm against the
 * macro definition. */
DEFINE_TRIVIAL_REF_UNREF_FUNC(DecryptedImage, decrypted_image, decrypted_image_free);
2,601✔
2832

2833
#if HAVE_LIBCRYPTSETUP
2834
/* Allocates an empty DecryptedImage object carrying a single reference and stores it in *ret.
 *
 * Returns 0 on success, -ENOMEM if the allocation failed (in which case *ret is left untouched). */
static int decrypted_image_new(DecryptedImage **ret) {
        DecryptedImage *image;

        assert(ret);

        image = new(DecryptedImage, 1);
        if (!image)
                return -ENOMEM;

        /* No volumes yet, the caller holds the one and only reference. */
        *image = (DecryptedImage) { .n_ref = 1 };

        *ret = image;
        return 0;
}
2850

2851
static uint64_t dissected_image_diskseq(const DissectedImage *di) {
61✔
2852
        assert(di);
61✔
2853

2854
        return di->loop ? di->loop->diskseq : 0;
61✔
2855
}
2856

2857
static int make_dm_name_and_node(
85✔
2858
                const char *base,
2859
                uint64_t diskseq,
2860
                const char *suffix,
2861
                char **ret_name,
2862
                char **ret_node) {
2863

2864
        assert(base);
85✔
2865
        assert(suffix);
85✔
2866
        assert(ret_name);
85✔
2867
        assert(ret_node);
85✔
2868

2869
        _cleanup_free_ char *name = NULL;
85✔
2870
        if (diskseq != 0)
85✔
2871
                name = asprintf_safe("%s-%" PRIu64 "%s", base, diskseq, suffix);
61✔
2872
        else
2873
                name = strjoin(base, suffix);
24✔
2874
        if (!name)
85✔
2875
                return -ENOMEM;
2876
        if (!filename_is_valid(name))
85✔
2877
                return -EINVAL;
2878

2879
        _cleanup_free_ char *node = path_join(sym_crypt_get_dir(), name);
170✔
2880
        if (!node)
85✔
2881
                return -ENOMEM;
2882

2883
        *ret_name = TAKE_PTR(name);
85✔
2884
        *ret_node = TAKE_PTR(node);
85✔
2885

2886
        return 0;
85✔
2887
}
2888

2889
static int make_dm_name_and_node_from_node(
61✔
2890
                const char *original_node,
2891
                uint64_t diskseq,
2892
                const char *suffix,
2893
                char **ret_name,
2894
                char **ret_node) {
2895

2896
        int r;
61✔
2897

2898
        assert(original_node);
61✔
2899

2900
        _cleanup_free_ char *base = NULL;
61✔
2901
        r = path_extract_filename(original_node, &base);
61✔
2902
        if (r < 0)
61✔
2903
                return r;
2904

2905
        return make_dm_name_and_node(base, diskseq, suffix, ret_name, ret_node);
61✔
2906
}
2907

2908
static int decrypt_partition(
193✔
2909
                DissectedImage *di,
2910
                DissectedPartition *m,
2911
                const char *passphrase,
2912
                DissectImageFlags flags,
2913
                PartitionPolicyFlags policy_flags,
2914
                DecryptedImage *d) {
2915

2916
        _cleanup_free_ char *node = NULL, *name = NULL;
193✔
2917
        _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
×
2918
        _cleanup_close_ int fd = -EBADF;
193✔
2919
        int r;
193✔
2920

2921
        assert(di);
193✔
2922
        assert(m);
193✔
2923
        assert(d);
193✔
2924

2925
        if (!m->found || !m->node || !m->fstype)
193✔
2926
                return 0;
2927

2928
        if (!streq(m->fstype, "crypto_LUKS"))
193✔
2929
                return 0;
2930

2931
        if (!passphrase)
2✔
2932
                return -ENOKEY;
2933

2934
        if (!FLAGS_SET(policy_flags, PARTITION_POLICY_ENCRYPTED))
2✔
2935
                return log_debug_errno(SYNTHETIC_ERRNO(ERFKILL), "Attempted to unlock partition via LUKS, but it's prohibited.");
×
2936

2937
        r = dlopen_cryptsetup();
2✔
2938
        if (r < 0)
2✔
2939
                return r;
2940

2941
        r = make_dm_name_and_node_from_node(m->node, dissected_image_diskseq(di), "-decrypted", &name, &node);
2✔
2942
        if (r < 0)
2✔
2943
                return r;
2944

2945
        if (!GREEDY_REALLOC0(d->decrypted, d->n_decrypted + 1))
2✔
2946
                return -ENOMEM;
2947

2948
        r = sym_crypt_init(&cd, m->node);
2✔
2949
        if (r < 0)
2✔
2950
                return log_debug_errno(r, "Failed to initialize dm-crypt: %m");
×
2951

2952
        cryptsetup_enable_logging(cd);
2✔
2953

2954
        r = sym_crypt_load(cd, CRYPT_LUKS, NULL);
2✔
2955
        if (r < 0)
2✔
2956
                return log_debug_errno(r, "Failed to load LUKS metadata: %m");
×
2957

2958
        r = sym_crypt_activate_by_passphrase(cd, name, CRYPT_ANY_SLOT, passphrase, strlen(passphrase),
2✔
2959
                                             ((flags & DISSECT_IMAGE_DEVICE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) |
2960
                                             ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0));
2961
        if (r < 0) {
2✔
2962
                log_debug_errno(r, "Failed to activate LUKS device: %m");
×
2963
                return r == -EPERM ? -EKEYREJECTED : r;
×
2964
        }
2965

2966
        fd = open(node, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
2✔
2967
        if (fd < 0)
2✔
2968
                return log_debug_errno(errno, "Failed to open %s: %m", node);
×
2969

2970
        d->decrypted[d->n_decrypted++] = (DecryptedPartition) {
2✔
2971
                .name = TAKE_PTR(name),
2✔
2972
                .device = TAKE_PTR(cd),
2✔
2973
        };
2974

2975
        m->decrypted_node = TAKE_PTR(node);
2✔
2976
        close_and_replace(m->mount_node_fd, fd);
2✔
2977

2978
        return 0;
2✔
2979
}
2980

2981
static int verity_can_reuse(
8✔
2982
                const VeritySettings *verity,
2983
                const char *name,
2984
                struct crypt_device **ret_cd) {
2985

2986
        /* If the same volume was already open, check that the root hashes match, and reuse it if they do */
2987
        _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
8✔
2988
        struct crypt_params_verity crypt_params = {};
8✔
2989
        int r;
8✔
2990

2991
        assert(verity);
8✔
2992
        assert(name);
8✔
2993
        assert(ret_cd);
8✔
2994

2995
        r = sym_crypt_init_by_name(&cd, name);
8✔
2996
        if (r < 0)
8✔
2997
                return log_debug_errno(r, "Error opening verity device, crypt_init_by_name failed: %m");
×
2998

2999
        cryptsetup_enable_logging(cd);
8✔
3000

3001
        r = sym_crypt_get_verity_info(cd, &crypt_params);
8✔
3002
        if (r < 0)
8✔
3003
                return log_debug_errno(r, "Error opening verity device, crypt_get_verity_info failed: %m");
×
3004

3005
        _cleanup_(iovec_done) struct iovec root_hash_existing = {
8✔
3006
                .iov_base = malloc0(verity->root_hash.iov_len),
8✔
3007
                .iov_len = verity->root_hash.iov_len,
3008
        };
3009
        if (!root_hash_existing.iov_base)
8✔
3010
                return -ENOMEM;
3011

3012
        r = sym_crypt_volume_key_get(cd, CRYPT_ANY_SLOT, root_hash_existing.iov_base, &root_hash_existing.iov_len, NULL, 0);
8✔
3013
        if (r < 0)
8✔
3014
                return log_debug_errno(r, "Error opening verity device, crypt_volume_key_get failed: %m");
×
3015
        if (iovec_memcmp(&verity->root_hash, &root_hash_existing) != 0)
8✔
3016
                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Error opening verity device, it already exists but root hashes are different.");
×
3017

3018
        /* Ensure that, if signatures are supported, we only reuse the device if the previous mount used the
3019
         * same settings, so that a previous unsigned mount will not be reused if the user asks to use
3020
         * signing for the new one, and vice versa. */
3021
        if (iovec_is_set(&verity->root_hash_sig) != FLAGS_SET(crypt_params.flags, CRYPT_VERITY_ROOT_HASH_SIGNATURE))
16✔
3022
                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Error opening verity device, it already exists but signature settings are not the same.");
×
3023

3024
        *ret_cd = TAKE_PTR(cd);
8✔
3025
        return 0;
8✔
3026
}
3027

3028
static char* dm_deferred_remove_clean(char *name) {
×
3029
        if (!name)
×
3030
                return NULL;
3031

3032
        (void) sym_crypt_deactivate_by_name(NULL, name, CRYPT_DEACTIVATE_DEFERRED);
×
3033
        return mfree(name);
×
3034
}
3035
DEFINE_TRIVIAL_CLEANUP_FUNC(char *, dm_deferred_remove_clean);
83✔
3036

3037
/* Validates the PKCS#7 signature verity->root_hash_sig over the hex-formatted verity->root_hash in
 * userspace, against the *.crt certificates found in the "verity.d" configuration directories below
 * 'root'.
 *
 * Userspace validation is refused (i.e. 0 is returned) if DISSECT_IMAGE_ALLOW_USERSPACE_VERITY is not
 * set in 'flags', if $SYSTEMD_ALLOW_USERSPACE_VERITY or the systemd.allow_userspace_verity= kernel
 * command line option turn it off or cannot be parsed, if no certificates are found, or if built
 * without OpenSSL.
 *
 * Returns 1 if the signature checks out, 0 if not, < 0 on unexpected errors. */
static int validate_signature_userspace(const VeritySettings *verity, const char *root, DissectImageFlags flags) {
        int r;

        if (!FLAGS_SET(flags, DISSECT_IMAGE_ALLOW_USERSPACE_VERITY)) {
                log_debug("Userspace dm-verity signature authentication disabled via flag.");
                return 0;
        }

        r = secure_getenv_bool("SYSTEMD_ALLOW_USERSPACE_VERITY");
        if (r < 0 && r != -ENXIO) {
                log_debug_errno(r, "Failed to parse $SYSTEMD_ALLOW_USERSPACE_VERITY environment variable, refusing userspace dm-verity signature authentication: %m");
                return 0;
        }
        if (!r) {
                log_debug("Userspace dm-verity signature authentication disabled via $SYSTEMD_ALLOW_USERSPACE_VERITY environment variable.");
                return 0;
        }

        bool b;
        r = proc_cmdline_get_bool("systemd.allow_userspace_verity", PROC_CMDLINE_TRUE_WHEN_MISSING, &b);
        if (r < 0) {
                log_debug_errno(r, "Failed to parse systemd.allow_userspace_verity= kernel command line option, refusing userspace dm-verity signature authentication: %m");
                return 0;
        }
        if (!b) {
                log_debug("Userspace dm-verity signature authentication disabled via systemd.allow_userspace_verity= kernel command line variable.");
                return 0;
        }

#if HAVE_OPENSSL
        _cleanup_(sk_X509_free_allp) STACK_OF(X509) *sk = NULL;
        _cleanup_strv_free_ char **certs = NULL;
        _cleanup_(PKCS7_freep) PKCS7 *p7 = NULL;
        _cleanup_free_ char *s = NULL;
        _cleanup_(BIO_freep) BIO *bio = NULL; /* 'bio' must be freed first, 's' second, hence keep this order
                                               * of declaration in place, please */
        assert(verity);
        assert(iovec_is_set(&verity->root_hash));
        assert(iovec_is_set(&verity->root_hash_sig));

        /* Because installing a signature certificate into the kernel chain is so messy, let's optionally do
         * userspace validation. */

        r = conf_files_list_nulstr(&certs, ".crt", root, CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED, CONF_PATHS_NULSTR("verity.d"));
        if (r < 0)
                return log_debug_errno(r, "Failed to enumerate certificates: %m");
        if (strv_isempty(certs)) {
                log_debug("No userspace dm-verity certificates found.");
                return 0;
        }

        const unsigned char *d = verity->root_hash_sig.iov_base;
        p7 = d2i_PKCS7(NULL, &d, (long) verity->root_hash_sig.iov_len);
        if (!p7)
                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse PKCS7 DER signature data.");

        /* The signed payload is the root hash formatted as hexadecimal string. */
        s = hexmem(verity->root_hash.iov_base, verity->root_hash.iov_len);
        if (!s)
                return log_oom_debug();

        bio = BIO_new_mem_buf(s, strlen(s));
        if (!bio)
                return log_oom_debug();

        sk = sk_X509_new_null();
        if (!sk)
                return log_oom_debug();

        /* Collect all certificates that can be loaded, skipping over those that cannot. */
        STRV_FOREACH(i, certs) {
                _cleanup_(X509_freep) X509 *c = NULL;
                _cleanup_fclose_ FILE *f = NULL;

                f = fopen(*i, "re");
                if (!f) {
                        log_debug_errno(errno, "Failed to open '%s', ignoring: %m", *i);
                        continue;
                }

                c = PEM_read_X509(f, NULL, NULL, NULL);
                if (!c) {
                        log_debug("Failed to load X509 certificate '%s', ignoring.", *i);
                        continue;
                }

                if (sk_X509_push(sk, c) == 0)
                        return log_oom_debug();

                /* The stack owns the certificate now. */
                TAKE_PTR(c);
        }

        r = PKCS7_verify(p7, sk, NULL, bio, NULL, PKCS7_NOINTERN|PKCS7_NOVERIFY);
        if (r <= 0) {
                /* Anything but a positive result means the signature did not validate. Do not pass
                 * a negative OpenSSL result on, the caller would take it for an errno-style error. */
                log_debug("Userspace PKCS#7 validation failed: %s", ERR_error_string(ERR_get_error(), NULL));
                return 0;
        }

        log_debug("Userspace PKCS#7 validation succeeded.");
        return 1;
#else
        log_debug("Not doing client-side validation of dm-verity root hash signatures, OpenSSL support disabled.");
        return 0;
#endif
}
3141

3142
static int do_crypt_activate_verity(
75✔
3143
                struct crypt_device *cd,
3144
                const char *root,
3145
                const char *name,
3146
                const VeritySettings *verity,
3147
                DissectImageFlags flags,
3148
                PartitionPolicyFlags policy_flags) {
3149

3150
        int r;
75✔
3151

3152
        assert(cd);
75✔
3153
        assert(name);
75✔
3154
        assert(verity);
75✔
3155

3156
        bool check_signature;
75✔
3157
        if (iovec_is_set(&verity->root_hash_sig) && FLAGS_SET(policy_flags, PARTITION_POLICY_SIGNED)) {
147✔
3158
                r = secure_getenv_bool("SYSTEMD_DISSECT_VERITY_SIGNATURE");
72✔
3159
                if (r < 0 && r != -ENXIO)
72✔
3160
                        log_debug_errno(r, "Failed to parse $SYSTEMD_DISSECT_VERITY_SIGNATURE");
×
3161

3162
                check_signature = r != 0;
72✔
3163
        } else
3164
                check_signature = false;
3165

3166
        bool measure_signature;
72✔
3167
        if (check_signature) {
72✔
3168
                int k;
72✔
3169

3170
                /* First, if we have support for signed keys in the kernel, then try that first. */
3171
                r = sym_crypt_activate_by_signed_key(
144✔
3172
                                cd,
3173
                                name,
3174
                                verity->root_hash.iov_base,
72✔
3175
                                verity->root_hash.iov_len,
72✔
3176
                                verity->root_hash_sig.iov_base,
72✔
3177
                                verity->root_hash_sig.iov_len,
72✔
3178
                                CRYPT_ACTIVATE_READONLY);
3179
                if (r >= 0) {
72✔
3180
                        log_debug("Verity activation via kernel signature logic worked.");
31✔
3181
                        measure_signature = true;
31✔
3182
                        goto done;
31✔
3183
                }
3184

3185
                log_debug_errno(r, "Validation of dm-verity signature failed via the kernel, trying userspace validation instead: %m");
41✔
3186

3187
                /* Let's mangle ENOKEY → EDESTADDRREQ, so that we return a clear, recognizable error if
3188
                 * there's a signature we don't recognize, that is distinct from the LUKS/encryption
3189
                 * -ENOKEY, which means "password required, but I have none". */
3190
                if (r == -ENOKEY)
41✔
3191
                        r = -EDESTADDRREQ;
41✔
3192

3193
                /* So this didn't work via the kernel, then let's try userspace validation instead. If that
3194
                 * works we'll try to activate without telling the kernel the signature. */
3195

3196
                /* Preferably propagate the original kernel error, so that the fallback logic can work,
3197
                 * as the device-mapper is finicky around concurrent activations of the same volume */
3198
                k = validate_signature_userspace(verity, root, flags);
41✔
3199
                if (k < 0)
41✔
3200
                        return k;
3201
                if (k == 0) {
41✔
3202
                        log_debug("Activation of signed Verity volume worked neither via the kernel nor in userspace, can't activate.");
2✔
3203

3204
                        /* So if we had a signature and we're supposed to exclusively allow
3205
                         * signature-based activation, then return the error now */
3206
                        if (!FLAGS_SET(policy_flags, PARTITION_POLICY_VERITY))
2✔
3207
                                return r < 0 ? r : -EDESTADDRREQ;
1✔
3208

3209
                        log_debug("Activation of signed Verity volume without validating signature is permitted by policy. Continuing.");
1✔
3210
                } else
3211
                        log_debug("Verity activation via userspace signature logic worked, activating by root hash.");
39✔
3212

3213
                /* Otherwise let's see what signature-less activation results in. */
3214

3215
                measure_signature = true;
3216

3217
        } else if (!FLAGS_SET(policy_flags, PARTITION_POLICY_VERITY))
3✔
3218
                return log_debug_errno(SYNTHETIC_ERRNO(ERFKILL),
×
3219
                                       "No-signature activation of Verity volume not allowed by policy, refusing.");
3220
        else
3221
                measure_signature = false;
3222

3223
        r = sym_crypt_activate_by_volume_key(
86✔
3224
                        cd,
3225
                        name,
3226
                        verity->root_hash.iov_base,
43✔
3227
                        verity->root_hash.iov_len,
43✔
3228
                        CRYPT_ACTIVATE_READONLY);
3229
        if (r < 0)
43✔
3230
                return log_debug_errno(r, "Activation of Verity via root hash failed: %m");
×
3231

3232
        log_debug("Activation of Verity via root hash succeeded.");
43✔
3233

3234
done:
43✔
3235
        (void) pcrextend_verity_now(
74✔
3236
                        name,
3237
                        &verity->root_hash,
3238
                        measure_signature ? &verity->root_hash_sig : NULL);
3239
        return 0;
74✔
3240
}
3241

3242
static usec_t verity_timeout(void) {
×
3243
        usec_t t = 100 * USEC_PER_MSEC;
×
3244
        const char *e;
×
3245
        int r;
×
3246

3247
        /* On slower machines, like non-KVM vm, setting up device may take a long time.
3248
         * Let's make the timeout configurable. */
3249

3250
        e = getenv("SYSTEMD_DISSECT_VERITY_TIMEOUT_SEC");
×
3251
        if (!e)
×
3252
                return t;
×
3253

3254
        r = parse_sec(e, &t);
×
3255
        if (r < 0)
×
3256
                log_debug_errno(r,
×
3257
                                "Failed to parse timeout specified in $SYSTEMD_DISSECT_VERITY_TIMEOUT_SEC, "
3258
                                "using the default timeout (%s).",
3259
                                FORMAT_TIMESPAN(t, USEC_PER_MSEC));
3260

3261
        return t;
×
3262
}
3263

3264
static int verity_partition(
86✔
3265
                DissectedImage *di,
3266
                PartitionDesignator designator,
3267
                DissectedPartition *m, /* data partition */
3268
                DissectedPartition *v, /* verity partition */
3269
                const char *root, /* The root to get user verity certs from (for a sysext) */
3270
                const VeritySettings *verity,
3271
                DissectImageFlags flags,
3272
                PartitionPolicyFlags policy_flags,
3273
                DecryptedImage *d) {
3274

3275
        _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
×
3276
        _cleanup_free_ char *node = NULL, *name = NULL;
86✔
3277
        _cleanup_close_ int mount_node_fd = -EBADF;
86✔
3278
        int r;
86✔
3279

3280
        assert(di);
86✔
3281
        assert(m);
86✔
3282
        assert(v || (verity && verity->data_path));
86✔
3283

3284
        if (!verity || !iovec_is_set(&verity->root_hash))
170✔
3285
                return 0;
3286
        if (!((verity->designator < 0 && designator == PARTITION_ROOT) ||
84✔
3287
              (verity->designator == designator)))
3288
                return 0;
3289

3290
        if (!m->found || !m->node || !m->fstype)
83✔
3291
                return 0;
3292
        if (!verity->data_path) {
83✔
3293
                if (!v->found || !v->node || !v->fstype)
52✔
3294
                        return 0;
3295

3296
                if (!streq(v->fstype, "DM_verity_hash"))
52✔
3297
                        return 0;
3298
        }
3299

3300
        if (!(policy_flags & (PARTITION_POLICY_VERITY|PARTITION_POLICY_SIGNED))) {
83✔
3301
                log_debug("Attempted to unlock partition via Verity, but it's prohibited, skipping.");
×
3302
                return 0;
×
3303
        }
3304

3305
        r = dlopen_cryptsetup();
83✔
3306
        if (r < 0)
83✔
3307
                return r;
3308

3309
        if (FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE)) {
83✔
3310
                /* Use the roothash, which is unique per volume, as the device node name, so that it can be reused */
3311
                _cleanup_free_ char *root_hash_encoded = NULL;
24✔
3312

3313
                root_hash_encoded = hexmem(verity->root_hash.iov_base, verity->root_hash.iov_len);
24✔
3314
                if (!root_hash_encoded)
24✔
3315
                        return -ENOMEM;
×
3316

3317
                r = make_dm_name_and_node(root_hash_encoded, /* diskseq= */ 0, "-verity", &name, &node);
24✔
3318
        } else
3319
                r = make_dm_name_and_node_from_node(m->node, dissected_image_diskseq(di), "-verity", &name, &node);
59✔
3320
        if (r < 0)
83✔
3321
                return r;
3322

3323
        r = sym_crypt_init(&cd, verity->data_path ?: v->node);
83✔
3324
        if (r < 0)
83✔
3325
                return r;
3326

3327
        cryptsetup_enable_logging(cd);
83✔
3328

3329
        r = sym_crypt_load(cd, CRYPT_VERITY, NULL);
83✔
3330
        if (r < 0)
83✔
3331
                return r;
3332

3333
        r = sym_crypt_set_data_device(cd, m->node);
83✔
3334
        if (r < 0)
83✔
3335
                return r;
3336

3337
        if (!GREEDY_REALLOC0(d->decrypted, d->n_decrypted + 1))
83✔
3338
                return -ENOMEM;
3339

3340
        /* If activating fails because the device already exists, check the metadata and reuse it if it matches.
3341
         * In case of ENODEV/ENOENT, which can happen if another process is activating at the exact same time,
3342
         * retry a few times before giving up. */
3343
        for (unsigned i = 0; i < N_DEVICE_NODE_LIST_ATTEMPTS; i++) {
83✔
3344
                _cleanup_(dm_deferred_remove_cleanp) char *restore_deferred_remove = NULL;
83✔
3345
                _cleanup_(sym_crypt_freep) struct crypt_device *existing_cd = NULL;
×
3346
                _cleanup_close_ int fd = -EBADF;
83✔
3347

3348
                /* First, check if the device already exists. */
3349
                fd = open(node, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
83✔
3350
                if (fd < 0 && !ERRNO_IS_DEVICE_ABSENT(errno))
83✔
3351
                        return log_debug_errno(errno, "Failed to open verity device %s: %m", node);
×
3352
                if (fd >= 0)
83✔
3353
                        goto check; /* The device already exists. Let's check it. */
8✔
3354

3355
                /* The symlink to the device node does not exist yet. Assume not activated, and let's activate it. */
3356
                r = do_crypt_activate_verity(cd, root, name, verity, flags, policy_flags);
75✔
3357
                if (r >= 0)
75✔
3358
                        goto try_open; /* The device is activated. Let's open it. */
74✔
3359
                /* libdevmapper can return EINVAL when the device is already in the activation stage.
3360
                 * There's no way to distinguish this situation from a genuine error due to invalid
3361
                 * parameters, so immediately fall back to activating the device with a unique name.
3362
                 * Improvements in libcrypsetup can ensure this never happens:
3363
                 * https://gitlab.com/cryptsetup/cryptsetup/-/merge_requests/96 */
3364
                if (r == -EINVAL && FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE))
1✔
3365
                        break;
3366
                /* Volume is being opened but not ready, crypt_init_by_name would fail, try to open again if
3367
                 * sharing is enabled. */
3368
                if (r == -ENODEV && FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE))
1✔
3369
                        goto try_again;
×
3370
                if (!IN_SET(r,
1✔
3371
                            -EEXIST, /* Volume has already been opened and ready to be used. */
3372
                            -EBUSY   /* Volume is being opened but not ready, crypt_init_by_name() can fetch details. */))
3373
                        return log_debug_errno(r, "Failed to activate verity device %s: %m", node);
1✔
3374

3375
        check:
8✔
3376
                /* To avoid races, disable automatic removal on umount while setting up the new device. Restore it on failure. */
3377
                r = dm_deferred_remove_cancel(name);
8✔
3378
                /* -EBUSY and -ENXIO: the device has already been removed or being removed. We cannot
3379
                 * use the device, try to open again. See target_message() in drivers/md/dm-ioctl.c
3380
                 * and dm_cancel_deferred_remove() in drivers/md/dm.c */
3381
                if (IN_SET(r, -EBUSY, -ENXIO))
8✔
3382
                        goto try_again;
×
3383
                if (r < 0)
8✔
3384
                        return log_debug_errno(r, "Failed to disable automated deferred removal for verity device %s: %m", node);
×
3385

3386
                restore_deferred_remove = strdup(name);
8✔
3387
                if (!restore_deferred_remove)
8✔
3388
                        return log_oom_debug();
×
3389

3390
                r = verity_can_reuse(verity, name, &existing_cd);
8✔
3391
                /* Same as above, -EINVAL can randomly happen when it actually means -EEXIST */
3392
                if (r == -EINVAL && FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE))
8✔
3393
                        break;
3394
                if (IN_SET(r,
8✔
3395
                           -ENOENT, /* Removed?? */
3396
                           -EBUSY,  /* Volume is being opened but not ready, crypt_init_by_name() can fetch details. */
3397
                           -ENODEV  /* Volume is being opened but not ready, crypt_init_by_name() would fail, try to open again. */ ))
3398
                        goto try_again;
×
3399
                if (r < 0)
8✔
3400
                        return log_debug_errno(r, "Failed to check if existing verity device %s can be reused: %m", node);
×
3401

3402
                if (fd < 0) {
8✔
3403
                        /* devmapper might say that the device exists, but the devlink might not yet have been
3404
                         * created. Check and wait for the udev event in that case. */
3405
                        r = device_wait_for_devlink(node, "block", verity_timeout(), NULL);
×
3406
                        /* Fallback to activation with a unique device if it's taking too long */
3407
                        if (r == -ETIMEDOUT && FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE))
×
3408
                                break;
3409
                        if (r < 0)
×
3410
                                return log_debug_errno(r, "Failed to wait device node symlink %s: %m", node);
×
3411
                }
3412

3413
        try_open:
×
3414
                if (fd < 0) {
74✔
3415
                        /* Now, the device is activated and devlink is created. Let's open it. */
3416
                        fd = open(node, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
74✔
3417
                        if (fd < 0) {
74✔
3418
                                if (!ERRNO_IS_DEVICE_ABSENT(errno))
×
3419
                                        return log_debug_errno(errno, "Failed to open verity device %s: %m", node);
×
3420

3421
                                /* The device has already been removed?? */
3422
                                goto try_again;
×
3423
                        }
3424
                }
3425

3426
                /* Everything looks good and we'll be able to mount the device, so deferred remove will be re-enabled at that point. */
3427
                restore_deferred_remove = mfree(restore_deferred_remove);
82✔
3428

3429
                mount_node_fd = TAKE_FD(fd);
82✔
3430
                if (existing_cd)
82✔
3431
                        crypt_free_and_replace(cd, existing_cd);
8✔
3432

3433
                goto success;
82✔
3434

3435
        try_again:
×
3436
                /* Device is being removed by another process. Let's wait for a while. */
3437
                (void) usleep_safe(2 * USEC_PER_MSEC);
×
3438
        }
3439

3440
        /* All trials failed or a conflicting verity device exists. Let's try to activate with a unique name. */
3441
        if (FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE)) {
×
3442
                /* Before trying to activate with unique name, we need to free crypt_device object.
3443
                 * Otherwise, we get error from libcryptsetup like the following:
3444
                 * ------
3445
                 * systemd[1234]: Cannot use device /dev/loop5 which is in use (already mapped or mounted).
3446
                 * ------
3447
                 */
3448
                sym_crypt_free(cd);
×
3449
                cd = NULL;
×
3450
                return verity_partition(
×
3451
                                di,
3452
                                designator,
3453
                                m,
3454
                                v,
3455
                                root,
3456
                                verity,
3457
                                flags & ~DISSECT_IMAGE_VERITY_SHARE,
×
3458
                                policy_flags,
3459
                                d);
3460
        }
3461

3462
        return log_debug_errno(SYNTHETIC_ERRNO(EBUSY), "All attempts to activate verity device %s failed.", name);
×
3463

3464
success:
82✔
3465
        d->decrypted[d->n_decrypted++] = (DecryptedPartition) {
82✔
3466
                .name = TAKE_PTR(name),
82✔
3467
                .device = TAKE_PTR(cd),
82✔
3468
        };
3469

3470
        m->decrypted_node = TAKE_PTR(node);
82✔
3471
        close_and_replace(m->mount_node_fd, mount_node_fd);
82✔
3472

3473
        return 0;
82✔
3474
}
3475
#endif
3476

3477
int dissected_image_decrypt(
278✔
3478
                DissectedImage *m,
3479
                const char *root, /* The root to get user verity certs from (for a sysext) */
3480
                const char *passphrase,
3481
                const VeritySettings *verity,
3482
                const ImagePolicy *policy,
3483
                DissectImageFlags flags) {
3484

3485
#if HAVE_LIBCRYPTSETUP
3486
        _cleanup_(decrypted_image_unrefp) DecryptedImage *d = NULL;
278✔
3487
#endif
3488
        int r;
278✔
3489

3490
        assert(m);
278✔
3491
        assert(!verity || iovec_is_valid(&verity->root_hash));
278✔
3492
        assert(!verity || iovec_is_valid(&verity->root_hash_sig));
278✔
3493

3494
        /* Returns:
3495
         *
3496
         *      = 0           → There was nothing to decrypt/setup
3497
         *      > 0           → Decrypted/setup successfully
3498
         *      -ENOKEY       → dm-crypt: there's something to decrypt but no decryption key was supplied
3499
         *      -EKEYREJECTED → dm-crypt: Passed key was not correct
3500
         *      -EDESTADDRREQ → dm-verity: there's something to setup but no signature was supplied
3501
         *      -EBUSY        → dm-verity: Generic Verity error (kernel is not very explanatory)
3502
         *      -ERFKILL      → image policy not compatible with request
3503
         *      -EEXIST       → DM device already exists under the specified name
3504
         */
3505

3506
        if (verity && iovec_is_set(&verity->root_hash) && verity->root_hash.iov_len < sizeof(sd_id128_t))
361✔
3507
                return -EINVAL;
3508

3509
        if (!m->encrypted && !m->verity_ready)
278✔
3510
                return 0;
3511

3512
        r = secure_getenv_bool("SYSTEMD_VERITY_SHARING");
85✔
3513
        if (r >= 0)
85✔
3514
                SET_FLAG(flags, DISSECT_IMAGE_VERITY_SHARE, r);
3✔
3515

3516
#if HAVE_LIBCRYPTSETUP
3517
        r = decrypted_image_new(&d);
85✔
3518
        if (r < 0)
85✔
3519
                return r;
3520

3521
        for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
1,177✔
3522
                DissectedPartition *p = m->partitions + i;
1,093✔
3523
                PartitionDesignator k;
1,093✔
3524

3525
                if (!p->found)
1,093✔
3526
                        continue;
900✔
3527

3528
                PartitionPolicyFlags fl = image_policy_get_exhaustively(policy, i);
193✔
3529

3530
                r = decrypt_partition(m, p, passphrase, flags, fl, d);
193✔
3531
                if (r < 0)
193✔
3532
                        return r;
3533

3534
                k = partition_verity_hash_of(i);
193✔
3535
                if (k >= 0) {
193✔
3536
                        r = verity_partition(m, i, p, m->partitions + k, root, verity, flags, fl, d);
86✔
3537
                        if (r < 0)
86✔
3538
                                return r;
3539
                }
3540

3541
                if (!p->decrypted_fstype && p->mount_node_fd >= 0 && p->decrypted_node) {
192✔
3542
                        r = probe_filesystem_full(p->mount_node_fd, p->decrypted_node, 0, UINT64_MAX, /* bool restrict_fstypes= */ true, &p->decrypted_fstype);
84✔
3543
                        if (r < 0 && r != -EUCLEAN)
84✔
3544
                                return r;
3545
                }
3546
        }
3547

3548
        m->decrypted_image = TAKE_PTR(d);
84✔
3549
        return 1;
84✔
3550
#else
3551
        return -EOPNOTSUPP;
3552
#endif
3553
}
3554

3555
int dissected_image_decrypt_interactively(
73✔
3556
                DissectedImage *m,
3557
                const char *passphrase,
3558
                const VeritySettings *verity,
3559
                const ImagePolicy *image_policy,
3560
                DissectImageFlags flags) {
3561

3562
        _cleanup_strv_free_erase_ char **z = NULL;
73✔
3563
        int n = 3, r;
73✔
3564

3565
        if (passphrase)
73✔
3566
                n--;
2✔
3567

3568
        for (;;) {
73✔
3569
                r = dissected_image_decrypt(m, /* root= */ NULL, passphrase, verity, image_policy, flags);
146✔
3570
                if (r >= 0)
73✔
3571
                        return r;
3572
                if (r == -EKEYREJECTED)
1✔
3573
                        log_error_errno(r, "Incorrect passphrase, try again!");
×
3574
                else if (r == -EDESTADDRREQ)
1✔
3575
                        return log_error_errno(r, "Image lacks recognized signature.");
1✔
3576
                else if (r == -ERFKILL)
×
3577
                        return log_error_errno(r, "Unlocking of Verity/LUKS volumes not permitted by policy.");
×
3578
                else if (r != -ENOKEY)
×
3579
                        return log_error_errno(r, "Failed to decrypt/set up image: %m");
×
3580

3581
                if (--n < 0)
×
3582
                        return log_error_errno(SYNTHETIC_ERRNO(EKEYREJECTED),
×
3583
                                               "Too many retries.");
3584

3585
                z = strv_free_erase(z);
×
3586

3587
                static const AskPasswordRequest req = {
×
3588
                        .tty_fd = -EBADF,
3589
                        .message = "Please enter image passphrase:",
3590
                        .id = "dissect",
3591
                        .keyring = "dissect",
3592
                        .credential = "dissect.passphrase",
3593
                        .until = USEC_INFINITY,
3594
                        .hup_fd = -EBADF,
3595
                };
3596

3597
                r = ask_password_auto(&req, /* flags= */ 0, &z);
×
3598
                if (r < 0)
×
3599
                        return log_error_errno(r, "Failed to query for passphrase: %m");
×
3600

3601
                assert(!strv_isempty(z));
73✔
3602
                passphrase = z[0];
3603
        }
3604
}
3605

3606
/* Turns on automatic removal after the last use ended for all DM devices of this image, and marks each of
 * them as relinquished so that we don't clean them up ourselves anymore. Returns 0 on success, or the
 * negative error of the first device that could not be marked. */
static int decrypted_image_relinquish(DecryptedImage *d) {
        assert(d);

#if HAVE_LIBCRYPTSETUP
        for (size_t idx = 0; idx < d->n_decrypted; idx++) {
                DecryptedPartition *part = &d->decrypted[idx];
                int r;

                /* Already handed over earlier? Then there's nothing left to do for this one. */
                if (part->relinquished)
                        continue;

                r = sym_crypt_deactivate_by_name(NULL, part->name, CRYPT_DEACTIVATE_DEFERRED);
                if (r < 0)
                        return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", part->name);

                part->relinquished = true;
        }
#endif

        return 0;
}
3631

3632
/* Relinquishes the resources backing the dissected image: first the DM devices of the decrypted image (if
 * there is one), then the loop device (if there is one). Returns 0 on success, or the negative error
 * reported for the decrypted image, in which case the loop device is left untouched. */
int dissected_image_relinquish(DissectedImage *m) {
        assert(m);

        if (m->decrypted_image) {
                int r = decrypted_image_relinquish(m->decrypted_image);
                if (r < 0)
                        return r;
        }

        if (m->loop)
                loop_device_relinquish(m->loop);

        return 0;
}
3648

3649
/* Releases all patterns stored in the filter and resets the slots to NULL. The ImageFilter object itself
 * is not freed. */
void image_filter_done(ImageFilter *f) {
        assert(f);

        for (size_t i = 0; i < ELEMENTSOF(f->pattern); i++)
                f->pattern[i] = mfree(f->pattern[i]);
}
5✔
3655

3656
/* Frees the filter including all its patterns. Accepts NULL, in which case this is a NOP. Returns what
 * mfree() returns (NULL for a NULL argument), so that callers may assign the result back to their
 * pointer. */
ImageFilter *image_filter_free(ImageFilter *f) {
        if (f) {
                image_filter_done(f);
                f = mfree(f);
        }

        return f;
}
3663

3664
/* Parses a colon-separated list of "<designator>=<pattern>" assignments into a newly allocated
 * ImageFilter. An empty or NULL string results in a NULL filter. Each designator may be assigned only
 * once. Returns 0 on success, a negative errno-style value on failure; on success the filter is stored
 * in *ret if 'ret' is non-NULL. */
int image_filter_parse(const char *s, ImageFilter **ret) {
        _cleanup_(image_filter_freep) ImageFilter *f = NULL;
        int r;

        if (isempty(s)) {
                if (ret)
                        *ret = NULL;
                return 0;
        }

        for (;;) {
                _cleanup_free_ char *entry = NULL;

                r = extract_first_word(&s, &entry, ":", EXTRACT_UNQUOTE|EXTRACT_DONT_COALESCE_SEPARATORS);
                if (r < 0)
                        return log_debug_errno(r, "Failed to extract word: %m");
                if (r == 0)
                        break;

                /* Split the entry at "=" into the designator and its pattern; anything left over after
                 * these two fields makes the entry invalid. */
                _cleanup_free_ char *designator = NULL, *pattern = NULL;
                const char *cursor = entry;
                r = extract_many_words(&cursor, "=", EXTRACT_UNQUOTE|EXTRACT_DONT_COALESCE_SEPARATORS, &designator, &pattern);
                if (r < 0)
                        return log_debug_errno(r, "Failed to extract designator: %m");
                if (r != 2 || !isempty(cursor))
                        return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Unable to split: %s", entry);

                PartitionDesignator d = partition_designator_from_string(designator);
                if (d < 0)
                        return log_debug_errno(d, "Failed to parse partition designator: %s", designator);

                if (f) {
                        if (f->pattern[d])
                                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Duplicate pattern for '%s', refusing.", partition_designator_to_string(d));
                } else {
                        /* Allocate the filter lazily, when we see the first valid assignment. */
                        f = new0(ImageFilter, 1);
                        if (!f)
                                return log_oom_debug();
                }

                f->pattern[d] = TAKE_PTR(pattern);
        }

        if (ret)
                *ret = TAKE_PTR(f);

        return 0;
}
3710

3711
static char *build_auxiliary_path(const char *image, const char *suffix) {
7,302✔
3712
        const char *e;
7,302✔
3713
        char *n;
7,302✔
3714

3715
        assert(image);
7,302✔
3716
        assert(suffix);
7,302✔
3717

3718
        e = endswith(image, ".raw");
7,302✔
3719
        if (!e)
7,302✔
3720
                return strjoin(e, suffix);
252✔
3721

3722
        n = new(char, e - image + strlen(suffix) + 1);
7,050✔
3723
        if (!n)
7,050✔
3724
                return NULL;
3725

3726
        strcpy(mempcpy(n, image, e - image), suffix);
7,050✔
3727
        return n;
7,050✔
3728
}
3729

3730
/* Releases everything the VeritySettings object references (data path, root hash, root hash signature).
 * The object itself is not freed. */
void verity_settings_done(VeritySettings *v) {
        assert(v);

        v->data_path = mfree(v->data_path);
        iovec_done(&v->root_hash_sig);
        iovec_done(&v->root_hash);
}
30,809✔
3737

3738
/* Frees a heap-allocated VeritySettings object along with everything it references. Accepts NULL, in
 * which case this is a NOP. Returns what mfree() returns (NULL for a NULL argument), so that callers may
 * assign the result back to their pointer. */
VeritySettings* verity_settings_free(VeritySettings *v) {
        if (v) {
                verity_settings_done(v);
                v = mfree(v);
        }

        return v;
}
3745

3746
void verity_settings_hash_func(const VeritySettings *s, struct siphash *state) {
4✔
3747
        assert(s);
4✔
3748

3749
        siphash24_compress_typesafe(s->root_hash.iov_len, state);
4✔
3750
        siphash24_compress(s->root_hash.iov_base, s->root_hash.iov_len, state);
4✔
3751
}
4✔
3752

3753
int verity_settings_compare_func(const VeritySettings *x, const VeritySettings *y) {
2✔
3754
        assert(x);
2✔
3755
        assert(y);
2✔
3756

3757
        return iovec_memcmp(&x->root_hash, &y->root_hash);
2✔
3758
}
3759

3760
/* Hash operations for containers keyed by VeritySettings: keys are hashed and compared by their root hash
 * only (see the two functions above), and values are released with verity_settings_free(). */
DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(verity_settings_hash_ops, VeritySettings, verity_settings_hash_func, verity_settings_compare_func, VeritySettings, verity_settings_free);
2✔
3761

3762
int verity_settings_load(
2,445✔
3763
                VeritySettings *verity,
3764
                const char *image,
3765
                const char *root_hash_path,
3766
                const char *root_hash_sig_path) {
3767

3768
        PartitionDesignator designator;
2,445✔
3769
        int r;
2,445✔
3770

3771
        assert(verity);
2,445✔
3772
        assert(image);
2,445✔
3773
        assert(verity->designator < 0 || IN_SET(verity->designator, PARTITION_ROOT, PARTITION_USR));
2,445✔
3774

3775
        /* If we are asked to load the root hash for a device node, exit early */
3776
        if (is_device_path(image))
2,445✔
3777
                return 0;
2,445✔
3778

3779
        r = secure_getenv_bool("SYSTEMD_DISSECT_VERITY_SIDECAR");
2,445✔
3780
        if (r < 0 && r != -ENXIO)
2,445✔
3781
                log_debug_errno(r, "Failed to parse $SYSTEMD_DISSECT_VERITY_SIDECAR, ignoring: %m");
×
3782
        if (r == 0)
2,445✔
3783
                return 0;
3784

3785
        designator = verity->designator;
2,445✔
3786

3787
        /* We only fill in what isn't already filled in */
3788

3789
        _cleanup_(iovec_done) struct iovec root_hash = {};
2,445✔
3790
        if (!iovec_is_set(&verity->root_hash)) {
2,445✔
3791
                _cleanup_free_ char *text = NULL;
2,416✔
3792

3793
                if (root_hash_path) {
2,416✔
3794
                        /* If explicitly specified it takes precedence */
3795
                        r = read_one_line_file(root_hash_path, &text);
×
3796
                        if (r < 0)
×
3797
                                return r;
3798

3799
                        if (designator < 0)
×
3800
                                designator = PARTITION_ROOT;
×
3801
                } else {
3802
                        /* Otherwise look for xattr and separate file, and first for the data for root and if
3803
                         * that doesn't exist for /usr */
3804

3805
                        if (designator < 0 || designator == PARTITION_ROOT) {
2,416✔
3806
                                r = getxattr_malloc(image, "user.verity.roothash", &text, /* ret_size= */ NULL);
2,416✔
3807
                                if (r < 0) {
2,416✔
3808
                                        _cleanup_free_ char *p = NULL;
2,416✔
3809

3810
                                        if (r != -ENOENT && !ERRNO_IS_XATTR_ABSENT(r))
2,416✔
3811
                                                return r;
3812

3813
                                        p = build_auxiliary_path(image, ".roothash");
2,416✔
3814
                                        if (!p)
2,416✔
3815
                                                return -ENOMEM;
3816

3817
                                        r = read_one_line_file(p, &text);
2,416✔
3818
                                        if (r < 0 && r != -ENOENT)
2,416✔
3819
                                                return r;
3820
                                }
3821

3822
                                if (text)
2,416✔
3823
                                        designator = PARTITION_ROOT;
1,723✔
3824
                        }
3825

3826
                        if (!text && (designator < 0 || designator == PARTITION_USR)) {
2,416✔
3827
                                /* So in the "roothash" xattr/file name above the "root" of course primarily
3828
                                 * refers to the root of the Verity Merkle tree. But coincidentally it also
3829
                                 * is the hash for the *root* file system, i.e. the "root" neatly refers to
3830
                                 * two distinct concepts called "root". Taking benefit of this happy
3831
                                 * coincidence we call the file with the root hash for the /usr/ file system
3832
                                 * `usrhash`, because `usrroothash` or `rootusrhash` would just be too
3833
                                 * confusing. We thus drop the reference to the root of the Merkle tree, and
3834
                                 * just indicate which file system it's about. */
3835
                                r = getxattr_malloc(image, "user.verity.usrhash", &text, /* ret_size= */ NULL);
692✔
3836
                                if (r < 0) {
692✔
3837
                                        _cleanup_free_ char *p = NULL;
692✔
3838

3839
                                        if (r != -ENOENT && !ERRNO_IS_XATTR_ABSENT(r))
692✔
3840
                                                return r;
3841

3842
                                        p = build_auxiliary_path(image, ".usrhash");
692✔
3843
                                        if (!p)
692✔
3844
                                                return -ENOMEM;
3845

3846
                                        r = read_one_line_file(p, &text);
692✔
3847
                                        if (r < 0 && r != -ENOENT)
692✔
3848
                                                return r;
3849
                                }
3850

3851
                                if (text)
692✔
3852
                                        designator = PARTITION_USR;
×
3853
                        }
3854
                }
3855

3856
                if (text) {
2,416✔
3857
                        r = unhexmem(text, &root_hash.iov_base, &root_hash.iov_len);
1,723✔
3858
                        if (r < 0)
1,723✔
3859
                                return r;
3860
                        if (root_hash.iov_len < sizeof(sd_id128_t))
1,723✔
3861
                                return -EINVAL;
3862
                }
3863
        }
3864

3865
        _cleanup_(iovec_done) struct iovec root_hash_sig = {};
2,445✔
3866
        if ((iovec_is_set(&root_hash) || iovec_is_set(&verity->root_hash)) && !iovec_is_set(&verity->root_hash_sig)) {
4,197✔
3867
                if (root_hash_sig_path) {
1,752✔
3868
                        r = read_full_file(root_hash_sig_path, (char**) &root_hash_sig.iov_base, &root_hash_sig.iov_len);
×
3869
                        if (r < 0 && r != -ENOENT)
×
3870
                                return r;
3871

3872
                        if (r >= 0 && root_hash_sig.iov_len == 0) /* refuse empty size signatures */
×
3873
                                return -EINVAL;
3874

3875
                        if (designator < 0)
×
3876
                                designator = PARTITION_ROOT;
×
3877
                } else {
3878
                        if (designator < 0 || designator == PARTITION_ROOT) {
1,752✔
3879
                                _cleanup_free_ char *p = NULL;
1,749✔
3880

3881
                                /* Follow naming convention recommended by the relevant RFC:
3882
                                 * https://tools.ietf.org/html/rfc5751#section-3.2.1 */
3883
                                p = build_auxiliary_path(image, ".roothash.p7s");
1,749✔
3884
                                if (!p)
1,749✔
3885
                                        return -ENOMEM;
3886

3887
                                r = read_full_file(p, (char**) &root_hash_sig.iov_base, &root_hash_sig.iov_len);
1,749✔
3888
                                if (r < 0 && r != -ENOENT)
1,749✔
3889
                                        return r;
3890
                                if (r >= 0) {
1,749✔
3891
                                        designator = PARTITION_ROOT;
1,724✔
3892
                                        if (root_hash_sig.iov_len == 0) /* refuse empty size signatures */
1,724✔
3893
                                                return -EINVAL;
3894
                                }
3895
                        }
3896

3897
                        if (!iovec_is_set(&root_hash_sig) && (designator < 0 || designator == PARTITION_USR)) {
1,752✔
3898
                                _cleanup_free_ char *p = NULL;
3✔
3899

3900
                                p = build_auxiliary_path(image, ".usrhash.p7s");
3✔
3901
                                if (!p)
3✔
3902
                                        return -ENOMEM;
3903

3904
                                r = read_full_file(p, (char**) &root_hash_sig.iov_base, &root_hash_sig.iov_len);
3✔
3905
                                if (r < 0 && r != -ENOENT)
3✔
3906
                                        return r;
3907
                                if (r >= 0) {
3✔
3908
                                        designator = PARTITION_USR;
×
3909
                                        if (root_hash_sig.iov_len == 0) /* refuse empty size signatures */
×
3910
                                                return -EINVAL;
3911
                                }
3912
                        }
3913
                }
3914
        }
3915

3916
        _cleanup_free_ char *verity_data_path = NULL;
2,445✔
3917
        if (!verity->data_path) {
2,445✔
3918
                _cleanup_free_ char *p = NULL;
2,442✔
3919

3920
                p = build_auxiliary_path(image, ".verity");
2,442✔
3921
                if (!p)
2,442✔
3922
                        return -ENOMEM;
3923

3924
                if (access(p, F_OK) < 0) {
2,442✔
3925
                        if (errno != ENOENT)
719✔
3926
                                return -errno;
×
3927
                } else
3928
                        verity_data_path = TAKE_PTR(p);
3929
        }
3930

3931
        if (iovec_is_set(&root_hash))
2,445✔
3932
                verity->root_hash = TAKE_STRUCT(root_hash);
1,723✔
3933

3934
        if (iovec_is_set(&root_hash_sig))
2,445✔
3935
                verity->root_hash_sig = TAKE_STRUCT(root_hash_sig);
1,724✔
3936

3937
        if (verity_data_path)
2,445✔
3938
                verity->data_path = TAKE_PTR(verity_data_path);
1,723✔
3939

3940
        if (verity->designator < 0)
2,445✔
3941
                verity->designator = designator;
2,410✔
3942

3943
        return 1;
3944
}
3945

3946
/* Makes a deep copy of 'source' in 'dest'.
 *
 * If 'source' is NULL, 'dest' is reset to VERITY_SETTINGS_DEFAULT and 0 is returned. Otherwise the root
 * hash, the root hash signature and the data path are duplicated (each only if set in 'source'), the
 * designator is copied verbatim, and 1 is returned. If any duplication fails, 'dest' is left untouched
 * and the result of log_oom_debug() is returned.
 *
 * Note that 'dest' is overwritten by plain assignment: whatever it referenced before is not released
 * here. */
int verity_settings_copy(VeritySettings *dest, const VeritySettings *source) {
        _cleanup_(iovec_done) struct iovec hash_copy = {}, sig_copy = {};
        _cleanup_free_ char *path_copy = NULL;

        assert(dest);

        if (!source) {
                *dest = VERITY_SETTINGS_DEFAULT;
                return 0;
        }

        /* Duplicate everything into temporaries first, so that a failure half-way leaves 'dest' alone. */
        if (iovec_is_set(&source->root_hash) &&
            !iovec_memdup(&source->root_hash, &hash_copy))
                return log_oom_debug();

        if (iovec_is_set(&source->root_hash_sig) &&
            !iovec_memdup(&source->root_hash_sig, &sig_copy))
                return log_oom_debug();

        if (source->data_path) {
                path_copy = strdup(source->data_path);
                if (!path_copy)
                        return log_oom_debug();
        }

        /* Everything succeeded, hand ownership of the copies over to 'dest'. */
        *dest = (VeritySettings) {
                .root_hash = TAKE_STRUCT(hash_copy),
                .root_hash_sig = TAKE_STRUCT(sig_copy),
                .data_path = TAKE_PTR(path_copy),
                .designator = source->designator,
        };

        return 1;
}
3982

3983
int dissected_image_load_verity_sig_partition(
2,496✔
3984
                DissectedImage *m,
3985
                int fd,
3986
                VeritySettings *verity) {
3987

3988
        int r;
2,496✔
3989

3990
        assert(m);
2,496✔
3991
        assert(fd >= 0);
2,496✔
3992
        assert(verity);
2,496✔
3993

3994
        if (iovec_is_set(&verity->root_hash) && iovec_is_set(&verity->root_hash_sig)) /* Already loaded? */
4,174✔
3995
                return 0;
2,496✔
3996

3997
        r = secure_getenv_bool("SYSTEMD_DISSECT_VERITY_EMBEDDED");
840✔
3998
        if (r < 0 && r != -ENXIO)
840✔
3999
                log_debug_errno(r, "Failed to parse $SYSTEMD_DISSECT_VERITY_EMBEDDED, ignoring: %m");
×
4000
        if (r == 0)
840✔
4001
                return 0;
4002

4003
        PartitionDesignator dd = verity->designator;
840✔
4004
        if (dd < 0) {
840✔
4005
                if (m->partitions[PARTITION_ROOT_VERITY].found)
818✔
4006
                        dd = PARTITION_ROOT;
4007
                else if (m->partitions[PARTITION_USR_VERITY].found)
773✔
4008
                        dd = PARTITION_USR;
4009
                else
4010
                        return 0;
4011
        }
4012

4013
        if (!m->partitions[dd].found)
68✔
4014
                return 0;
4015

4016
        PartitionDesignator dv = partition_verity_hash_of(dd);
68✔
4017
        assert(dv >= 0);
68✔
4018
        if (!m->partitions[dv].found)
68✔
4019
                return 0;
4020

4021
        PartitionDesignator ds = partition_verity_sig_of(dd);
68✔
4022
        assert(ds >= 0);
68✔
4023

4024
        DissectedPartition *p = m->partitions + ds;
68✔
4025
        if (!p->found)
68✔
4026
                return 0;
4027

4028
        _cleanup_(iovec_done) struct iovec root_hash = {}, root_hash_sig = {};
65✔
4029
        r = acquire_sig_for_roothash(
65✔
4030
                        fd,
4031
                        p->offset,
4032
                        p->size,
4033
                        &root_hash,
4034
                        &root_hash_sig);
4035
        if (r < 0)
65✔
4036
                return r;
4037

4038
        /* Check if specified root hash matches if it is specified */
4039
        if (iovec_is_set(&verity->root_hash) &&
87✔
4040
            iovec_memcmp(&verity->root_hash, &root_hash) != 0) {
22✔
4041
                _cleanup_free_ char *a = NULL, *b = NULL;
×
4042

4043
                a = hexmem(root_hash.iov_base, root_hash.iov_len);
×
4044
                b = hexmem(verity->root_hash.iov_base, verity->root_hash.iov_len);
×
4045

4046
                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Root hash in signature JSON data (%s) doesn't match configured hash (%s).", strna(a), strna(b));
×
4047
        }
4048

4049
        iovec_done(&verity->root_hash);
65✔
4050
        verity->root_hash = TAKE_STRUCT(root_hash);
65✔
4051

4052
        iovec_done(&verity->root_hash_sig);
65✔
4053
        verity->root_hash_sig = TAKE_STRUCT(root_hash_sig);
65✔
4054

4055
        verity->designator = dd;
65✔
4056

4057
        m->verity_ready = true;
65✔
4058
        m->verity_sig_ready = true;
65✔
4059
        m->partitions[dd].rw = false;
65✔
4060

4061
        return 1;
65✔
4062
}
4063

4064
int dissected_image_guess_verity_roothash(
2,496✔
4065
                DissectedImage *m,
4066
                VeritySettings *verity) {
4067

4068
        int r;
2,496✔
4069

4070
        assert(m);
2,496✔
4071
        assert(verity);
2,496✔
4072

4073
        /* Guesses the Verity root hash from the partitions we found, taking into account that as per
4074
         * https://uapi-group.org/specifications/specs/discoverable_partitions_specification/ the UUIDS of
4075
         * the data and verity partitions are respectively the first and second halves of the dm-verity
4076
         * roothash.
4077
         *
4078
         * Note of course that relying on this guesswork is mostly useful for later attestation, not so much
4079
         * for a-priori security. */
4080

4081
        if (iovec_is_set(&verity->root_hash)) /* Already loaded? */
2,496✔
4082
                return 0;
2,496✔
4083

4084
        r = secure_getenv_bool("SYSTEMD_DISSECT_VERITY_GUESS");
775✔
4085
        if (r < 0 && r != -ENXIO)
775✔
4086
                log_debug_errno(r, "Failed to parse $SYSTEMD_DISSECT_VERITY_GUESS, ignoring: %m");
×
4087
        if (r == 0)
775✔
4088
                return 0;
4089

4090
        PartitionDesignator dd = verity->designator;
775✔
4091
        if (dd < 0) {
775✔
4092
                if (m->partitions[PARTITION_ROOT_VERITY].found)
775✔
4093
                        dd = PARTITION_ROOT;
4094
                else if (m->partitions[PARTITION_USR_VERITY].found)
773✔
4095
                        dd = PARTITION_USR;
4096
                else
4097
                        return 0;
4098
        }
4099

4100
        DissectedPartition *d = m->partitions + dd;
3✔
4101
        if (!d->found)
3✔
4102
                return 0;
4103

4104
        PartitionDesignator dv = partition_verity_hash_of(dd);
3✔
4105
        assert(dv >= 0);
3✔
4106

4107
        DissectedPartition *p = m->partitions + dv;
3✔
4108
        if (!p->found)
3✔
4109
                return 0;
4110

4111
        _cleanup_free_ void *rh = malloc(sizeof(sd_id128_t) * 2);
3✔
4112
        if (!rh)
3✔
4113
                return log_oom_debug();
×
4114

4115
        memcpy(mempcpy(rh, &d->uuid, sizeof(sd_id128_t)), &p->uuid, sizeof(sd_id128_t));
3✔
4116
        verity->root_hash = IOVEC_MAKE(TAKE_PTR(rh), sizeof(sd_id128_t) * 2);
3✔
4117

4118
        verity->designator = dd;
3✔
4119

4120
        m->verity_ready = true;
3✔
4121
        m->partitions[dd].rw = false;
3✔
4122

4123
        return 0;
3✔
4124
}
4125

4126
int dissected_image_acquire_metadata(
113✔
4127
                DissectedImage *m,
4128
                int userns_fd,
4129
                DissectImageFlags extra_flags) {
4130

4131
        enum {
113✔
4132
                META_HOSTNAME,
4133
                META_MACHINE_ID,
4134
                META_MACHINE_INFO,
4135
                META_OS_RELEASE,
4136
                META_INITRD_RELEASE,
4137
                META_SYSEXT_RELEASE,
4138
                META_CONFEXT_RELEASE,
4139
                META_HAS_INIT_SYSTEM,
4140
                _META_MAX,
4141
        };
4142

4143
        static const char *const paths[_META_MAX] = {
113✔
4144
                [META_HOSTNAME]          = "/etc/hostname\0",
4145
                [META_MACHINE_ID]        = "/etc/machine-id\0",
4146
                [META_MACHINE_INFO]      = "/etc/machine-info\0",
4147
                [META_OS_RELEASE]        = "/etc/os-release\0"
4148
                                           "/usr/lib/os-release\0",
4149
                [META_INITRD_RELEASE]    = "/etc/initrd-release\0"
4150
                                           "/usr/lib/initrd-release\0",
4151
                [META_SYSEXT_RELEASE]    = "sysext-release\0",       /* String used only for logging. */
4152
                [META_CONFEXT_RELEASE]   = "confext-release\0",      /* ditto */
4153
                [META_HAS_INIT_SYSTEM]   = "has-init-system\0",      /* ditto */
4154
        };
4155

4156
        _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL, **initrd_release = NULL, **sysext_release = NULL, **confext_release = NULL;
113✔
4157
        _cleanup_free_ char *hostname = NULL, *t = NULL;
113✔
4158
        _cleanup_close_pair_ int error_pipe[2] = EBADF_PAIR;
113✔
4159
        _cleanup_(pidref_done_sigkill_wait) PidRef child = PIDREF_NULL;
113✔
4160
        sd_id128_t machine_id = SD_ID128_NULL;
113✔
4161
        unsigned n_meta_initialized = 0;
113✔
4162
        int fds[2 * _META_MAX], r, v;
113✔
4163
        int has_init_system = -1;
113✔
4164
        ssize_t n;
113✔
4165

4166
        assert(m);
113✔
4167

4168
        r = dlopen_libmount();
113✔
4169
        if (r < 0)
113✔
4170
                return r;
4171

4172
        for (; n_meta_initialized < _META_MAX; n_meta_initialized++) {
1,017✔
4173
                assert(paths[n_meta_initialized]);
904✔
4174

4175
                if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) {
904✔
4176
                        r = -errno;
×
4177
                        goto finish;
×
4178
                }
4179
        }
4180

4181
        r = get_common_dissect_directory(&t);
113✔
4182
        if (r < 0)
113✔
4183
                goto finish;
×
4184

4185
        if (pipe2(error_pipe, O_CLOEXEC) < 0) {
113✔
4186
                r = -errno;
×
4187
                goto finish;
×
4188
        }
4189

4190
        r = pidref_safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGTERM, &child);
113✔
4191
        if (r < 0)
187✔
4192
                goto finish;
×
4193
        if (r == 0) {
187✔
4194
                /* Child */
4195
                error_pipe[0] = safe_close(error_pipe[0]);
74✔
4196

4197
                if (userns_fd < 0)
74✔
4198
                        r = detach_mount_namespace_harder(0, 0);
73✔
4199
                else
4200
                        r = detach_mount_namespace_userns(userns_fd);
1✔
4201
                if (r < 0) {
74✔
4202
                        log_debug_errno(r, "Failed to detach mount namespace: %m");
×
4203
                        report_errno_and_exit(error_pipe[1], r);
×
4204
                }
4205

4206
                r = dissected_image_mount(
148✔
4207
                                m,
4208
                                t,
4209
                                /* uid_shift= */ UID_INVALID,
4210
                                /* uid_range= */ UID_INVALID,
4211
                                /* userns_fd= */ -EBADF,
4212
                                extra_flags |
4213
                                DISSECT_IMAGE_READ_ONLY |
4214
                                DISSECT_IMAGE_MOUNT_ROOT_ONLY |
74✔
4215
                                DISSECT_IMAGE_USR_NO_ROOT);
4216
                if (r < 0) {
74✔
4217
                        log_debug_errno(r, "Failed to mount dissected image: %m");
×
4218
                        report_errno_and_exit(error_pipe[1], r);
×
4219
                }
4220

4221
                for (unsigned k = 0; k < _META_MAX; k++) {
666✔
4222
                        _cleanup_close_ int fd = -ENOENT;
592✔
4223

4224
                        assert(paths[k]);
592✔
4225

4226
                        fds[2*k] = safe_close(fds[2*k]);
592✔
4227

4228
                        switch (k) {
592✔
4229

4230
                        case META_SYSEXT_RELEASE:
74✔
4231
                                if (!m->image_name)
74✔
4232
                                        goto next;
×
4233

4234
                                /* As per the os-release spec, if the image is an extension it will have a
4235
                                 * file named after the image name in extension-release.d/ - we use the image
4236
                                 * name and try to resolve it with the extension-release helpers, as
4237
                                 * sometimes the image names are mangled on deployment and do not match
4238
                                 * anymore.  Unlike other paths this is not fixed, and the image name can be
4239
                                 * mangled on deployment, so by calling into the helper we allow a fallback
4240
                                 * that matches on the first extension-release file found in the directory,
4241
                                 * if one named after the image cannot be found first. */
4242
                                r = open_extension_release(
74✔
4243
                                                t,
4244
                                                IMAGE_SYSEXT,
4245
                                                m->image_name,
4246
                                                /* relax_extension_release_check= */ false,
4247
                                                /* ret_path= */ NULL,
4248
                                                &fd);
4249
                                if (r < 0)
74✔
4250
                                        fd = r;
33✔
4251
                                break;
4252

4253
                        case META_CONFEXT_RELEASE:
74✔
4254
                                if (!m->image_name)
74✔
4255
                                        goto next;
×
4256

4257
                                /* As above */
4258
                                r = open_extension_release(
74✔
4259
                                                t,
4260
                                                IMAGE_CONFEXT,
4261
                                                m->image_name,
4262
                                                /* relax_extension_release_check= */ false,
4263
                                                /* ret_path= */ NULL,
4264
                                                &fd);
4265
                                if (r < 0)
74✔
4266
                                        fd = r;
69✔
4267

4268
                                break;
4269

4270
                        case META_HAS_INIT_SYSTEM: {
74✔
4271
                                bool found = false;
74✔
4272

4273
                                FOREACH_STRING(init,
281✔
4274
                                               "/usr/lib/systemd/systemd",  /* systemd on /usr/ merged system */
4275
                                               "/lib/systemd/systemd",      /* systemd on /usr/ non-merged systems */
4276
                                               "/sbin/init") {              /* traditional path the Linux kernel invokes */
4277

4278
                                        r = chase(init, t, CHASE_PREFIX_ROOT, NULL, NULL);
222✔
4279
                                        if (r < 0) {
222✔
4280
                                                if (r != -ENOENT)
207✔
4281
                                                        log_debug_errno(r, "Failed to resolve %s, ignoring: %m", init);
207✔
4282
                                        } else {
4283
                                                found = true;
15✔
4284
                                                break;
15✔
4285
                                        }
4286
                                }
4287

4288
                                r = loop_write(fds[2*k+1], &found, sizeof(found));
74✔
4289
                                if (r < 0)
74✔
4290
                                        report_errno_and_exit(error_pipe[1], r);
×
4291

4292
                                goto next;
74✔
4293
                        }
4294

4295
                        default:
4296
                                NULSTR_FOREACH(p, paths[k]) {
840✔
4297
                                        fd = chase_and_open(p, t, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL);
503✔
4298
                                        if (fd >= 0)
503✔
4299
                                                break;
4300
                                }
4301
                        }
4302

4303
                        if (fd < 0) {
518✔
4304
                                log_debug_errno(fd, "Failed to read %s file of image, ignoring: %m", paths[k]);
439✔
4305
                                goto next;
439✔
4306
                        }
4307

4308
                        r = copy_bytes(fd, fds[2*k+1], UINT64_MAX, 0);
79✔
4309
                        if (r < 0)
79✔
4310
                                report_errno_and_exit(error_pipe[1], r);
×
4311

4312
                next:
79✔
4313
                        fds[2*k+1] = safe_close(fds[2*k+1]);
592✔
4314
                }
4315

4316
                _exit(EXIT_SUCCESS);
74✔
4317
        }
4318

4319
        error_pipe[1] = safe_close(error_pipe[1]);
113✔
4320

4321
        for (unsigned k = 0; k < _META_MAX; k++) {
1,017✔
4322
                _cleanup_fclose_ FILE *f = NULL;
904✔
4323

4324
                assert(paths[k]);
904✔
4325

4326
                fds[2*k+1] = safe_close(fds[2*k+1]);
904✔
4327

4328
                f = take_fdopen(&fds[2*k], "r");
904✔
4329
                if (!f) {
904✔
4330
                        r = -errno;
×
4331
                        goto finish;
×
4332
                }
4333

4334
                switch (k) {
904✔
4335

4336
                case META_HOSTNAME:
113✔
4337
                        r = read_etc_hostname_stream(f, /* substitute_wildcards= */ false, &hostname);
113✔
4338
                        if (r < 0)
113✔
4339
                                log_debug_errno(r, "Failed to read /etc/hostname of image: %m");
113✔
4340

4341
                        break;
4342

4343
                case META_MACHINE_ID: {
113✔
4344
                        _cleanup_free_ char *line = NULL;
113✔
4345

4346
                        r = read_line(f, LONG_LINE_MAX, &line);
113✔
4347
                        if (r < 0)
113✔
4348
                                log_debug_errno(r, "Failed to read /etc/machine-id of image: %m");
×
4349
                        else if (r == 33) {
113✔
4350
                                r = sd_id128_from_string(line, &machine_id);
×
4351
                                if (r < 0)
×
4352
                                        log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line);
×
4353
                        } else if (r == 0)
113✔
4354
                                log_debug("/etc/machine-id file of image is empty.");
98✔
4355
                        else if (streq(line, "uninitialized"))
15✔
4356
                                log_debug("/etc/machine-id file of image is uninitialized (likely aborted first boot).");
15✔
4357
                        else
4358
                                log_debug("/etc/machine-id file of image has unexpected length %i.", r);
×
4359

4360
                        break;
113✔
4361
                }
4362

4363
                case META_MACHINE_INFO:
113✔
4364
                        r = load_env_file_pairs(f, "machine-info", &machine_info);
113✔
4365
                        if (r < 0)
113✔
4366
                                log_debug_errno(r, "Failed to read /etc/machine-info of image: %m");
×
4367

4368
                        break;
4369

4370
                case META_OS_RELEASE:
113✔
4371
                        r = load_env_file_pairs(f, "os-release", &os_release);
113✔
4372
                        if (r < 0)
113✔
4373
                                log_debug_errno(r, "Failed to read OS release file of image: %m");
×
4374

4375
                        break;
4376

4377
                case META_INITRD_RELEASE:
113✔
4378
                        r = load_env_file_pairs(f, "initrd-release", &initrd_release);
113✔
4379
                        if (r < 0)
113✔
4380
                                log_debug_errno(r, "Failed to read initrd release file of image: %m");
×
4381

4382
                        break;
4383

4384
                case META_SYSEXT_RELEASE:
113✔
4385
                        r = load_env_file_pairs(f, "sysext-release", &sysext_release);
113✔
4386
                        if (r < 0)
113✔
4387
                                log_debug_errno(r, "Failed to read sysext release file of image: %m");
×
4388

4389
                        break;
4390

4391
                case META_CONFEXT_RELEASE:
113✔
4392
                        r = load_env_file_pairs(f, "confext-release", &confext_release);
113✔
4393
                        if (r < 0)
113✔
4394
                                log_debug_errno(r, "Failed to read confext release file of image: %m");
904✔
4395

4396
                        break;
4397

4398
                case META_HAS_INIT_SYSTEM: {
113✔
4399
                        bool b = false;
113✔
4400
                        size_t nr;
113✔
4401

4402
                        errno = 0;
113✔
4403
                        nr = fread(&b, 1, sizeof(b), f);
113✔
4404
                        if (nr != sizeof(b))
113✔
4405
                                log_debug_errno(errno_or_else(EIO), "Failed to read has-init-system boolean: %m");
×
4406
                        else
4407
                                has_init_system = b;
113✔
4408

4409
                        break;
113✔
4410
                }}
4411
        }
4412

4413
        r = pidref_wait_for_terminate_and_check("(sd-dissect)", &child, 0);
113✔
4414
        if (r < 0)
113✔
4415
                goto finish;
×
4416

4417
        pidref_done(&child);
113✔
4418

4419
        n = read(error_pipe[0], &v, sizeof(v));
113✔
4420
        if (n < 0) {
113✔
4421
                r = -errno;
×
4422
                goto finish;
×
4423
        }
4424
        if (n == sizeof(v)) {
113✔
4425
                r = v; /* propagate error sent to us from child */
×
4426
                goto finish;
×
4427
        }
4428
        if (n != 0) {
113✔
4429
                r = -EIO;
×
4430
                goto finish;
×
4431
        }
4432
        if (r != EXIT_SUCCESS) {
113✔
4433
                r = -EPROTO;
×
4434
                goto finish;
×
4435
        }
4436

4437
        free_and_replace(m->hostname, hostname);
113✔
4438
        m->machine_id = machine_id;
113✔
4439
        strv_free_and_replace(m->machine_info, machine_info);
113✔
4440
        strv_free_and_replace(m->os_release, os_release);
113✔
4441
        strv_free_and_replace(m->initrd_release, initrd_release);
113✔
4442
        strv_free_and_replace(m->sysext_release, sysext_release);
113✔
4443
        strv_free_and_replace(m->confext_release, confext_release);
113✔
4444
        m->has_init_system = has_init_system;
113✔
4445

4446
finish:
113✔
4447
        for (unsigned k = 0; k < n_meta_initialized; k++)
1,017✔
4448
                safe_close_pair(fds + 2*k);
904✔
4449

4450
        return r;
4451
}
4452

4453
/* Returns the architecture of the dissected image: that of the root partition if it was found and has a
 * valid architecture, otherwise that of the /usr/ partition under the same conditions, otherwise
 * _ARCHITECTURE_INVALID. */
Architecture dissected_image_architecture(DissectedImage *m) {
        const DissectedPartition *root, *usr;

        assert(m);

        root = m->partitions + PARTITION_ROOT;
        if (root->found && root->architecture >= 0)
                return root->architecture;

        usr = m->partitions + PARTITION_USR;
        if (usr->found && usr->architecture >= 0)
                return usr->architecture;

        return _ARCHITECTURE_INVALID;
}
4466

4467
/* Returns true if 'm' is non-NULL and its os-release data carries a PORTABLE_PREFIXES= or
 * PORTABLE_SCOPE= field. */
bool dissected_image_is_portable(DissectedImage *m) {
        if (!m)
                return false;

        if (strv_env_pairs_get(m->os_release, "PORTABLE_PREFIXES"))
                return true;

        return !!strv_env_pairs_get(m->os_release, "PORTABLE_SCOPE");
}
4470

4471
/* Returns true if 'm' is non-NULL and has non-empty initrd-release data. */
bool dissected_image_is_initrd(DissectedImage *m) {
        if (!m)
                return false;

        return !strv_isempty(m->initrd_release);
}
4474

4475
int dissect_loop_device(
2,574✔
4476
                LoopDevice *loop,
4477
                const VeritySettings *verity,
4478
                const MountOptions *mount_options,
4479
                const ImagePolicy *image_policy,
4480
                const ImageFilter *image_filter,
4481
                DissectImageFlags flags,
4482
                DissectedImage **ret) {
4483

4484
#if HAVE_BLKID
4485
        _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
2,574✔
4486
        int r;
2,574✔
4487

4488
        assert(loop);
2,574✔
4489

4490
        r = dissected_image_new(loop->backing_file ?: loop->node, &m);
2,574✔
4491
        if (r < 0)
2,574✔
4492
                return r;
4493

4494
        m->loop = loop_device_ref(loop);
2,574✔
4495
        m->image_size = m->loop->device_size;
2,574✔
4496
        m->sector_size = m->loop->sector_size;
2,574✔
4497

4498
        r = dissect_image(
5,148✔
4499
                        m,
4500
                        loop->fd,
4501
                        loop->node,
2,574✔
4502
                        verity,
4503
                        mount_options,
4504
                        image_policy,
4505
                        image_filter,
4506
                        flags);
4507
        if (r < 0)
2,574✔
4508
                return r;
4509

4510
        if (ret)
2,501✔
4511
                *ret = TAKE_PTR(m);
2,501✔
4512

4513
        return 0;
4514
#else
4515
        return -EOPNOTSUPP;
4516
#endif
4517
}
4518

4519
int dissect_loop_device_and_warn(
151✔
4520
                LoopDevice *loop,
4521
                const VeritySettings *verity,
4522
                const MountOptions *mount_options,
4523
                const ImagePolicy *image_policy,
4524
                const ImageFilter *image_filter,
4525
                DissectImageFlags flags,
4526
                DissectedImage **ret) {
4527

4528
        assert(loop);
151✔
4529

4530
        return dissect_log_error(
151✔
4531
                        LOG_ERR,
4532
                        dissect_loop_device(loop, verity, mount_options, image_policy, image_filter, flags, ret),
4533
                        loop->backing_file ?: loop->node,
151✔
4534
                        verity);
4535
}
4536

4537
/* Tells whether the specified partition could theoretically be protected by Verity.
 *
 * For images that consist of a single file system this is only the case for the root partition, and only
 * if an external Verity file was supplied (which .has_verity tells us). For partitioned images only the
 * partition type is consulted, i.e. whether a Verity hash partition type exists for the designator.
 *
 * Used to decide whether a Verity column shall be shown or suppressed in tabular output of the image. */
bool dissected_image_verity_candidate(const DissectedImage *image, PartitionDesignator partition_designator) {
        assert(image);

        if (!image->single_file_system)
                return partition_verity_hash_of(partition_designator) >= 0;

        return image->has_verity && partition_designator == PARTITION_ROOT;
}
4552

4553
/* Tells whether Verity data that can be activated is available for the specified partition.
 *
 * This is never the case if the image is not marked Verity-ready. Otherwise, for single file system
 * images it holds for the root partition only; for partitioned images it holds if the Verity hash
 * partition associated with the designator was found. */
bool dissected_image_verity_ready(const DissectedImage *image, PartitionDesignator partition_designator) {
        PartitionDesignator hash_designator;

        assert(image);

        if (!image->verity_ready)
                return false;

        if (image->single_file_system)
                return partition_designator == PARTITION_ROOT;

        hash_designator = partition_verity_hash_of(partition_designator);
        if (hash_designator < 0)
                return false;

        return image->partitions[hash_designator].found;
}
4570

4571
/* Tells whether usable Verity signature data is available for the specified partition.
 *
 * This is never the case if the image is not marked as having signature data ready. Otherwise, for
 * single file system images it holds for the root partition only; for partitioned images it holds if
 * the Verity signature partition associated with the designator was found. */
bool dissected_image_verity_sig_ready(const DissectedImage *image, PartitionDesignator partition_designator) {
        PartitionDesignator sig_designator;

        assert(image);

        if (!image->verity_sig_ready)
                return false;

        if (image->single_file_system)
                return partition_designator == PARTITION_ROOT;

        sig_designator = partition_verity_sig_of(partition_designator);
        if (sig_designator < 0)
                return false;

        return image->partitions[sig_designator].found;
}
4587

4588
/* Sets the mount option string for partition designator 'd' in '*options' to 's', taking ownership of
 * 's' in all cases.
 *
 * If '*options' is NULL a new, zero-initialized MountOptions object is allocated first. If that
 * allocation fails 's' is freed and the result of log_oom() is returned. Otherwise any string previously
 * stored for 'd' is released and replaced by 's', and the result of free_and_replace() is returned
 * (presumably 0 — confirm against its definition). */
int mount_options_set_and_consume(MountOptions **options, PartitionDesignator d, char *s) {
        assert(options);
        /* 'd' indexes the options[] array below, hence validate both bounds, like
         * mount_options_from_designator() does. */
        assert(d >= 0 && d < _PARTITION_DESIGNATOR_MAX);

        if (!*options) {
                *options = new0(MountOptions, 1);
                if (!*options) {
                        free(s); /* we own 's', don't leak it on failure */
                        return log_oom();
                }
        }

        return free_and_replace((*options)->options[d], s);
}
4602

4603
/* Creates a deep copy of 'source' and returns it in '*ret'.
 *
 * Every option string that is set in 'source' is duplicated; unset entries stay NULL in the copy.
 * Returns 0 on success. On allocation failure the partially built copy is released again and the result
 * of log_oom_debug() is returned; '*ret' is not modified in that case. */
int mount_options_dup(const MountOptions *source, MountOptions **ret) {
        _cleanup_(mount_options_free_allp) MountOptions *copy = NULL;

        assert(source);
        assert(ret);

        copy = new0(MountOptions, 1);
        if (!copy)
                return log_oom_debug();

        for (PartitionDesignator d = 0; d < _PARTITION_DESIGNATOR_MAX; d++) {
                if (!source->options[d])
                        continue;

                copy->options[d] = strdup(source->options[d]);
                if (!copy->options[d])
                        return log_oom_debug();
        }

        *ret = TAKE_PTR(copy);
        return 0;
}
4623

4624
int mount_options_to_string(const MountOptions *mount_options, char **ret) {
×
4625
        _cleanup_free_ char *s = NULL;
×
4626

4627
        assert(mount_options);
×
4628
        assert(ret);
×
4629

4630
        for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++)
×
4631
                if (!isempty(mount_options->options[i]))
×
4632
                        if (!strextend_with_separator(&s, ":", "%s:%s",
×
4633
                                       partition_designator_to_string(i),
4634
                                       mount_options->options[i]))
4635
                                return log_oom_debug();
×
4636

4637
        *ret = TAKE_PTR(s);
×
4638

4639
        return 0;
×
4640
}
4641

4642
/* Releases a MountOptions object including all option strings stored in it. NULL is accepted and
 * ignored. Always returns NULL, so that the result can be assigned to the pointer being released. */
MountOptions* mount_options_free_all(MountOptions *options) {
        if (options)
                free_many_charp(options->options, _PARTITION_DESIGNATOR_MAX);

        /* mfree() of a NULL pointer is harmless, hence no separate early return is needed. */
        return mfree(options);
}
4650

4651
/* Returns the mount option string stored for the specified partition designator, or NULL if 'options'
 * is NULL or no string is stored for it. The returned string remains owned by 'options'. */
const char* mount_options_from_designator(const MountOptions *options, PartitionDesignator designator) {
        assert(designator >= 0 && designator < _PARTITION_DESIGNATOR_MAX);

        return options ? options->options[designator] : NULL;
}
4659

4660
int mount_image_privately_interactively(
28✔
4661
                const char *image,
4662
                const ImagePolicy *image_policy,
4663
                DissectImageFlags flags,
4664
                char **ret_directory,
4665
                int *ret_dir_fd,
4666
                LoopDevice **ret_loop_device) {
4667

4668
        _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
28✔
4669
        _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
28✔
4670
        _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
×
4671
        _cleanup_free_ char *dir = NULL;
28✔
4672
        int r;
28✔
4673

4674
        /* Mounts an OS image at a temporary place, inside a newly created mount namespace of our own. This
4675
         * is used by tools such as systemd-tmpfiles or systemd-firstboot to operate on some disk image
4676
         * easily. */
4677

4678
        assert(image);
28✔
4679
        assert(ret_loop_device);
28✔
4680

4681
        /* We intend to mount this right-away, hence add the partitions if needed and pin them. */
4682
        flags |= DISSECT_IMAGE_ADD_PARTITION_DEVICES |
28✔
4683
                DISSECT_IMAGE_PIN_PARTITION_DEVICES;
4684

4685
        r = verity_settings_load(&verity, image, NULL, NULL);
28✔
4686
        if (r < 0)
28✔
4687
                return log_error_errno(r, "Failed to load root hash data: %m");
×
4688

4689
        r = loop_device_make_by_path(
56✔
4690
                        image,
4691
                        FLAGS_SET(flags, DISSECT_IMAGE_DEVICE_READ_ONLY) ? O_RDONLY : -1,
4692
                        /* sector_size= */ UINT32_MAX,
4693
                        FLAGS_SET(flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN,
4694
                        LOCK_SH,
4695
                        &d);
4696
        if (r < 0)
28✔
4697
                return log_error_errno(r, "Failed to set up loopback device for %s: %m", image);
×
4698

4699
        r = dissect_loop_device_and_warn(
28✔
4700
                        d,
4701
                        &verity,
4702
                        /* mount_options= */ NULL,
4703
                        image_policy,
4704
                        /* image_filter= */ NULL,
4705
                        flags,
4706
                        &dissected_image);
4707
        if (r < 0)
28✔
4708
                return r;
4709

4710
        r = dissected_image_load_verity_sig_partition(dissected_image, d->fd, &verity);
28✔
4711
        if (r < 0)
28✔
4712
                return r;
4713

4714
        r = dissected_image_guess_verity_roothash(dissected_image, &verity);
28✔
4715
        if (r < 0)
28✔
4716
                return r;
4717

4718
        r = dissected_image_decrypt_interactively(dissected_image, NULL, &verity, image_policy, flags);
28✔
4719
        if (r < 0)
28✔
4720
                return r;
4721

4722
        r = detach_mount_namespace();
28✔
4723
        if (r < 0)
28✔
4724
                return log_error_errno(r, "Failed to detach mount namespace: %m");
×
4725

4726
        r = mkdir_p("/run/systemd/mount-rootfs", 0555);
28✔
4727
        if (r < 0)
28✔
4728
                return log_error_errno(r, "Failed to create mount point: %m");
×
4729

4730
        r = dissected_image_mount_and_warn(
28✔
4731
                        dissected_image,
4732
                        "/run/systemd/mount-rootfs",
4733
                        /* uid_shift= */ UID_INVALID,
4734
                        /* uid_range= */ UID_INVALID,
4735
                        /* userns_fd= */ -EBADF,
4736
                        flags);
4737
        if (r < 0)
28✔
4738
                return r;
4739

4740
        r = loop_device_flock(d, LOCK_UN);
28✔
4741
        if (r < 0)
28✔
4742
                return r;
4743

4744
        r = dissected_image_relinquish(dissected_image);
28✔
4745
        if (r < 0)
28✔
4746
                return log_error_errno(r, "Failed to relinquish DM and loopback block devices: %m");
×
4747

4748
        if (ret_directory) {
28✔
4749
                dir = strdup("/run/systemd/mount-rootfs");
28✔
4750
                if (!dir)
28✔
4751
                        return log_oom();
×
4752
        }
4753

4754
        if (ret_dir_fd) {
28✔
4755
                _cleanup_close_ int dir_fd = -EBADF;
28✔
4756

4757
                dir_fd = open("/run/systemd/mount-rootfs", O_CLOEXEC|O_DIRECTORY);
×
4758
                if (dir_fd < 0)
×
4759
                        return log_error_errno(errno, "Failed to open mount point directory: %m");
×
4760

4761
                *ret_dir_fd = TAKE_FD(dir_fd);
×
4762
        }
4763

4764
        if (ret_directory)
28✔
4765
                *ret_directory = TAKE_PTR(dir);
28✔
4766

4767
        *ret_loop_device = TAKE_PTR(d);
28✔
4768
        return 0;
28✔
4769
}
4770

4771
static bool mount_options_relax_extension_release_checks(const MountOptions *options) {
173✔
4772
        if (!options)
173✔
4773
                return false;
4774

4775
        return string_contains_word(mount_options_from_designator(options, PARTITION_ROOT), ",", "x-systemd.relax-extension-release-check") ||
18✔
4776
                        string_contains_word(mount_options_from_designator(options, PARTITION_USR), ",", "x-systemd.relax-extension-release-check");
9✔
4777
}
4778

4779
int verity_dissect_and_mount(
94✔
4780
                int src_fd,
4781
                const char *src,
4782
                const char *dest,
4783
                const MountOptions *options,
4784
                const ImagePolicy *image_policy,
4785
                const ImageFilter *image_filter,
4786
                const ExtensionReleaseData *extension_release_data,
4787
                ImageClass required_class,
4788
                VeritySettings *verity,
4789
                RuntimeScope runtime_scope,
4790
                DissectedImage **ret_image) {
4791

4792
        _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
94✔
4793
        _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
94✔
4794
        _cleanup_(verity_settings_done) VeritySettings local_verity = VERITY_SETTINGS_DEFAULT;
×
4795
        _cleanup_close_ int userns_fd = -EBADF;
94✔
4796
        DissectImageFlags dissect_image_flags;
94✔
4797
        bool relax_extension_release_check;
94✔
4798
        int r;
94✔
4799

4800
        assert(src);
94✔
4801
        /* Verifying release metadata requires mounted image for now, so ensure the check is skipped when
4802
         * opening an image without mounting it immediately (i.e.: 'dest' is NULL). */
4803
        assert(!extension_release_data || dest);
94✔
4804

4805
        relax_extension_release_check = mount_options_relax_extension_release_checks(options);
94✔
4806

4807
        /* We might get an FD for the image, but we use the original path to look for the dm-verity files.
4808
         * The caller might also give us a pre-loaded VeritySettings, in which case we just use it. It will
4809
         * also be extended, as dissected_image_load_verity_sig_partition() is invoked. */
4810
        if (!verity) {
94✔
4811
                r = verity_settings_load(&local_verity, src, NULL, NULL);
2✔
4812
                if (r < 0)
2✔
4813
                        return log_debug_errno(r, "Failed to load root hash: %m");
×
4814

4815
                verity = &local_verity;
4816
        }
4817

4818
        dissect_image_flags =
188✔
4819
                (verity->data_path ? DISSECT_IMAGE_NO_PARTITION_TABLE : 0) |
94✔
4820
                (relax_extension_release_check ? DISSECT_IMAGE_RELAX_EXTENSION_CHECK : 0) |
94✔
4821
                DISSECT_IMAGE_ADD_PARTITION_DEVICES |
4822
                DISSECT_IMAGE_PIN_PARTITION_DEVICES |
4823
                DISSECT_IMAGE_ALLOW_USERSPACE_VERITY |
94✔
4824
                DISSECT_IMAGE_VERITY_SHARE;
4825

4826
        /* First check if we have a verity device already open and with a fstype pinned by policy. If it
4827
         * cannot be found, then fallback to the slow path (full dissect). */
4828
        r = dissected_image_new_from_existing_verity(
96✔
4829
                        src_fd >= 0 ? FORMAT_PROC_FD_PATH(src_fd) : src,
2✔
4830
                        verity,
4831
                        options,
4832
                        image_policy,
4833
                        image_filter,
4834
                        runtime_scope,
4835
                        dissect_image_flags,
4836
                        &dissected_image);
4837
        if (r < 0 && !ERRNO_IS_NEG_DEVICE_ABSENT(r) && r != -ENOPKG)
94✔
4838
                return r;
4839
        if (r >= 0)
×
4840
                log_debug("Reusing pre-existing verity-protected image %s", src_fd >= 0 ? FORMAT_PROC_FD_PATH(src_fd) : src);
×
4841
        else {
4842
                if (runtime_scope == RUNTIME_SCOPE_SYSTEM) {
94✔
4843
                        /* Note that we don't use loop_device_make here, as the FD is most likely O_PATH which would not be
4844
                        * accepted by LOOP_CONFIGURE, so just let loop_device_make_by_path reopen it as a regular FD. */
4845
                        r = loop_device_make_by_path(
88✔
4846
                                        src_fd >= 0 ? FORMAT_PROC_FD_PATH(src_fd) : src,
2✔
4847
                                        /* open_flags= */ -1,
4848
                                        /* sector_size= */ UINT32_MAX,
4849
                                        verity->data_path ? 0 : LO_FLAGS_PARTSCAN,
88✔
4850
                                        LOCK_SH,
4851
                                        &loop_device);
4852
                        if (r < 0)
88✔
4853
                                return log_debug_errno(r, "Failed to create loop device for image: %m");
1✔
4854

4855
                        r = dissect_loop_device(
87✔
4856
                                        loop_device,
4857
                                        verity,
4858
                                        options,
4859
                                        image_policy,
4860
                                        image_filter,
4861
                                        dissect_image_flags,
4862
                                        &dissected_image);
4863
                        /* No partition table? Might be a single-filesystem image, try again */
4864
                        if (!verity->data_path && r == -ENOPKG)
87✔
4865
                                r = dissect_loop_device(
68✔
4866
                                                loop_device,
4867
                                                verity,
4868
                                                options,
4869
                                                image_policy,
4870
                                                image_filter,
4871
                                                dissect_image_flags | DISSECT_IMAGE_NO_PARTITION_TABLE,
68✔
4872
                                                &dissected_image);
4873
                        if (r < 0)
87✔
4874
                                return log_debug_errno(r, "Failed to dissect image: %m");
×
4875

4876
                        r = dissected_image_load_verity_sig_partition(dissected_image, loop_device->fd, verity);
87✔
4877
                        if (r < 0)
87✔
4878
                                return r;
4879

4880
                        r = dissected_image_guess_verity_roothash(dissected_image, verity);
87✔
4881
                        if (r < 0)
87✔
4882
                                return r;
4883

4884
                        r = dissected_image_decrypt(
87✔
4885
                                        dissected_image,
4886
                                        /* root= */ NULL,
4887
                                        /* passphrase= */ NULL,
4888
                                        verity,
4889
                                        image_policy,
4890
                                        dissect_image_flags);
4891
                        if (r < 0)
87✔
4892
                                return log_debug_errno(r, "Failed to decrypt dissected image: %m");
×
4893
                } else {
4894
                        userns_fd = namespace_open_by_type(NAMESPACE_USER);
6✔
4895
                        if (userns_fd < 0)
6✔
4896
                                return log_debug_errno(userns_fd, "Failed to open our own user namespace: %m");
5✔
4897

4898
                        r = mountfsd_mount_image(
6✔
4899
                                        /* vl= */ NULL,
4900
                                        src_fd >= 0 ? FORMAT_PROC_FD_PATH(src_fd) : src,
×
4901
                                        userns_fd,
4902
                                        options,
4903
                                        image_policy,
4904
                                        verity,
4905
                                        dissect_image_flags,
4906
                                        &dissected_image);
4907
                        if (r < 0)
6✔
4908
                                return r;
4909
                }
4910
        }
4911

4912
        if (dest) {
88✔
4913
                r = mkdir_p_label(dest, 0755);
86✔
4914
                if (r < 0)
86✔
4915
                        return log_debug_errno(r, "Failed to create destination directory %s: %m", dest);
×
4916
                r = umount_recursive(dest, 0);
86✔
4917
                if (r < 0)
86✔
4918
                        return log_debug_errno(r, "Failed to umount under destination directory %s: %m", dest);
×
4919
        }
4920

4921
        r = dissected_image_mount(
88✔
4922
                        dissected_image,
4923
                        dest,
4924
                        /* uid_shift= */ UID_INVALID,
4925
                        /* uid_range= */ UID_INVALID,
4926
                        userns_fd,
4927
                        dissect_image_flags);
4928
        if (r < 0)
88✔
4929
                return log_debug_errno(r, "Failed to mount image: %m");
×
4930

4931
        if (loop_device) {
88✔
4932
                r = loop_device_flock(loop_device, LOCK_UN);
87✔
4933
                if (r < 0)
87✔
4934
                        return log_debug_errno(r, "Failed to unlock loopback device: %m");
×
4935
        }
4936

4937
        /* If we got os-release values from the caller, then we need to match them with the image's
4938
         * extension-release.d/ content. Return -EINVAL if there's any mismatch.
4939
         * First, check the distro ID. If that matches, then check the new SYSEXT_LEVEL value if
4940
         * available, or else fallback to VERSION_ID. If neither is present (eg: rolling release),
4941
         * then a simple match on the ID will be performed. Also if an extension class was specified,
4942
         * check that it matches or return ENOCSI (which looks like error-no-class if one squints enough). */
4943
        if ((extension_release_data && extension_release_data->os_release_id) || required_class >= 0) {
88✔
4944
                _cleanup_strv_free_ char **extension_release = NULL;
3✔
4945
                ImageClass class = IMAGE_SYSEXT;
28✔
4946

4947
                r = load_extension_release_pairs(dest, required_class >= 0 ? required_class : IMAGE_SYSEXT, dissected_image->image_name, relax_extension_release_check, &extension_release);
37✔
4948
                if (r == -ENOENT) {
28✔
4949
                        if (required_class >= 0)
8✔
4950
                                return log_debug_errno(SYNTHETIC_ERRNO(ENOCSI), "Image %s extension-release metadata does not match the expected class", dissected_image->image_name);
3✔
4951

4952
                        r = load_extension_release_pairs(dest, IMAGE_CONFEXT, dissected_image->image_name, relax_extension_release_check, &extension_release);
5✔
4953
                        if (r >= 0)
5✔
4954
                                class = IMAGE_CONFEXT;
4955
                }
4956
                if (r < 0)
20✔
4957
                        return log_debug_errno(r, "Failed to parse image %s extension-release metadata: %m", dissected_image->image_name);
×
4958

4959
                if (extension_release_data && !isempty(extension_release_data->os_release_id)) {
50✔
4960
                        r = extension_release_validate(
25✔
4961
                                        dissected_image->image_name,
25✔
4962
                                        extension_release_data->os_release_id,
4963
                                        extension_release_data->os_release_id_like,
25✔
4964
                                        extension_release_data->os_release_version_id,
25✔
4965
                                        class == IMAGE_SYSEXT ? extension_release_data->os_release_sysext_level : extension_release_data->os_release_confext_level,
4966
                                        extension_release_data->os_release_extension_scope,
25✔
4967
                                        extension_release,
4968
                                        class);
4969
                        if (r == 0)
25✔
4970
                                return log_debug_errno(SYNTHETIC_ERRNO(ESTALE), "Image %s extension-release metadata does not match the root's", dissected_image->image_name);
×
4971
                        if (r < 0)
25✔
4972
                                return log_debug_errno(r, "Failed to compare image %s extension-release metadata with the root's os-release: %m", dissected_image->image_name);
×
4973
                }
4974
        }
4975

4976
        r = dissected_image_relinquish(dissected_image);
85✔
4977
        if (r < 0)
85✔
4978
                return log_debug_errno(r, "Failed to relinquish dissected image: %m");
×
4979

4980
        if (ret_image)
85✔
4981
                *ret_image = TAKE_PTR(dissected_image);
2✔
4982

4983
        return 0;
4984
}
4985

4986
/* Frees every string field of an ExtensionReleaseData structure and resets each one to the value
 * mfree() returns. The structure itself is not freed. */
void extension_release_data_done(ExtensionReleaseData *data) {
        assert(data);

        /* The fields are independent of each other, so the order of release does not matter. */
        data->os_release_extension_scope = mfree(data->os_release_extension_scope);
        data->os_release_confext_level = mfree(data->os_release_confext_level);
        data->os_release_sysext_level = mfree(data->os_release_sysext_level);
        data->os_release_version_id = mfree(data->os_release_version_id);
        data->os_release_id_like = mfree(data->os_release_id_like);
        data->os_release_id = mfree(data->os_release_id);
}
92✔
4996

4997
int get_common_dissect_directory(char **ret) {
130✔
4998
        _cleanup_free_ char *t = NULL;
130✔
4999
        int r;
130✔
5000

5001
        /* A common location we mount dissected images to. The assumption is that everyone who uses this
5002
         * function runs in their own private mount namespace (with mount propagation off on /run/systemd/,
5003
         * and thus can mount something here without affecting anyone else). */
5004

5005
        t = strdup("/run/systemd/dissect-root");
130✔
5006
        if (!t)
130✔
5007
                return log_oom_debug();
×
5008

5009
        r = mkdir_parents(t, 0755);
130✔
5010
        if (r < 0)
130✔
5011
                return log_debug_errno(r, "Failed to create parent dirs of mount point '%s': %m", t);
×
5012

5013
        r = RET_NERRNO(mkdir(t, 0000)); /* It's supposed to be overmounted, hence let's make this inaccessible */
130✔
5014
        if (r < 0 && r != -EEXIST)
130✔
5015
                return log_debug_errno(r, "Failed to create mount point '%s': %m", t);
×
5016

5017
        if (ret)
130✔
5018
                *ret = TAKE_PTR(t);
130✔
5019

5020
        return 0;
5021
}
5022

5023
#if HAVE_BLKID

/* JSON dispatchers that convert string fields into the matching enum via the named parser. */
static JSON_DISPATCH_ENUM_DEFINE(dispatch_architecture, Architecture, architecture_from_string);
static JSON_DISPATCH_ENUM_DEFINE(dispatch_partition_designator, PartitionDesignator, partition_designator_from_string);

/* Scratch structure one per-partition JSON object is dispatched into (see the dispatch table in
 * mountfsd_mount_image_fd()). 'fstype' and 'label' are heap allocated and owned by this structure
 * until they are taken out of it. */
typedef struct PartitionFields {
        PartitionDesignator designator;
        bool rw;
        bool growfs;
        unsigned partno;
        Architecture architecture;
        sd_id128_t uuid;
        char *fstype;
        char *label;
        uint64_t size;
        uint64_t offset;
        unsigned fsmount_fd_idx;
} PartitionFields;

/* Releases the strings still owned by a PartitionFields structure. */
static void partition_fields_done(PartitionFields *f) {
        assert(f);

        f->label = mfree(f->label);
        f->fstype = mfree(f->fstype);
}

/* Scratch structure the top-level JSON reply object is dispatched into (see the dispatch table in
 * mountfsd_mount_image_fd()). Holds a reference on 'partitions' and owns the 'image_policy' string. */
typedef struct MountImageReplyParameters {
        sd_json_variant *partitions;
        bool single_file_system;
        char *image_policy;
        uint64_t image_size;
        uint32_t sector_size;
        sd_id128_t image_uuid;
} MountImageReplyParameters;

/* Drops the JSON variant reference and the string owned by a MountImageReplyParameters structure. */
static void mount_image_reply_parameters_done(MountImageReplyParameters *p) {
        assert(p);

        p->partitions = sd_json_variant_unref(p->partitions);
        p->image_policy = mfree(p->image_policy);
}

#endif
5066

5067
/* Opens a Varlink connection to the socket at /run/systemd/io.systemd.MountFileSystem and enables
 * file descriptor passing in both directions on it. On success returns 0 and hands the connection
 * to the caller via *ret; on failure returns a negative errno-style error and leaves *ret untouched. */
int mountfsd_connect(sd_varlink **ret) {
        _cleanup_(sd_varlink_unrefp) sd_varlink *link = NULL;
        int r;

        assert(ret);

        r = sd_varlink_connect_address(&link, "/run/systemd/io.systemd.MountFileSystem");
        if (r < 0)
                return log_debug_errno(r, "Failed to connect to mountfsd: %m");

        /* File descriptors travel both ways over this connection, hence allow both directions. */
        r = sd_varlink_set_allow_fd_passing_input(link, true);
        if (r < 0)
                return log_debug_errno(r, "Failed to enable varlink fd passing for read: %m");

        r = sd_varlink_set_allow_fd_passing_output(link, true);
        if (r < 0)
                return log_debug_errno(r, "Failed to enable varlink fd passing for write: %m");

        *ret = TAKE_PTR(link);
        return 0;
}
5088

5089
int mountfsd_mount_image_fd(
79✔
5090
                sd_varlink *vl,
5091
                int image_fd,
5092
                int userns_fd,
5093
                const MountOptions *options,
5094
                const ImagePolicy *image_policy,
5095
                const VeritySettings *verity,
5096
                DissectImageFlags flags,
5097
                DissectedImage **ret) {
5098

5099
#if HAVE_BLKID
5100
        _cleanup_(mount_image_reply_parameters_done) MountImageReplyParameters p = {};
79✔
5101

5102
        static const sd_json_dispatch_field dispatch_table[] = {
79✔
5103
                { "partitions",         SD_JSON_VARIANT_ARRAY,         sd_json_dispatch_variant, offsetof(struct MountImageReplyParameters, partitions),         SD_JSON_MANDATORY },
5104
                { "singleFileSystem",   SD_JSON_VARIANT_BOOLEAN,       sd_json_dispatch_stdbool, offsetof(struct MountImageReplyParameters, single_file_system), 0                 },
5105
                { "imagePolicy",        SD_JSON_VARIANT_STRING,        sd_json_dispatch_string,  offsetof(struct MountImageReplyParameters, image_policy),       0                 },
5106
                { "imageSize",          _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64,  offsetof(struct MountImageReplyParameters, image_size),         SD_JSON_MANDATORY },
5107
                { "sectorSize",         _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint32,  offsetof(struct MountImageReplyParameters, sector_size),        SD_JSON_MANDATORY },
5108
                { "imageUuid",          SD_JSON_VARIANT_STRING,        sd_json_dispatch_id128,   offsetof(struct MountImageReplyParameters, image_uuid),         0                 },
5109
                {}
5110
        };
5111

5112
        _cleanup_(dissected_image_unrefp) DissectedImage *di = NULL;
×
5113
        _cleanup_close_ int verity_data_fd = -EBADF;
79✔
5114
        _cleanup_free_ char *ps = NULL;
79✔
5115
        const char *error_id;
79✔
5116
        int r;
79✔
5117

5118
        assert(image_fd >= 0);
79✔
5119
        assert(ret);
79✔
5120

5121
        _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL;
79✔
5122
        if (!vl) {
79✔
5123
                r = mountfsd_connect(&_vl);
71✔
5124
                if (r < 0)
71✔
5125
                        return r;
5126

5127
                vl = _vl;
71✔
5128
        }
5129

5130
        _cleanup_close_ int reopened_fd = -EBADF;
79✔
5131

5132
        image_fd = fd_reopen_condition(image_fd, O_CLOEXEC|O_NOCTTY|O_NONBLOCK|(FLAGS_SET(flags, DISSECT_IMAGE_MOUNT_READ_ONLY) ? O_RDONLY : O_RDWR), O_PATH, &reopened_fd);
95✔
5133
        if (image_fd < 0)
79✔
5134
                return log_debug_errno(image_fd, "Failed to reopen fd: %m");
×
5135

5136
        r = sd_varlink_push_dup_fd(vl, image_fd);
79✔
5137
        if (r < 0)
79✔
5138
                return log_debug_errno(r, "Failed to push image fd into varlink connection: %m");
×
5139

5140
        if (userns_fd >= 0) {
79✔
5141
                r = sd_varlink_push_dup_fd(vl, userns_fd);
79✔
5142
                if (r < 0)
79✔
5143
                        return log_debug_errno(r, "Failed to push image fd into varlink connection: %m");
×
5144
        }
5145

5146
        if (image_policy) {
79✔
5147
                r = image_policy_to_string(image_policy, /* simplify= */ false, &ps);
10✔
5148
                if (r < 0)
10✔
5149
                        return log_debug_errno(r, "Failed to format image policy to string: %m");
×
5150
        }
5151

5152
        if (verity && verity->data_path) {
79✔
5153
                verity_data_fd = open(verity->data_path, O_RDONLY|O_CLOEXEC);
64✔
5154
                if (verity_data_fd < 0)
64✔
5155
                        return log_debug_errno(errno, "Failed to open verity data file '%s': %m", verity->data_path);
×
5156

5157
                r = sd_varlink_push_dup_fd(vl, verity_data_fd);
64✔
5158
                if (r < 0)
64✔
5159
                        return log_debug_errno(r, "Failed to push verity data fd into varlink connection: %m");
×
5160
        }
5161

5162
        _cleanup_(sd_json_variant_unrefp) sd_json_variant *mount_options = NULL;
79✔
5163
        for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
1,106✔
5164
                _cleanup_free_ char *filtered = NULL;
1,027✔
5165

5166
                const char *o = mount_options_from_designator(options, i);
1,027✔
5167
                if (!o)
1,027✔
5168
                        continue;
1,024✔
5169

5170
                /* We communicate relaxExtensionReleaseCheck separately via the varlink API, so filter it out
5171
                 * from the mount options we pass to mountfsd. */
5172
                if (IN_SET(i, PARTITION_ROOT, PARTITION_USR)) {
3✔
5173
                        r = fstab_filter_options(
2✔
5174
                                        o,
5175
                                        "x-systemd.relax-extension-release-check\0",
5176
                                        /* ret_namefound= */ NULL,
5177
                                        /* ret_value= */ NULL,
5178
                                        /* ret_values= */ NULL,
5179
                                        &filtered);
5180
                        if (r < 0)
2✔
5181
                                return log_debug_errno(r, "Failed to filter mount options: %m");
×
5182

5183
                        if (isempty(filtered))
2✔
5184
                                continue;
×
5185
                }
5186

5187
                r = sd_json_variant_merge_objectbo(
4✔
5188
                                &mount_options,
5189
                                SD_JSON_BUILD_PAIR_STRING(partition_designator_to_string(i), filtered ?: o));
5190
                if (r < 0)
3✔
5191
                        return log_debug_errno(r, "Failed to build mount options array: %m");
×
5192
        }
5193

5194
        sd_json_variant *reply = NULL;
79✔
5195
        r = varlink_callbo_and_log(
205✔
5196
                        vl,
5197
                        "io.systemd.MountFileSystem.MountImage",
5198
                        &reply,
5199
                        &error_id,
5200
                        SD_JSON_BUILD_PAIR_UNSIGNED("imageFileDescriptor", 0),
5201
                        SD_JSON_BUILD_PAIR_CONDITION(userns_fd >= 0, "userNamespaceFileDescriptor", SD_JSON_BUILD_UNSIGNED(1)),
5202
                        SD_JSON_BUILD_PAIR_BOOLEAN("readOnly", FLAGS_SET(flags, DISSECT_IMAGE_MOUNT_READ_ONLY)),
5203
                        SD_JSON_BUILD_PAIR_BOOLEAN("growFileSystems", FLAGS_SET(flags, DISSECT_IMAGE_GROWFS)),
5204
                        SD_JSON_BUILD_PAIR_CONDITION(!!ps, "imagePolicy", SD_JSON_BUILD_STRING(ps)),
5205
                        JSON_BUILD_PAIR_VARIANT_NON_NULL("mountOptions", mount_options),
5206
                        SD_JSON_BUILD_PAIR_BOOLEAN("relaxExtensionReleaseChecks", mount_options_relax_extension_release_checks(options)),
5207
                        SD_JSON_BUILD_PAIR_BOOLEAN("veritySharing", FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE)),
5208
                        SD_JSON_BUILD_PAIR_CONDITION(verity_data_fd >= 0, "verityDataFileDescriptor", SD_JSON_BUILD_UNSIGNED(userns_fd >= 0 ? 2 : 1)),
5209
                        SD_JSON_BUILD_PAIR_CONDITION(verity && iovec_is_set(&verity->root_hash), "verityRootHash", JSON_BUILD_IOVEC_HEX(&verity->root_hash)),
5210
                        SD_JSON_BUILD_PAIR_CONDITION(verity && iovec_is_set(&verity->root_hash_sig), "verityRootHashSignature", JSON_BUILD_IOVEC_BASE64(&verity->root_hash_sig)),
5211
                        SD_JSON_BUILD_PAIR_BOOLEAN("allowInteractiveAuthentication", FLAGS_SET(flags, DISSECT_IMAGE_ALLOW_INTERACTIVE_AUTH)));
5212
        if (r < 0)
79✔
5213
                return r;
5214

5215
        r = sd_json_dispatch(reply, dispatch_table, SD_JSON_ALLOW_EXTENSIONS, &p);
71✔
5216
        if (r < 0)
71✔
5217
                return log_debug_errno(r, "Failed to parse MountImage() reply: %m");
×
5218

5219
        log_debug("Effective image policy: %s", p.image_policy);
71✔
5220

5221
        sd_json_variant *i;
71✔
5222
        JSON_VARIANT_ARRAY_FOREACH(i, p.partitions) {
142✔
5223
                _cleanup_close_ int fsmount_fd = -EBADF;
71✔
5224

5225
                _cleanup_(partition_fields_done) PartitionFields pp = {
×
5226
                        .designator = _PARTITION_DESIGNATOR_INVALID,
5227
                        .architecture = _ARCHITECTURE_INVALID,
5228
                        .size = UINT64_MAX,
5229
                        .offset = UINT64_MAX,
5230
                        .fsmount_fd_idx = UINT_MAX,
5231
                };
5232

5233
                static const sd_json_dispatch_field partition_dispatch_table[] = {
71✔
5234
                        { "designator",          SD_JSON_VARIANT_STRING,        dispatch_partition_designator, offsetof(struct PartitionFields, designator),       SD_JSON_MANDATORY },
5235
                        { "writable",            SD_JSON_VARIANT_BOOLEAN,       sd_json_dispatch_stdbool,      offsetof(struct PartitionFields, rw),               SD_JSON_MANDATORY },
5236
                        { "growFileSystem",      SD_JSON_VARIANT_BOOLEAN,       sd_json_dispatch_stdbool,      offsetof(struct PartitionFields, growfs),           SD_JSON_MANDATORY },
5237
                        { "partitionNumber",     _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint,         offsetof(struct PartitionFields, partno),           0                 },
5238
                        { "architecture",        SD_JSON_VARIANT_STRING,        dispatch_architecture,         offsetof(struct PartitionFields, architecture),     0                 },
5239
                        { "partitionUuid",       SD_JSON_VARIANT_STRING,        sd_json_dispatch_id128,        offsetof(struct PartitionFields, uuid),             0                 },
5240
                        { "fileSystemType",      SD_JSON_VARIANT_STRING,        sd_json_dispatch_string,       offsetof(struct PartitionFields, fstype),           SD_JSON_MANDATORY },
5241
                        { "partitionLabel",      SD_JSON_VARIANT_STRING,        sd_json_dispatch_string,       offsetof(struct PartitionFields, label),            0                 },
5242
                        { "size",                _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64,       offsetof(struct PartitionFields, size),             SD_JSON_MANDATORY },
5243
                        { "offset",              _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64,       offsetof(struct PartitionFields, offset),           SD_JSON_MANDATORY },
5244
                        { "mountFileDescriptor", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint,         offsetof(struct PartitionFields, fsmount_fd_idx),   SD_JSON_MANDATORY },
5245
                        {}
5246
                };
5247

5248
                r = sd_json_dispatch(i, partition_dispatch_table, SD_JSON_ALLOW_EXTENSIONS, &pp);
71✔
5249
                if (r < 0)
71✔
5250
                        return log_debug_errno(r, "Failed to parse partition data: %m");
×
5251

5252
                if (pp.fsmount_fd_idx != UINT_MAX) {
71✔
5253
                        fsmount_fd = sd_varlink_take_fd(vl, pp.fsmount_fd_idx);
71✔
5254
                        if (fsmount_fd < 0)
71✔
5255
                                return fsmount_fd;
5256
                }
5257

5258
                assert(pp.designator >= 0);
71✔
5259

5260
                if (!di) {
71✔
5261
                        r = dissected_image_new(/* path= */ NULL, &di);
71✔
5262
                        if (r < 0)
71✔
5263
                                return log_debug_errno(r, "Failed to allocated new dissected image structure: %m");
×
5264
                }
5265

5266
                if (di->partitions[pp.designator].found)
71✔
5267
                        return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "Duplicate partition data for '%s'.", partition_designator_to_string(pp.designator));
×
5268

5269
                di->partitions[pp.designator] = (DissectedPartition) {
71✔
5270
                        .found = true,
5271
                        .rw = pp.rw,
71✔
5272
                        .growfs = pp.growfs,
71✔
5273
                        .partno = pp.partno,
71✔
5274
                        .architecture = pp.architecture,
71✔
5275
                        .uuid = pp.uuid,
5276
                        .fstype = TAKE_PTR(pp.fstype),
71✔
5277
                        .label = TAKE_PTR(pp.label),
71✔
5278
                        .mount_node_fd = -EBADF,
5279
                        .size = pp.size,
71✔
5280
                        .offset = pp.offset,
71✔
5281
                        .fsmount_fd = TAKE_FD(fsmount_fd),
71✔
5282
                };
5283
        }
5284

5285
        di->single_file_system = p.single_file_system;
71✔
5286
        di->image_size = p.image_size;
71✔
5287
        di->sector_size = p.sector_size;
71✔
5288
        di->image_uuid = p.image_uuid;
71✔
5289

5290
        *ret = TAKE_PTR(di);
71✔
5291
        return 0;
71✔
5292
#else
5293
        return -EOPNOTSUPP;
5294
#endif
5295
}
5296

5297
int mountfsd_mount_image(
18✔
5298
                sd_varlink *vl,
5299
                const char *path,
5300
                int userns_fd,
5301
                const MountOptions *options,
5302
                const ImagePolicy *image_policy,
5303
                const VeritySettings *verity,
5304
                DissectImageFlags flags,
5305
                DissectedImage **ret) {
5306

5307
        int r;
18✔
5308

5309
        assert(path);
18✔
5310
        assert(ret);
18✔
5311

5312
        _cleanup_close_ int image_fd = open(path, O_RDONLY|O_CLOEXEC);
36✔
5313
        if (image_fd < 0)
18✔
5314
                return log_debug_errno(errno, "Failed to open '%s': %m", path);
×
5315

5316
        _cleanup_(dissected_image_unrefp) DissectedImage *di = NULL;
18✔
5317
        r = mountfsd_mount_image_fd(vl, image_fd, userns_fd, options, image_policy, verity, flags, &di);
18✔
5318
        if (r < 0)
18✔
5319
                return r;
5320

5321
        if (!di->image_name) {
11✔
5322
                r = dissected_image_name_from_path(path, &di->image_name);
11✔
5323
                if (r < 0)
11✔
5324
                        return r;
5325
        }
5326

5327
        *ret = TAKE_PTR(di);
11✔
5328
        return 0;
11✔
5329
}
5330

5331
int mountfsd_mount_directory_fd(
69✔
5332
                sd_varlink *vl,
5333
                int directory_fd,
5334
                int userns_fd,
5335
                DissectImageFlags flags,
5336
                int *ret_mount_fd) {
5337

5338
        int r;
69✔
5339

5340
        assert(directory_fd >= 0);
69✔
5341
        assert(ret_mount_fd);
69✔
5342

5343
        /* Pick one identity, not both, that makes no sense. */
5344
        assert(!FLAGS_SET(flags, DISSECT_IMAGE_FOREIGN_UID|DISSECT_IMAGE_IDENTITY_UID));
69✔
5345

5346
        _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL;
69✔
5347
        if (!vl) {
69✔
5348
                r = mountfsd_connect(&_vl);
15✔
5349
                if (r < 0)
15✔
5350
                        return r;
5351

5352
                vl = _vl;
15✔
5353
        }
5354

5355
        r = sd_varlink_push_dup_fd(vl, directory_fd);
69✔
5356
        if (r < 0)
69✔
5357
                return log_debug_errno(r, "Failed to push directory fd into varlink connection: %m");
×
5358

5359
        if (userns_fd >= 0) {
69✔
5360
                r = sd_varlink_push_dup_fd(vl, userns_fd);
69✔
5361
                if (r < 0)
69✔
5362
                        return log_debug_errno(r, "Failed to push user namespace fd into varlink connection: %m");
×
5363
        }
5364

5365
        sd_json_variant *reply = NULL;
69✔
5366
        const char *error_id = NULL;
69✔
5367
        r = varlink_callbo_and_log(
84✔
5368
                        vl,
5369
                        "io.systemd.MountFileSystem.MountDirectory",
5370
                        &reply,
5371
                        &error_id,
5372
                        SD_JSON_BUILD_PAIR_UNSIGNED("directoryFileDescriptor", 0),
5373
                        SD_JSON_BUILD_PAIR_CONDITION(userns_fd >= 0, "userNamespaceFileDescriptor", SD_JSON_BUILD_UNSIGNED(1)),
5374
                        SD_JSON_BUILD_PAIR_BOOLEAN("readOnly", FLAGS_SET(flags, DISSECT_IMAGE_MOUNT_READ_ONLY)),
5375
                        SD_JSON_BUILD_PAIR_STRING("mode", FLAGS_SET(flags, DISSECT_IMAGE_FOREIGN_UID) ? "foreign" :
5376
                                                          FLAGS_SET(flags, DISSECT_IMAGE_IDENTITY_UID) ? "identity" : "auto"),
5377
                        SD_JSON_BUILD_PAIR_BOOLEAN("allowInteractiveAuthentication", FLAGS_SET(flags, DISSECT_IMAGE_ALLOW_INTERACTIVE_AUTH)));
5378
        if (r < 0)
69✔
5379
                return r;
5380

5381
        static const sd_json_dispatch_field dispatch_table[] = {
69✔
5382
                { "mountFileDescriptor", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint, 0, SD_JSON_MANDATORY },
5383
                {}
5384
        };
5385

5386
        unsigned fsmount_fd_idx = UINT_MAX;
69✔
5387
        r = sd_json_dispatch(reply, dispatch_table, SD_JSON_ALLOW_EXTENSIONS, &fsmount_fd_idx);
69✔
5388
        if (r < 0)
69✔
5389
                return log_debug_errno(r, "Failed to parse MountImage() reply: %m");
×
5390

5391
        _cleanup_close_ int fsmount_fd = sd_varlink_take_fd(vl, fsmount_fd_idx);
138✔
5392
        if (fsmount_fd < 0)
69✔
5393
                return log_debug_errno(fsmount_fd, "Failed to take mount fd from Varlink connection: %m");
×
5394

5395
        *ret_mount_fd = TAKE_FD(fsmount_fd);
69✔
5396
        return 0;
69✔
5397
}
5398

5399
int mountfsd_mount_directory(
16✔
5400
                sd_varlink *vl,
5401
                const char *path,
5402
                int userns_fd,
5403
                DissectImageFlags flags,
5404
                int *ret_mount_fd) {
5405

5406
        assert(path);
16✔
5407
        assert(ret_mount_fd);
16✔
5408

5409
        _cleanup_close_ int directory_fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_PATH);
32✔
5410
        if (directory_fd < 0)
16✔
5411
                return log_debug_errno(errno, "Failed to open '%s': %m", path);
×
5412

5413
        return mountfsd_mount_directory_fd(vl, directory_fd, userns_fd, flags, ret_mount_fd);
16✔
5414
}
5415

5416
int mountfsd_make_directory_fd(
9✔
5417
                sd_varlink *vl,
5418
                int parent_fd,
5419
                const char *name,
5420
                mode_t mode,
5421
                DissectImageFlags flags,
5422
                int *ret_directory_fd) {
5423

5424
        int r;
9✔
5425

5426
        assert(parent_fd >= 0);
9✔
5427
        assert(name);
9✔
5428

5429
        _cleanup_(sd_varlink_unrefp) sd_varlink *_vl = NULL;
9✔
5430
        if (!vl) {
9✔
5431
                r = mountfsd_connect(&_vl);
1✔
5432
                if (r < 0)
1✔
5433
                        return r;
5434

5435
                vl = _vl;
1✔
5436
        }
5437

5438
        r = sd_varlink_push_dup_fd(vl, parent_fd);
9✔
5439
        if (r < 0)
9✔
5440
                return log_debug_errno(r, "Failed to push parent fd into varlink connection: %m");
×
5441

5442
        sd_json_variant *reply = NULL;
9✔
5443
        const char *error_id = NULL;
9✔
5444
        r = varlink_callbo_and_log(
9✔
5445
                        vl,
5446
                        "io.systemd.MountFileSystem.MakeDirectory",
5447
                        &reply,
5448
                        &error_id,
5449
                        SD_JSON_BUILD_PAIR_UNSIGNED("parentFileDescriptor", 0),
5450
                        SD_JSON_BUILD_PAIR_STRING("name", name),
5451
                        SD_JSON_BUILD_PAIR_CONDITION(!IN_SET(mode, MODE_INVALID, 0700), "mode", SD_JSON_BUILD_UNSIGNED(mode)), /* suppress this field if default/unset */
5452
                        SD_JSON_BUILD_PAIR_BOOLEAN("allowInteractiveAuthentication", FLAGS_SET(flags, DISSECT_IMAGE_ALLOW_INTERACTIVE_AUTH)));
5453
        if (r < 0)
9✔
5454
                return r;
5455

5456
        static const sd_json_dispatch_field dispatch_table[] = {
9✔
5457
                { "directoryFileDescriptor", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint, 0, SD_JSON_MANDATORY },
5458
                {}
5459
        };
5460

5461
        unsigned directory_fd_idx = UINT_MAX;
9✔
5462
        r = sd_json_dispatch(reply, dispatch_table, SD_JSON_ALLOW_EXTENSIONS, &directory_fd_idx);
9✔
5463
        if (r < 0)
9✔
5464
                return log_debug_errno(r, "Failed to parse MountImage() reply: %m");
×
5465

5466
        _cleanup_close_ int directory_fd = sd_varlink_take_fd(vl, directory_fd_idx);
18✔
5467
        if (directory_fd < 0)
9✔
5468
                return log_debug_errno(directory_fd, "Failed to take directory fd from Varlink connection: %m");
×
5469

5470
        if (ret_directory_fd)
9✔
5471
                *ret_directory_fd = TAKE_FD(directory_fd);
8✔
5472
        return 0;
5473
}
5474

5475
int mountfsd_make_directory(
8✔
5476
                sd_varlink *vl,
5477
                const char *path,
5478
                mode_t mode,
5479
                DissectImageFlags flags,
5480
                int *ret_directory_fd) {
5481

5482
        int r;
8✔
5483

5484
        _cleanup_free_ char *parent = NULL;
8✔
5485
        r = path_extract_directory(path, &parent);
8✔
5486
        if (r < 0)
8✔
5487
                return log_debug_errno(r, "Failed to extract parent directory from '%s': %m", path);
×
5488

5489
        _cleanup_free_ char *dirname = NULL;
8✔
5490
        r = path_extract_filename(path, &dirname);
8✔
5491
        if (r < 0)
8✔
5492
                return log_debug_errno(r, "Failed to extract directory name from '%s': %m", path);
×
5493

5494
        _cleanup_close_ int fd = open(parent, O_DIRECTORY|O_CLOEXEC);
16✔
5495
        if (fd < 0)
8✔
5496
                return log_debug_errno(r, "Failed to open '%s': %m", parent);
×
5497

5498
        return mountfsd_make_directory_fd(vl, fd, dirname, mode, flags, ret_directory_fd);
8✔
5499
}
5500

5501
int copy_tree_at_foreign(int source_fd, int target_fd, int userns_fd) {
2✔
5502
        int r;
2✔
5503

5504
        assert(source_fd >= 0);
2✔
5505
        assert(target_fd >= 0);
2✔
5506
        assert(userns_fd >= 0);
2✔
5507

5508
        /* Copies dir referenced by source_fd into dir referenced by source_fd, moves to the specified userns
5509
         * for that, which should be foreign UID range */
5510

5511
        r = pidref_safe_fork_full(
6✔
5512
                        "copy-tree",
5513
                        /* stdio_fds= */ NULL,
5514
                        (int[]) { userns_fd, source_fd, target_fd }, 3,
2✔
5515
                        FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_REOPEN_LOG,
5516
                        /* ret= */ NULL);
5517
        if (r < 0)
4✔
5518
                return r;
2✔
5519
        if (r == 0) {
4✔
5520
                r = namespace_enter(
2✔
5521
                                /* pidns_fd= */ -EBADF,
5522
                                /* mntns_fd= */ -EBADF,
5523
                                /* netns_fd= */ -EBADF,
5524
                                userns_fd,
5525
                                /* root_fd= */ -EBADF);
5526
                if (r < 0) {
2✔
5527
                        log_debug_errno(r, "Failed to join user namespace: %m");
×
5528
                        _exit(EXIT_FAILURE);
×
5529
                }
5530

5531
                r = copy_tree_at(
2✔
5532
                                source_fd, /* from= */ NULL,
5533
                                target_fd, /* to= */ NULL,
5534
                                /* override_uid= */ UID_INVALID,
5535
                                /* override_gid= */ GID_INVALID,
5536
                                COPY_REFLINK|COPY_HARDLINKS|COPY_MERGE_EMPTY|COPY_MERGE_APPLY_STAT|COPY_SAME_MOUNT|COPY_ALL_XATTRS,
5537
                                /* denylist= */ NULL,
5538
                                /* subvolumes= */ NULL);
5539
                if (r < 0) {
2✔
5540
                        log_debug_errno(r, "Failed to copy tree: %m");
×
5541
                        _exit(EXIT_FAILURE);
×
5542
                }
5543

5544
                _exit(EXIT_SUCCESS);
2✔
5545
        }
5546

5547
        return 0;
5548
}
5549

5550
int remove_tree_foreign(const char *path, int userns_fd) {
×
5551
        int r;
×
5552

5553
        assert(path);
×
5554
        assert(userns_fd >= 0);
×
5555

5556
        _cleanup_close_ int tree_fd = -EBADF;
×
5557
        r = mountfsd_mount_directory(
×
5558
                        /* vl= */ NULL,
5559
                        path,
5560
                        userns_fd,
5561
                        DISSECT_IMAGE_FOREIGN_UID,
5562
                        &tree_fd);
5563
        if (r < 0)
×
5564
                return r;
5565

5566
        r = pidref_safe_fork_full(
×
5567
                        "rm-tree",
5568
                        /* stdio_fds= */ NULL,
5569
                        (int[]) { userns_fd, tree_fd }, 2,
×
5570
                        FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_REOPEN_LOG,
5571
                        /* ret= */ NULL);
5572
        if (r < 0)
×
5573
                return r;
5574
        if (r == 0) {
×
5575
                /* child */
5576

5577
                r = namespace_enter(
×
5578
                                /* pidns_fd= */ -EBADF,
5579
                                /* mntns_fd= */ -EBADF,
5580
                                /* netns_fd= */ -EBADF,
5581
                                userns_fd,
5582
                                /* root_fd= */ -EBADF);
5583
                if (r < 0) {
×
5584
                        log_debug_errno(r, "Failed to join user namespace: %m");
×
5585
                        _exit(EXIT_FAILURE);
×
5586
                }
5587

5588
                _cleanup_close_ int dfd = fd_reopen(tree_fd, O_DIRECTORY|O_CLOEXEC);
×
5589
                if (dfd < 0) {
×
5590
                        log_debug_errno(r, "Failed to reopen tree fd: %m");
×
5591
                        _exit(EXIT_FAILURE);
×
5592
                }
5593

5594
                r = rm_rf_children(dfd, REMOVE_PHYSICAL|REMOVE_SUBVOLUME|REMOVE_CHMOD, /* root_dev= */ NULL);
×
5595
                if (r < 0)
×
5596
                        log_debug_errno(r, "Failed to empty '%s' directory in foreign UID mode, ignoring: %m", path);
×
5597

5598
                _exit(EXIT_SUCCESS);
×
5599
        }
5600

5601
        return 0;
5602
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc