• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 23927985597

02 Apr 2026 07:45PM UTC coverage: 72.362% (+0.02%) from 72.343%
23927985597

push

github

daandemeyer
ci: Drop base64 encoding in claude review workflow

Doesn't seem to work nearly as well as the previous solution, which
just told Claude not to escape stuff.

319121 of 441004 relevant lines covered (72.36%)

1167673.48 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

65.42
/src/shared/loop-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#if HAVE_VALGRIND_MEMCHECK_H
4
#include <valgrind/memcheck.h>
5
#endif
6

7
#include <fcntl.h>
8
#include <linux/loop.h>
9
#include <sys/file.h>
10
#include <sys/ioctl.h>
11
#include <unistd.h>
12

13
#include "sd-device.h"
14

15
#include "alloc-util.h"
16
#include "blockdev-util.h"
17
#include "data-fd-util.h"
18
#include "device-util.h"
19
#include "devnum-util.h"
20
#include "dissect-image.h"
21
#include "env-util.h"
22
#include "errno-util.h"
23
#include "fd-util.h"
24
#include "fileio.h"
25
#include "fs-util.h"
26
#include "loop-util.h"
27
#include "parse-util.h"
28
#include "path-util.h"
29
#include "random-util.h"
30
#include "stat-util.h"
31
#include "stdio-util.h"
32
#include "string-util.h"
33
#include "time-util.h"
34

35
/* _cleanup_ handler: detaches the backing file from a loop device fd and closes
 * it. A negative fd means "nothing to release" and is silently ignored. */
static void cleanup_clear_loop_close(int *fd) {
        if (*fd >= 0) {
                /* Best effort: detach the backing file first, then close. */
                (void) ioctl(*fd, LOOP_CLR_FD);
                (void) safe_close(*fd);
        }
}
42

43
/* Checks whether a backing file is currently attached to the loop device
 * referred to by fd. Returns > 0 if bound, 0 if unbound, negative errno on any
 * other failure. */
static int loop_is_bound(int fd) {
        struct loop_info64 info;

        if (ioctl(ASSERT_FD(fd), LOOP_GET_STATUS64, &info) >= 0)
                return true; /* bound! */

        /* ENXIO is how the kernel reports "no backing file attached". */
        return errno == ENXIO ? false : -errno;
}
55

56
static int open_lock_fd(int primary_fd, int operation) {
2,723✔
57
        _cleanup_close_ int lock_fd = -EBADF;
2,723✔
58

59
        assert(IN_SET(operation & ~LOCK_NB, LOCK_SH, LOCK_EX));
2,723✔
60

61
        lock_fd = fd_reopen(ASSERT_FD(primary_fd), O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2,723✔
62
        if (lock_fd < 0)
2,723✔
63
                return lock_fd;
64

65
        if (flock(lock_fd, operation) < 0)
2,723✔
66
                return -errno;
×
67

68
        return TAKE_FD(lock_fd);
69
}
70

71
/* Verifies that LO_FLAGS_DIRECT_IO, if requested in 'c', actually took effect on
 * the configured loop device 'fd'. Returns 0 on success (or when direct IO was
 * not requested), -ENOANO if the kernel silently dropped the flag — a
 * recognizable error so callers can retry from scratch in buffered IO mode —
 * or another negative errno on ioctl failure. */
static int loop_configure_verify_direct_io(int fd, const struct loop_config *c) {
        assert(fd >= 0);
        assert(c);

        if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_DIRECT_IO)) {
                struct loop_info64 info;

                if (ioctl(fd, LOOP_GET_STATUS64, &info) < 0)
                        return log_debug_errno(errno, "Failed to issue LOOP_GET_STATUS64: %m");

#if HAVE_VALGRIND_MEMCHECK_H
                /* Valgrind doesn't know this ioctl fills in 'info'; mark it defined manually. */
                VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
#endif

                /* On older kernels (<= 5.3) it was necessary to set the block size of the loopback block
                 * device to the logical block size of the underlying file system. Since there was no nice
                 * way to query the value, we are not bothering to do this however. On newer kernels the
                 * block size is propagated automatically and does not require intervention from us. We'll
                 * check here if enabling direct IO worked, to make this easily debuggable however.
                 *
                 * (Should anyone really care and actually wants direct IO on old kernels: it might be worth
                 * enabling direct IO with iteratively larger block sizes until it eventually works.)
                 *
                 * On older kernels (e.g.: 5.10) when this is attempted on a file stored on a dm-crypt
                 * backed partition the kernel will start returning I/O errors when accessing the mounted
                 * loop device, so return a recognizable error that causes the operation to be started
                 * from scratch without the LO_FLAGS_DIRECT_IO flag. */
                if (!FLAGS_SET(info.lo_flags, LO_FLAGS_DIRECT_IO))
                        return log_debug_errno(
                                        SYNTHETIC_ERRNO(ENOANO),
                                        "Could not enable direct IO mode, retrying in buffered IO mode.");
        }

        return 0;
}
106

107
/* Verifies that a LOOP_CONFIGURE call honoured everything requested in 'c':
 * block size, size limit, partition scanning and direct IO. Returns 1 if all
 * settings took effect, 0 if LOOP_CONFIGURE is broken on this kernel and the
 * caller must fall back to LOOP_SET_STATUS64, negative errno on error
 * (including -ENOANO from the direct IO check). */
static int loop_configure_verify(int fd, const struct loop_config *c) {
        bool broken = false;
        int r;

        assert(fd >= 0);
        assert(c);

        if (c->block_size != 0) {
                uint32_t ssz;

                r = blockdev_get_sector_size(fd, &ssz);
                if (r < 0)
                        return r;

                if (ssz != c->block_size) {
                        log_debug("LOOP_CONFIGURE didn't honour requested block size %" PRIu32 ", got %" PRIu32 " instead. Ignoring.", c->block_size, ssz);
                        broken = true;
                }
        }

        if (c->info.lo_sizelimit != 0) {
                /* Kernel 5.8 vanilla doesn't properly propagate the size limit into the
                 * block device. If it's used, let's immediately check if it had the desired
                 * effect hence. And if not use classic LOOP_SET_STATUS64. */
                uint64_t z;

                r = blockdev_get_device_size(fd, &z);
                if (r < 0)
                        return r;

                if (z != c->info.lo_sizelimit) {
                        log_debug("LOOP_CONFIGURE is broken, doesn't honour .info.lo_sizelimit. Falling back to LOOP_SET_STATUS64.");
                        broken = true;
                }
        }

        if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_PARTSCAN)) {
                /* Kernel 5.8 vanilla doesn't properly propagate the partition scanning flag
                 * into the block device. Let's hence verify if things work correctly here
                 * before returning. */

                r = blockdev_partscan_enabled_fd(fd);
                if (r < 0)
                        return r;
                if (r == 0) {
                        log_debug("LOOP_CONFIGURE is broken, doesn't honour LO_FLAGS_PARTSCAN. Falling back to LOOP_SET_STATUS64.");
                        broken = true;
                }
        }

        r = loop_configure_verify_direct_io(fd, c);
        if (r < 0)
                return r;

        return !broken;
}
163

164
/* Fallback configuration path for kernels where LOOP_CONFIGURE is unavailable
 * or broken: the backing fd is assumed to be already attached via LOOP_SET_FD,
 * and the remaining settings from 'c' are applied via LOOP_SET_STATUS64 and
 * explicit per-setting ioctls. Returns 0 on success, negative errno on failure
 * (including -ENOANO if direct IO could not be enabled). */
static int loop_configure_fallback(int fd, const struct loop_config *c) {
        struct loop_info64 info_copy;
        int r;

        assert(fd >= 0);
        assert(c);

        /* Only some of the flags LOOP_CONFIGURE can set are also settable via LOOP_SET_STATUS64, hence mask
         * them out. */
        info_copy = c->info;
        info_copy.lo_flags &= LOOP_SET_STATUS_SETTABLE_FLAGS;

        /* Since kernel commit 5db470e229e22b7eda6e23b5566e532c96fb5bc3 (kernel v5.0) the LOOP_SET_STATUS64
         * ioctl can return EAGAIN in case we change the info.lo_offset field, if someone else is accessing the
         * block device while we try to reconfigure it. This is a pretty common case, since udev might
         * instantly start probing the device as soon as we attach an fd to it. Hence handle it in two ways:
         * first, let's take the BSD lock to ensure that udev will not step in between the point in
         * time where we attach the fd and where we reconfigure the device. Secondly, let's wait 50ms on
         * EAGAIN and retry. The former should be an efficient mechanism to avoid we have to wait 50ms
         * needlessly if we are just racing against udev. The latter is protection against all other cases,
         * i.e. peers that do not take the BSD lock. */

        for (unsigned n_attempts = 0;;) {
                if (ioctl(fd, LOOP_SET_STATUS64, &info_copy) >= 0)
                        break;

                if (errno != EAGAIN || ++n_attempts >= 64)
                        return log_debug_errno(errno, "Failed to configure loopback block device: %m");

                /* Sleep some random time, but at least 10ms, at most 250ms. Increase the delay the more
                 * failed attempts we see */
                (void) usleep_safe(UINT64_C(10) * USEC_PER_MSEC +
                              random_u64_range(UINT64_C(240) * USEC_PER_MSEC * n_attempts/64));
        }

        /* If a block size is requested then try to configure it. If that doesn't work, ignore errors, but
         * afterwards, let's validate what is in effect, and if it doesn't match what we want, fail */
        if (c->block_size != 0) {
                uint32_t ssz;

                if (ioctl(fd, LOOP_SET_BLOCK_SIZE, (unsigned long) c->block_size) < 0)
                        log_debug_errno(errno, "Failed to set sector size, ignoring: %m");

                r = blockdev_get_sector_size(fd, &ssz);
                if (r < 0)
                        return log_debug_errno(r, "Failed to read sector size: %m");
                if (ssz != c->block_size)
                        return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Sector size of loopback device doesn't match what we requested, refusing.");
        }

        /* LO_FLAGS_DIRECT_IO is a flag we need to configure via an explicit ioctl. */
        if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_DIRECT_IO))
                if (ioctl(fd, LOOP_SET_DIRECT_IO, 1UL) < 0)
                        log_debug_errno(errno, "Failed to enable direct IO mode, ignoring: %m");

        return loop_configure_verify_direct_io(fd, c);
}
221

222
/* Binds the backing fd in 'c' to /dev/loop<nr> and configures it, preferring
 * LOOP_CONFIGURE and falling back to LOOP_SET_FD + LOOP_SET_STATUS64 on kernels
 * where LOOP_CONFIGURE is missing or broken (remembered in a static flag).
 * On success returns 0 and stores a new LoopDevice (holding the device fd and a
 * BSD lock fd adjusted to 'lock_op') in *ret.
 * Notable errors callers are expected to retry on: -EBUSY (device already bound,
 * or LOOP_CONFIGURE silently misbehaved), -EUCLEAN (stale partition devices were
 * removed first), -ENOANO (direct IO could not be enabled). */
static int loop_configure(
                int nr,
                int open_flags,
                int lock_op,
                const struct loop_config *c,
                LoopDevice **ret) {

        static bool loop_configure_broken = false;

        _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
        _cleanup_(cleanup_clear_loop_close) int loop_with_fd = -EBADF; /* This must be declared before lock_fd. */
        _cleanup_close_ int fd = -EBADF, lock_fd = -EBADF;
        _cleanup_free_ char *node = NULL;
        uint64_t diskseq = 0;
        dev_t devno;
        int r;

        assert(nr >= 0);
        assert(c);
        assert(ret);

        if (asprintf(&node, "/dev/loop%i", nr) < 0)
                return log_oom_debug();

        r = sd_device_new_from_devname(&dev, node);
        if (r < 0)
                return log_debug_errno(r, "Failed to create sd_device object for \"%s\": %m", node);

        r = sd_device_get_devnum(dev, &devno);
        if (r < 0)
                return log_device_debug_errno(dev, r, "Failed to get devnum: %m");

        fd = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
        if (fd < 0)
                return log_device_debug_errno(dev, fd, "Failed to open device: %m");

        /* Let's lock the device before we do anything. We take the BSD lock on a second, separately opened
         * fd for the device. udev after all watches for close() events (specifically IN_CLOSE_WRITE) on
         * block devices to reprobe them, hence by having a separate fd we will later close() we can ensure
         * we trigger udev after everything is done. If we'd lock our own fd instead and keep it open for a
         * long time udev would possibly never run on it again, even though the fd is unlocked, simply
         * because we never close() it. It also has the nice benefit we can use the _cleanup_close_ logic to
         * automatically release the lock, after we are done. */
        lock_fd = open_lock_fd(fd, LOCK_EX);
        if (lock_fd < 0)
                return log_device_debug_errno(dev, lock_fd, "Failed to acquire lock: %m");

        log_device_debug(dev, "Acquired exclusive lock.");

        /* Let's see if backing file is really unattached. Someone may already attach a backing file without
         * taking BSD lock. */
        r = loop_is_bound(fd);
        if (r < 0)
                return log_device_debug_errno(dev, r, "Failed to check if the loopback block device is bound: %m");
        if (r > 0)
                return log_device_debug_errno(dev, SYNTHETIC_ERRNO(EBUSY),
                                              "The loopback block device is already bound, ignoring.");

        /* Let's see if the device is really detached, i.e. currently has no associated partition block
         * devices. On various kernels (such as 5.8) it is possible to have a loopback block device that
         * superficially is detached but still has partition block devices associated for it. Let's then
         * manually remove the partitions via BLKPG, and tell the caller we did that via EUCLEAN, so they try
         * again. */
        r = block_device_remove_all_partitions(dev, fd);
        if (r < 0)
                return log_device_debug_errno(dev, r, "Failed to remove partitions on the loopback block device: %m");
        if (r > 0)
                /* Removed all partitions. Let's report this to the caller, to try again, and count this as
                 * an attempt. */
                return log_device_debug_errno(dev, SYNTHETIC_ERRNO(EUCLEAN),
                                              "Removed partitions on the loopback block device.");

        if (!loop_configure_broken) {
                if (ioctl(fd, LOOP_CONFIGURE, c) < 0) {
                        /* Do fallback only if LOOP_CONFIGURE is not supported, propagate all other errors. */
                        if (!ERRNO_IS_IOCTL_NOT_SUPPORTED(errno))
                                return log_device_debug_errno(dev, errno, "ioctl(LOOP_CONFIGURE) failed: %m");

                        loop_configure_broken = true;
                } else {
                        loop_with_fd = TAKE_FD(fd);

                        r = loop_configure_verify(loop_with_fd, c);
                        if (r < 0)
                                return log_device_debug_errno(dev, r, "Failed to verify if loopback block device is correctly configured: %m");
                        if (r == 0) {
                                /* LOOP_CONFIGURE doesn't work. Remember that. */
                                loop_configure_broken = true;

                                /* We return EBUSY here instead of retrying immediately with LOOP_SET_FD,
                                 * because LOOP_CLR_FD is async: if the operation cannot be executed right
                                 * away it just sets the autoclear flag on the device. This means there's a
                                 * good chance we cannot actually reuse the loopback device right-away. Hence
                                 * let's assume it's busy, avoid the trouble and let the calling loop call us
                                 * again with a new, likely unused device. */
                                return -EBUSY;
                        }
                }
        }

        if (loop_configure_broken) {
                if (ioctl(fd, LOOP_SET_FD, c->fd) < 0)
                        return log_device_debug_errno(dev, errno, "ioctl(LOOP_SET_FD) failed: %m");

                loop_with_fd = TAKE_FD(fd);

                r = loop_configure_fallback(loop_with_fd, c);
                if (r < 0)
                        return r;
        }

        r = fd_get_diskseq(loop_with_fd, &diskseq);
        if (r < 0 && r != -EOPNOTSUPP)
                return log_device_debug_errno(dev, r, "Failed to get diskseq: %m");

        /* Adjust the lock we hold (currently LOCK_EX) to what the caller asked for. */
        switch (lock_op & ~LOCK_NB) {
        case LOCK_EX: /* Already in effect */
                break;
        case LOCK_SH: /* Downgrade */
                if (flock(lock_fd, lock_op) < 0)
                        return log_device_debug_errno(dev, errno, "Failed to downgrade lock level: %m");
                break;
        case LOCK_UN: /* Release */
                lock_fd = safe_close(lock_fd);
                break;
        default:
                assert_not_reached();
        }

        uint64_t device_size;
        r = blockdev_get_device_size(loop_with_fd, &device_size);
        if (r < 0)
                return log_device_debug_errno(dev, r, "Failed to get loopback device size: %m");

        LoopDevice *d = new(LoopDevice, 1);
        if (!d)
                return log_oom_debug();

        *d = (LoopDevice) {
                .n_ref = 1,
                .fd = TAKE_FD(loop_with_fd),
                .lock_fd = TAKE_FD(lock_fd),
                .node = TAKE_PTR(node),
                .nr = nr,
                .devno = devno,
                .dev = TAKE_PTR(dev),
                .diskseq = diskseq,
                .sector_size = c->block_size,
                .device_size = device_size,
                .created = true,
        };

        *ret = TAKE_PTR(d);
        return 0;
}
377

378
static int fd_get_max_discard(int fd, uint64_t *ret) {
×
379
        struct stat st;
×
380
        char sysfs_path[STRLEN("/sys/dev/block/" ":" "/queue/discard_max_bytes") + DECIMAL_STR_MAX(dev_t) * 2 + 1];
×
381
        _cleanup_free_ char *buffer = NULL;
×
382
        int r;
×
383

384
        assert(ret);
×
385

386
        if (fstat(ASSERT_FD(fd), &st) < 0)
×
387
                return -errno;
×
388

389
        if (!S_ISBLK(st.st_mode))
×
390
                return -ENOTBLK;
391

392
        xsprintf(sysfs_path, "/sys/dev/block/" DEVNUM_FORMAT_STR "/queue/discard_max_bytes", DEVNUM_FORMAT_VAL(st.st_rdev));
×
393

394
        r = read_one_line_file(sysfs_path, &buffer);
×
395
        if (r < 0)
×
396
                return r;
397

398
        return safe_atou64(buffer, ret);
×
399
}
400

401
static int fd_set_max_discard(int fd, uint64_t max_discard) {
×
402
        struct stat st;
×
403
        char sysfs_path[STRLEN("/sys/dev/block/" ":" "/queue/discard_max_bytes") + DECIMAL_STR_MAX(dev_t) * 2 + 1];
×
404

405
        if (fstat(ASSERT_FD(fd), &st) < 0)
×
406
                return -errno;
×
407

408
        if (!S_ISBLK(st.st_mode))
×
409
                return -ENOTBLK;
410

411
        xsprintf(sysfs_path, "/sys/dev/block/" DEVNUM_FORMAT_STR "/queue/discard_max_bytes", DEVNUM_FORMAT_VAL(st.st_rdev));
×
412

413
        return write_string_filef(sysfs_path, WRITE_STRING_FILE_DISABLE_BUFFER, "%" PRIu64, max_discard);
×
414
}
415

416
static int probe_sector_size_harder(int fd, uint32_t *ret) {
2,421✔
417
        _cleanup_close_ int non_direct_io_fd = -EBADF;
2,421✔
418
        int probe_fd, f_flags;
2,421✔
419

420
        assert(fd >= 0);
2,421✔
421
        assert(ret);
2,421✔
422

423
        /* Wraps probe_sector_size() but handles O_DIRECT: if the fd is opened with O_DIRECT there are
424
         * strict alignment requirements for reads, so we temporarily reopen it without O_DIRECT for the
425
         * probing logic. */
426

427
        f_flags = fcntl(fd, F_GETFL);
2,421✔
428
        if (f_flags < 0)
2,421✔
429
                return -errno;
×
430

431
        if (FLAGS_SET(f_flags, O_DIRECT)) {
2,421✔
432
                non_direct_io_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
2,414✔
433
                if (non_direct_io_fd < 0)
2,414✔
434
                        return non_direct_io_fd;
435

436
                probe_fd = non_direct_io_fd;
437
        } else
438
                probe_fd = fd;
439

440
        return probe_sector_size(probe_fd, ret);
2,421✔
441
}
442

443
static int loop_device_make_internal(
2,554✔
444
                const char *path,
445
                int fd,
446
                int open_flags,
447
                uint64_t offset,
448
                uint64_t size,
449
                uint32_t sector_size,
450
                uint32_t loop_flags,
451
                int lock_op,
452
                LoopDevice **ret) {
453

454
        _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
×
455
        _cleanup_close_ int reopened_fd = -EBADF, control = -EBADF;
5,108✔
456
        _cleanup_free_ char *backing_file = NULL;
2,554✔
457
        struct loop_config config;
2,554✔
458
        int r, f_flags;
2,554✔
459
        struct stat st;
2,554✔
460

461
        assert(fd >= 0);
2,554✔
462
        assert(open_flags < 0 || IN_SET(open_flags, O_RDWR, O_RDONLY));
2,554✔
463
        assert(ret);
2,554✔
464

465
        /* sector_size interpretation:
466
         *   0          → use device sector size for block devices, 512 for regular files
467
         *   UINT32_MAX → probe GPT header to find the right sector size, fall back to 0 behavior
468
         *   other      → use the specified sector size explicitly */
469

470
        f_flags = fcntl(fd, F_GETFL);
2,554✔
471
        if (f_flags < 0)
2,554✔
472
                return -errno;
×
473

474
        if (open_flags < 0) {
2,554✔
475
                /* If open_flags is unset, initialize it from the open fd */
476
                if (FLAGS_SET(f_flags, O_PATH))
6✔
477
                        return log_debug_errno(SYNTHETIC_ERRNO(EBADFD), "Access mode of image file indicates O_PATH, cannot determine read/write flags.");
×
478

479
                open_flags = f_flags & O_ACCMODE_STRICT;
6✔
480
                if (!IN_SET(open_flags, O_RDWR, O_RDONLY))
6✔
481
                        return log_debug_errno(SYNTHETIC_ERRNO(EBADFD), "Access mode of image file is write only (?)");
×
482
        }
483

484
        if (sector_size == UINT32_MAX) {
2,554✔
485
                /* If sector size is specified as UINT32_MAX, we'll try to probe the right sector size
486
                 * by looking for the GPT partition header at various offsets. This of course only works
487
                 * if the image already has a disk label. */
488

489
                r = probe_sector_size_harder(fd, &sector_size);
2,421✔
490
                if (r < 0)
2,421✔
491
                        return r;
492
                if (r == 0)
2,421✔
493
                        sector_size = 0; /* If we can't probe anything, use default sector size. */
2,306✔
494
        }
495

496
        if (fstat(fd, &st) < 0)
2,554✔
497
                return -errno;
×
498

499
        if (S_ISBLK(st.st_mode)) {
2,554✔
500
                uint32_t device_ssz;
×
501
                r = blockdev_get_sector_size(fd, &device_ssz);
×
502
                if (r < 0)
×
503
                        return r;
×
504

505
                if (sector_size == 0)
×
506
                        sector_size = device_ssz;
×
507

508
                if (offset == 0 && IN_SET(size, 0, UINT64_MAX) && sector_size == device_ssz)
×
509
                        /* If this is already a block device and we are supposed to cover the whole of it
510
                         * then store an fd to the original open device node — and do not actually create
511
                         * an unnecessary loopback device for it. If an explicit sector size was requested
512
                         * that differs from the device sector size, or if the probed GPT sector size
513
                         * differs (e.g. CD-ROMs with 2048-byte blocks but a 512-byte sector GPT), create
514
                         * a real loop device to change the sector size. */
515
                        return loop_device_open_from_fd(fd, open_flags, lock_op, ret);
×
516
        } else {
517
                r = stat_verify_regular(&st);
2,554✔
518
                if (r < 0)
2,554✔
519
                        return r;
520

521
                if (sector_size == 0)
2,554✔
522
                        sector_size = 512;
2,306✔
523
        }
524

525
        if (path) {
2,554✔
526
                r = path_make_absolute_cwd(path, &backing_file);
335✔
527
                if (r < 0)
335✔
528
                        return r;
529

530
                path_simplify(backing_file);
335✔
531
        } else {
532
                r = fd_get_path(fd, &backing_file);
2,219✔
533
                if (r < 0)
2,219✔
534
                        return r;
535
        }
536

537
        if (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) != FLAGS_SET(f_flags, O_DIRECT)) {
2,554✔
538
                /* If LO_FLAGS_DIRECT_IO is requested, then make sure we have the fd open with O_DIRECT, as
539
                 * that's required. Conversely, if it's off require that O_DIRECT is off too (that's because
540
                 * new kernels will implicitly enable LO_FLAGS_DIRECT_IO if O_DIRECT is set).
541
                 *
542
                 * Our intention here is that LO_FLAGS_DIRECT_IO is the primary knob, and O_DIRECT derived
543
                 * from that automatically. */
544

545
                reopened_fd = fd_reopen(fd, (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0)|O_CLOEXEC|O_NONBLOCK|open_flags);
139✔
546
                if (reopened_fd < 0) {
139✔
547
                        if (!FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO))
×
548
                                return log_debug_errno(reopened_fd, "Failed to reopen file descriptor without O_DIRECT: %m");
×
549

550
                        /* Some file systems might not support O_DIRECT, let's gracefully continue without it then. */
551
                        log_debug_errno(reopened_fd, "Failed to enable O_DIRECT for backing file descriptor for loopback device. Continuing without.");
×
552
                        loop_flags &= ~LO_FLAGS_DIRECT_IO;
×
553
                } else
554
                        fd = reopened_fd; /* From now on, operate on our new O_DIRECT fd */
555
        }
556

557
        control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
2,554✔
558
        if (control < 0)
2,554✔
559
                return -errno;
20✔
560

561
        /* Strip LO_FLAGS_PARTSCAN from LOOP_CONFIGURE and enable it afterwards via
562
         * LOOP_SET_STATUS64 to work around a kernel race: LOOP_CONFIGURE sends a uevent with
563
         * GD_NEED_PART_SCAN set before calling loop_reread_partitions(). If udev opens the device in
564
         * response, blkdev_get_whole() triggers a first scan, then loop_reread_partitions() does a
565
         * second scan that briefly drops all partitions. By configuring without partscan,
566
         * GD_SUPPRESS_PART_SCAN stays set, making any concurrent open harmless. LOOP_SET_STATUS64
567
         * doesn't call disk_force_media_change() so it doesn't set GD_NEED_PART_SCAN.
568
         *
569
         * See: https://lore.kernel.org/linux-block/20260330081819.652890-1-daan@amutable.com/T/#u
570
         * Drop this workaround once the kernel fix is widely available. */
571
        bool deferred_partscan = FLAGS_SET(loop_flags, LO_FLAGS_PARTSCAN);
2,534✔
572

573
        config = (struct loop_config) {
5,068✔
574
                .fd = fd,
575
                .block_size = sector_size,
576
                .info = {
577
                        /* Use the specified flags, but configure the read-only flag from the open flags, and force autoclear */
578
                        .lo_flags = ((loop_flags & ~(LO_FLAGS_READ_ONLY|LO_FLAGS_PARTSCAN)) |
5,068✔
579
                                     ((open_flags & O_ACCMODE_STRICT) == O_RDONLY ? LO_FLAGS_READ_ONLY : 0) |
2,534✔
580
                                     LO_FLAGS_AUTOCLEAR),
581
                        .lo_offset = offset,
582
                        .lo_sizelimit = size == UINT64_MAX ? 0 : size,
2,534✔
583
                },
584
        };
585

586
        /* Loop around LOOP_CTL_GET_FREE, since at the moment we attempt to open the returned device it might
587
         * be gone already, taken by somebody else racing against us. */
588
        for (unsigned n_attempts = 0;;) {
2,534✔
589
                usec_t usec;
2,590✔
590
                int nr;
2,590✔
591

592
                /* Let's take a lock on the control device first. On a busy system, where many programs
593
                 * attempt to allocate a loopback device at the same time, we might otherwise keep looping
594
                 * around relatively heavy operations: asking for a free loopback device, then opening it,
595
                 * validating it, attaching something to it. Let's serialize this whole operation, to make
596
                 * unnecessary busywork less likely. Note that this is just something we do to optimize our
597
                 * own code (and whoever else decides to use LOCK_EX locks for this), taking this lock is not
598
                 * necessary, it just means it's less likely we have to iterate through this loop again and
599
                 * again if our own code races against our own code.
600
                 *
601
                 * Note: our lock protocol is to take the /dev/loop-control lock first, and the block device
602
                 * lock second, if both are taken, and always in this order, to avoid ABBA locking issues. */
603
                if (flock(control, LOCK_EX) < 0)
2,590✔
604
                        return -errno;
×
605

606
                nr = ioctl(control, LOOP_CTL_GET_FREE);
2,590✔
607
                if (nr < 0)
2,590✔
608
                        return -errno;
×
609

610
                r = loop_configure(nr, open_flags, lock_op, &config, &d);
2,590✔
611
                if (r >= 0)
2,590✔
612
                        break;
613

614
                /* -ENODEV or friends: Somebody might've gotten the same number from the kernel, used the
615
                 * device, and called LOOP_CTL_REMOVE on it. Let's retry with a new number.
616
                 * -EBUSY: a file descriptor is already bound to the loopback block device.
617
                 * -EUCLEAN: some left-over partition devices that were cleaned up.
618
                 * -ENOANO: we tried to use LO_FLAGS_DIRECT_IO but the kernel rejected it. */
619
                if (!ERRNO_IS_DEVICE_ABSENT(r) && !IN_SET(r, -EBUSY, -EUCLEAN, -ENOANO))
56✔
620
                        return r;
621

622
                /* OK, this didn't work, let's try again a bit later, but first release the lock on the
623
                 * control device */
624
                if (flock(control, LOCK_UN) < 0)
56✔
625
                        return -errno;
×
626

627
                if (++n_attempts >= 64) /* Give up eventually */
56✔
628
                        return -EBUSY;
629

630
                /* If we failed to enable direct IO mode, let's retry without it. We restart the process as
631
                 * on some combination of kernel version and storage filesystem, the kernel is very unhappy
632
                 * about a failed DIRECT_IO enablement and throws I/O errors. */
633
                if (r == -ENOANO && FLAGS_SET(config.info.lo_flags, LO_FLAGS_DIRECT_IO)) {
56✔
634
                        config.info.lo_flags &= ~LO_FLAGS_DIRECT_IO;
×
635
                        open_flags &= ~O_DIRECT;
×
636

637
                        int non_direct_io_fd = fd_reopen(config.fd, O_CLOEXEC|O_NONBLOCK|open_flags);
×
638
                        if (non_direct_io_fd < 0)
×
639
                                return log_debug_errno(
×
640
                                                non_direct_io_fd,
641
                                                "Failed to reopen file descriptor without O_DIRECT: %m");
642

643
                        safe_close(reopened_fd);
×
644
                        fd = config.fd = /* For cleanups */ reopened_fd = non_direct_io_fd;
×
645
                }
646

647
                /* Wait some random time, to make collision less likely. Let's pick a random time in the
648
                 * range 0ms…250ms, linearly scaled by the number of failed attempts. */
649
                usec = random_u64_range(UINT64_C(10) * USEC_PER_MSEC +
112✔
650
                                        UINT64_C(240) * USEC_PER_MSEC * n_attempts/64);
56✔
651
                log_debug("Trying again after %s.", FORMAT_TIMESPAN(usec, USEC_PER_MSEC));
56✔
652
                (void) usleep_safe(usec);
56✔
653
        }
654

655
        if (S_ISBLK(st.st_mode)) {
2,534✔
656
                /* Propagate backing device's discard byte limit to our loopback block device. We do this in
657
                 * order to avoid that (supposedly quick) discard requests on the loopback device get turned
658
                 * into (likely slow) zero-out requests on backing devices that do not support discarding
659
                 * natively, but do support zero-out. */
660
                uint64_t discard_max_bytes;
×
661

662
                r = fd_get_max_discard(fd, &discard_max_bytes);
×
663
                if (r < 0)
×
664
                        log_debug_errno(r, "Failed to read 'discard_max_bytes' of backing device, ignoring: %m");
×
665
                else {
666
                        r = fd_set_max_discard(d->fd, discard_max_bytes);
×
667
                        if (r < 0)
×
668
                                log_debug_errno(r, "Failed to write 'discard_max_bytes' of loop device, ignoring: %m");
×
669
                }
670
        }
671

672
        if (deferred_partscan) {
2,534✔
673
                /* Open+close to drain GD_NEED_PART_SCAN harmlessly (GD_SUPPRESS_PART_SCAN is still
674
                 * set so no partitions appear). Then enable partscan via LOOP_SET_STATUS64. */
675
                int tmp_fd = fd_reopen(d->fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
2,357✔
676
                if (tmp_fd < 0)
2,357✔
677
                        return log_debug_errno(tmp_fd, "Failed to reopen loop device to drain partscan flag: %m");
×
678
                safe_close(tmp_fd);
2,357✔
679

680
                struct loop_info64 info;
2,357✔
681
                if (ioctl(d->fd, LOOP_GET_STATUS64, &info) < 0)
2,357✔
682
                        return log_debug_errno(errno, "Failed to get loop device status: %m");
×
683

684
                info.lo_flags |= LO_FLAGS_PARTSCAN;
2,357✔
685

686
                if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
2,357✔
687
                        return log_debug_errno(errno, "Failed to enable partscan on loop device: %m");
×
688
        }
689

690
        d->backing_file = TAKE_PTR(backing_file);
2,534✔
691
        d->backing_inode = st.st_ino;
2,534✔
692
        d->backing_devno = st.st_dev;
2,534✔
693

694
        log_debug("Successfully acquired %s, devno=%u:%u, nr=%i, diskseq=%" PRIu64,
2,534✔
695
                  d->node,
696
                  major(d->devno), minor(d->devno),
697
                  d->nr,
698
                  d->diskseq);
699

700
        *ret = TAKE_PTR(d);
2,534✔
701
        return 0;
2,534✔
702
}
703

704
/* Applies the $SYSTEMD_LOOP_DIRECT_IO override to the requested loop flags. Direct I/O is the default;
 * only an explicit "0"/false in the environment variable turns it off. */
static uint32_t loop_flags_mangle(uint32_t loop_flags) {
        int r = getenv_bool("SYSTEMD_LOOP_DIRECT_IO");
        if (r < 0 && r != -ENXIO) /* -ENXIO == variable unset, which is fine */
                log_debug_errno(r, "Failed to parse $SYSTEMD_LOOP_DIRECT_IO, ignoring: %m");

        /* Turn on LO_FLAGS_DIRECT_IO by default, unless explicitly configured to off. */
        bool direct_io = r != 0;
        return UPDATE_FLAG(loop_flags, LO_FLAGS_DIRECT_IO, direct_io);
}
713

714
int loop_device_make(
139✔
715
                int fd,
716
                int open_flags,
717
                uint64_t offset,
718
                uint64_t size,
719
                uint32_t sector_size,
720
                uint32_t loop_flags,
721
                int lock_op,
722
                LoopDevice **ret) {
723

724
        assert(fd >= 0);
139✔
725
        assert(ret);
139✔
726

727
        return loop_device_make_internal(
139✔
728
                        NULL,
729
                        fd,
730
                        open_flags,
731
                        offset,
732
                        size,
733
                        sector_size,
734
                        loop_flags_mangle(loop_flags),
735
                        lock_op,
736
                        ret);
737
}
738

739
/* Allocates a loop device for the file (or block device) at @path, relative to @dir_fd.
 *
 * If @open_flags is negative we try O_RDWR first and retry O_RDONLY if the file system refuses
 * writing. Similarly, O_DIRECT is attempted first (when LO_FLAGS_DIRECT_IO is requested) and dropped
 * automatically if the file system does not support it. */
int loop_device_make_by_path_at(
                int dir_fd,
                const char *path,
                int open_flags,
                uint32_t sector_size,
                uint32_t loop_flags,
                int lock_op,
                LoopDevice **ret) {

        int r, basic_flags, direct_flags, rdwr_flags;
        _cleanup_close_ int fd = -EBADF;
        bool direct = false; /* whether the fd we ended up with actually has O_DIRECT on */

        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
        assert(ret);
        assert(open_flags < 0 || IN_SET(open_flags, O_RDWR, O_RDONLY));

        /* Passing < 0 as open_flags here means we'll try to open the device writable if we can, retrying
         * read-only if we cannot. */

        loop_flags = loop_flags_mangle(loop_flags);

        /* Let's open with O_DIRECT if we can. But not all file systems support that, hence fall back to
         * non-O_DIRECT mode automatically, if it fails. */

        basic_flags = O_CLOEXEC|O_NONBLOCK|O_NOCTTY;
        direct_flags = FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0;
        rdwr_flags = open_flags >= 0 ? open_flags : O_RDWR;

        fd = xopenat(dir_fd, path, basic_flags|direct_flags|rdwr_flags);
        if (fd < 0 && direct_flags != 0) /* If we had O_DIRECT on, and things failed with that, let's immediately try again without */
                fd = xopenat(dir_fd, path, basic_flags|rdwr_flags);
        else
                direct = direct_flags != 0;
        if (fd < 0) {
                r = fd;

                /* Retry read-only? */
                if (open_flags >= 0 || !ERRNO_IS_NEG_FS_WRITE_REFUSED(r))
                        return r;

                fd = xopenat(dir_fd, path, basic_flags|direct_flags|O_RDONLY);
                if (fd < 0 && direct_flags != 0) /* as above */
                        fd = xopenat(dir_fd, path, basic_flags|O_RDONLY);
                else
                        direct = direct_flags != 0;
                if (fd < 0)
                        return r; /* Propagate original error */

                open_flags = O_RDONLY;
        } else if (open_flags < 0)
                open_flags = O_RDWR;

        log_debug("Opened %s in %s access mode%s, with O_DIRECT %s%s.",
                  path ?: "loop device",
                  open_flags == O_RDWR ? "O_RDWR" : "O_RDONLY",
                  open_flags != rdwr_flags ? " (O_RDWR was requested but not allowed)" : "",
                  direct ? "enabled" : "disabled",
                  direct != (direct_flags != 0) ? " (O_DIRECT was requested but not supported)" : "");

        return loop_device_make_internal(
                        dir_fd == AT_FDCWD ? path : NULL,
                        fd,
                        open_flags,
                        /* offset= */ 0,
                        /* size= */ 0,
                        sector_size,
                        loop_flags,
                        lock_op,
                        ret);
}
810

811
int loop_device_make_by_path_memory(
1✔
812
                const char *path,
813
                int open_flags,
814
                uint32_t sector_size,
815
                uint32_t loop_flags,
816
                int lock_op,
817
                LoopDevice **ret) {
818

819
        _cleanup_close_ int fd = -EBADF, mfd = -EBADF;
1✔
820
        _cleanup_free_ char *fn = NULL;
1✔
821
        struct stat st;
1✔
822
        int r;
1✔
823

824
        assert(path);
1✔
825
        assert(open_flags < 0 || IN_SET(open_flags, O_RDWR, O_RDONLY));
1✔
826
        assert(ret);
1✔
827

828
        /* memfds are always writable, so default to O_RDWR when auto-detecting. */
829
        if (open_flags < 0)
1✔
830
                open_flags = O_RDWR;
1✔
831

832
        loop_flags &= ~LO_FLAGS_DIRECT_IO; /* memfds don't support O_DIRECT, hence LO_FLAGS_DIRECT_IO can't be used either */
1✔
833

834
        fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|O_RDONLY);
1✔
835
        if (fd < 0)
1✔
836
                return -errno;
×
837

838
        if (fstat(fd, &st) < 0)
1✔
839
                return -errno;
×
840

841
        if (!S_ISREG(st.st_mode) && !S_ISBLK(st.st_mode))
1✔
842
                return -EBADF;
843

844
        r = path_extract_filename(path, &fn);
1✔
845
        if (r < 0)
1✔
846
                return r;
847

848
        mfd = memfd_clone_fd(fd, fn, open_flags|O_CLOEXEC);
1✔
849
        if (mfd < 0)
1✔
850
                return mfd;
851

852
        fd = safe_close(fd); /* Let's close the original early */
1✔
853

854
        return loop_device_make_internal(NULL, mfd, open_flags, 0, 0, sector_size, loop_flags, lock_op, ret);
1✔
855
}
856

857
/* Destructor for LoopDevice objects. Unless the device is foreign or was relinquished, this also
 * tears the loop device down: sync, remove partition children, LOOP_CLR_FD, and finally
 * LOOP_CTL_REMOVE — all while holding the /dev/loop-control lock so concurrent allocators don't see
 * the device as free while we are removing it. */
static LoopDevice* loop_device_free(LoopDevice *d) {
        _cleanup_close_ int control = -EBADF;
        int r;

        if (!d)
                return NULL;

        /* Release any lock we might have on the device first. We want to open+lock the /dev/loop-control
         * device below, but our lock protocol says that if both control and block device locks are taken,
         * the control lock needs to be taken first, the block device lock second — in order to avoid ABBA
         * locking issues. Moreover, we want to issue LOOP_CLR_FD on the block device further down, and that
         * would fail if we had another fd open to the device. */
        d->lock_fd = safe_close(d->lock_fd);

        /* Let's open the control device early, and lock it, so that we can release our block device and
         * delete it in a synchronized fashion, and allocators won't needlessly see the block device as free
         * while we are about to delete it. */
        if (!LOOP_DEVICE_IS_FOREIGN(d) && !d->relinquished) {
                control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
                if (control < 0)
                        log_debug_errno(errno, "Failed to open loop control device, cannot remove loop device '%s', ignoring: %m", strna(d->node));
                else if (flock(control, LOCK_EX) < 0)
                        log_debug_errno(errno, "Failed to lock loop control device, ignoring: %m");
        }

        /* Then let's release the loopback block device */
        if (d->fd >= 0) {
                /* Implicitly sync the device, since otherwise in-flight blocks might not get written */
                if (fsync(d->fd) < 0)
                        log_debug_errno(errno, "Failed to sync loop block device, ignoring: %m");

                if (!LOOP_DEVICE_IS_FOREIGN(d) && !d->relinquished) {
                        /* We are supposed to clear the loopback device. Let's do this synchronously: lock
                         * the device, manually remove all partitions and then clear it. This should ensure
                         * udev doesn't concurrently access the devices, and we can be reasonably sure that
                         * once we are done here the device is cleared and all its partition children
                         * removed. Note that we lock our primary device fd here (and not a separate locking
                         * fd, as we do during allocation, since we want to keep the lock all the way through
                         * the LOOP_CLR_FD, but that call would fail if we had more than one fd open.) */

                        if (flock(d->fd, LOCK_EX) < 0)
                                log_debug_errno(errno, "Failed to lock loop block device, ignoring: %m");

                        r = block_device_remove_all_partitions(d->dev, d->fd);
                        if (r < 0)
                                log_debug_errno(r, "Failed to remove partitions of loopback block device, ignoring: %m");

                        if (ioctl(d->fd, LOOP_CLR_FD) < 0)
                                log_debug_errno(errno, "Failed to clear loop device, ignoring: %m");
                }

                safe_close(d->fd);
        }

        /* Now that the block device is released, let's also try to remove it */
        if (control >= 0) {
                useconds_t delay = 5 * USEC_PER_MSEC;  /* A total delay of 5090 ms between 39 attempts,
                                                        * (4*5 + 5*10 + 5*20 + … + 3*640) = 5090. */

                for (unsigned attempt = 1;; attempt++) {
                        if (ioctl(control, LOOP_CTL_REMOVE, d->nr) >= 0)
                                break;
                        /* Give up on any error other than EBUSY, or after the 38th busy attempt */
                        if (errno != EBUSY || attempt > 38) {
                                log_debug_errno(errno, "Failed to remove device %s: %m", strna(d->node));
                                break;
                        }
                        /* Double the delay every 5 attempts, per the schedule described above */
                        if (attempt % 5 == 0) {
                                log_debug("Device is still busy after %u attempts…", attempt);
                                delay *= 2;
                        }

                        (void) usleep_safe(delay);
                }
        }

        free(d->node);
        sd_device_unref(d->dev);
        free(d->backing_file);
        return mfree(d);
}
937

938
/* Generates the public loop_device_ref()/loop_device_unref() pair, with loop_device_free() invoked
 * when the reference count drops to zero. */
DEFINE_TRIVIAL_REF_UNREF_FUNC(LoopDevice, loop_device, loop_device_free);
8,019✔
939

940
/* Hands destruction of the loop device over to the kernel: after this call, freeing the LoopDevice
 * object no longer detaches/removes the device; the "auto-clear" logic enabled at creation time is
 * expected to clean it up instead. */
void loop_device_relinquish(LoopDevice *d) {
        assert(d);

        d->relinquished = true;
}
195✔
948

949
/* Undoes loop_device_relinquish(): we take back responsibility for destroying the device on free. */
void loop_device_unrelinquish(LoopDevice *d) {
        assert(d);

        d->relinquished = false;
}
22✔
953

954
/* Wraps an already-existing loop (or other block) device, referenced by the sd_device object, in a
 * LoopDevice object. The result is marked "relinquished", i.e. foreign: freeing the object will not
 * destroy the underlying device. */
int loop_device_open(
                sd_device *dev,
                int open_flags,
                int lock_op,
                LoopDevice **ret) {

        _cleanup_close_ int fd = -EBADF, lock_fd = -EBADF;
        _cleanup_free_ char *node = NULL, *backing_file = NULL;
        dev_t devnum, backing_devno = 0;
        struct loop_info64 info;
        ino_t backing_inode = 0;
        uint64_t diskseq = 0;
        LoopDevice *d;
        const char *s;
        int r, nr = -1; /* nr stays -1 if this is not actually a loopback device */

        assert(dev);
        assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
        assert(ret);

        /* Even if fd is provided through the argument in loop_device_open_from_fd(), we reopen the inode
         * here, instead of keeping just a dup() clone of it around, since we want to ensure that the
         * O_DIRECT flag of the handle we keep is off, we have our own file index, and have the right
         * read/write mode in effect. */
        fd = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
        if (fd < 0)
                return fd;

        if ((lock_op & ~LOCK_NB) != LOCK_UN) {
                lock_fd = open_lock_fd(fd, lock_op);
                if (lock_fd < 0)
                        return lock_fd;
        }

        /* If this is a real loop device, collect its number and backing file metadata; if not (e.g. a
         * plain block device), proceed without them. */
        if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0) {
#if HAVE_VALGRIND_MEMCHECK_H
                /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
                VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
#endif
                nr = info.lo_number;

                if (sd_device_get_sysattr_value(dev, "loop/backing_file", &s) >= 0) {
                        backing_file = strdup(s);
                        if (!backing_file)
                                return -ENOMEM;
                }

                backing_devno = info.lo_device;
                backing_inode = info.lo_inode;
        }

        r = fd_get_diskseq(fd, &diskseq);
        if (r < 0 && r != -EOPNOTSUPP) /* older kernels may not support diskseq; treat as zero then */
                return r;

        uint32_t sector_size;
        r = blockdev_get_sector_size(fd, &sector_size);
        if (r < 0)
                return r;

        uint64_t device_size;
        r = blockdev_get_device_size(fd, &device_size);
        if (r < 0)
                return r;

        r = sd_device_get_devnum(dev, &devnum);
        if (r < 0)
                return r;

        r = sd_device_get_devname(dev, &s);
        if (r < 0)
                return r;

        node = strdup(s);
        if (!node)
                return -ENOMEM;

        d = new(LoopDevice, 1);
        if (!d)
                return -ENOMEM;

        *d = (LoopDevice) {
                .n_ref = 1,
                .fd = TAKE_FD(fd),
                .lock_fd = TAKE_FD(lock_fd),
                .nr = nr,
                .node = TAKE_PTR(node),
                .dev = sd_device_ref(dev),
                .backing_file = TAKE_PTR(backing_file),
                .backing_inode = backing_inode,
                .backing_devno = backing_devno,
                .relinquished = true, /* It's not ours, don't try to destroy it when this object is freed */
                .devno = devnum,
                .diskseq = diskseq,
                .sector_size = sector_size,
                .device_size = device_size,
                .created = false,
        };

        *ret = d;
        return 0;
}
1056

1057
int loop_device_open_from_fd(
2✔
1058
                int fd,
1059
                int open_flags,
1060
                int lock_op,
1061
                LoopDevice **ret) {
1062

1063
        _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
2✔
1064
        int r;
2✔
1065

1066
        r = block_device_new_from_fd(ASSERT_FD(fd), 0, &dev);
2✔
1067
        if (r < 0)
2✔
1068
                return r;
1069

1070
        return loop_device_open(dev, open_flags, lock_op, ret);
2✔
1071
}
1072

1073
int loop_device_open_from_path(
×
1074
                const char *path,
1075
                int open_flags,
1076
                int lock_op,
1077
                LoopDevice **ret) {
1078

1079
        _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
×
1080
        int r;
×
1081

1082
        assert(path);
×
1083

1084
        r = block_device_new_from_path(path, 0, &dev);
×
1085
        if (r < 0)
×
1086
                return r;
1087

1088
        return loop_device_open(dev, open_flags, lock_op, ret);
×
1089
}
1090

1091
/* Resizes/moves the partition @partition_fd refers to via BLKPG_RESIZE_PARTITION on the whole-disk
 * device. @offset/@size of UINT64_MAX mean "keep the current value". Returns -ENOTTY if the fd does
 * not actually refer to a partition. */
static int resize_partition(int partition_fd, uint64_t offset, uint64_t size) {
        char sysfs[STRLEN("/sys/dev/block/:/partition") + 2*DECIMAL_STR_MAX(dev_t) + 1];
        _cleanup_free_ char *buffer = NULL;
        uint64_t current_offset, current_size, partno;
        _cleanup_close_ int whole_fd = -EBADF;
        struct stat st;
        dev_t devno;
        int r;

        /* Resizes the partition the loopback device refer to (assuming it refers to one instead of an actual
         * loopback device), and changes the offset, if needed. This is a fancy wrapper around
         * BLKPG_RESIZE_PARTITION. */

        if (fstat(ASSERT_FD(partition_fd), &st) < 0)
                return -errno;

        assert(S_ISBLK(st.st_mode));

        /* Partition number, from /sys/dev/block/<maj>:<min>/partition */
        xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/partition", DEVNUM_FORMAT_VAL(st.st_rdev));
        r = read_one_line_file(sysfs, &buffer);
        if (r == -ENOENT) /* not a partition, cannot resize */
                return -ENOTTY;
        if (r < 0)
                return r;
        r = safe_atou64(buffer, &partno);
        if (r < 0)
                return r;

        /* Current start offset, from the "start" attribute — expressed in 512 byte units, hence the
         * multiplication below */
        xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/start", DEVNUM_FORMAT_VAL(st.st_rdev));

        buffer = mfree(buffer);
        r = read_one_line_file(sysfs, &buffer);
        if (r < 0)
                return r;
        r = safe_atou64(buffer, &current_offset);
        if (r < 0)
                return r;
        if (current_offset > UINT64_MAX/512U) /* overflow check before converting sectors to bytes */
                return -EINVAL;
        current_offset *= 512U;

        r = blockdev_get_device_size(partition_fd, &current_size);
        if (r < 0)
                return r;

        /* Shortcut: nothing requested, or everything already matches */
        if (size == UINT64_MAX && offset == UINT64_MAX)
                return 0;
        if (current_size == size && current_offset == offset)
                return 0;

        /* Devnum of the whole-disk device, via the parent's "dev" attribute */
        xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/../dev", DEVNUM_FORMAT_VAL(st.st_rdev));

        buffer = mfree(buffer);
        r = read_one_line_file(sysfs, &buffer);
        if (r < 0)
                return r;
        r = parse_devnum(buffer, &devno);
        if (r < 0)
                return r;

        whole_fd = r = device_open_from_devnum(S_IFBLK, devno, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY, NULL);
        if (r < 0)
                return r;

        return block_device_resize_partition(
                        whole_fd,
                        partno,
                        offset == UINT64_MAX ? current_offset : offset,
                        size == UINT64_MAX ? current_size : size);
}
1161

1162
/* Adjusts the offset/size of the loop device relative to the beginning of the underlying file or
 * block device. If this loop device actually refers to a partition and not a loopback device, the
 * partition offsets are adjusted instead. If either @offset or @size is UINT64_MAX that parameter is
 * left unchanged. */
int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size) {
        struct loop_info64 info;

        assert(d);
        assert(d->fd >= 0);

        if (d->nr < 0) /* not a loopback device */
                return resize_partition(d->fd, offset, size);

        if (ioctl(d->fd, LOOP_GET_STATUS64, &info) < 0)
                return -errno;

#if HAVE_VALGRIND_MEMCHECK_H
        /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
        VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
#endif

        bool change_size = size != UINT64_MAX && info.lo_sizelimit != size,
             change_offset = offset != UINT64_MAX && info.lo_offset != offset;

        if (!change_size && !change_offset) /* Already in the requested state */
                return 0;

        if (change_size)
                info.lo_sizelimit = size;
        if (change_offset)
                info.lo_offset = offset;

        return RET_NERRNO(ioctl(d->fd, LOOP_SET_STATUS64, &info));
}
1196

1197
/* Applies a BSD flock() of the given mode on the device's dedicated lock fd, allocating the fd if
 * necessary. LOCK_UN simply closes the lock fd. */
int loop_device_flock(LoopDevice *d, int operation) {
        assert(IN_SET(operation & ~LOCK_NB, LOCK_UN, LOCK_SH, LOCK_EX));
        assert(d);

        int op = operation & ~LOCK_NB;

        /* Unlocking == dropping the lock fd */
        if (op == LOCK_UN) {
                d->lock_fd = safe_close(d->lock_fd);
                return 0;
        }

        /* No lock fd yet? Allocate one, locked right away in the requested mode. */
        if (d->lock_fd < 0) {
                d->lock_fd = open_lock_fd(ASSERT_FD(d->fd), operation);
                if (d->lock_fd < 0)
                        return d->lock_fd;

                return 0;
        }

        /* Otherwise convert the lock mode of the fd we already have */
        return RET_NERRNO(flock(d->lock_fd, operation));
}
1219

1220
/* Flushes in-flight blocks of the loop device to disk. loop_device_unref() does this implicitly too,
 * but calling it explicitly lets the caller check the return value. */
int loop_device_sync(LoopDevice *d) {
        assert(d);

        if (fsync(ASSERT_FD(d->fd)) < 0)
                return -errno;

        return 0;
}
1228

1229
/* Turns the kernel's LO_FLAGS_AUTOCLEAR logic on or off for the loop device. Returns 0 if the flag
 * already had the requested state, 1 if it was changed, negative errno on failure. */
int loop_device_set_autoclear(LoopDevice *d, bool autoclear) {
        struct loop_info64 info;

        assert(d);

        if (ioctl(ASSERT_FD(d->fd), LOOP_GET_STATUS64, &info) < 0)
                return -errno;

        if (FLAGS_SET(info.lo_flags, LO_FLAGS_AUTOCLEAR) == autoclear)
                return 0; /* already in the requested state */

        SET_FLAG(info.lo_flags, LO_FLAGS_AUTOCLEAR, autoclear);

        if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
                return -errno;

        return 1;
}
1247

1248
/* Sets the free-form .lo_file_name string of the loop device (a NULL @name clears it). Returns 0 if
 * the string already matched, 1 if it was changed, -ENOBUFS if @name does not fit, negative errno on
 * ioctl failure. */
int loop_device_set_filename(LoopDevice *d, const char *name) {
        struct loop_info64 info;

        assert(d);

        /* Sets the .lo_file_name of the loopback device. This is supposed to contain the path to the file
         * backing the block device, but is actually just a free-form string you can pass to the kernel. Most
         * tools that actually care for the backing file path use the sysfs attribute file loop/backing_file
         * which is a kernel generated string, subject to file system namespaces and such.
         *
         * .lo_file_name is useful since userspace can select it freely when creating a loopback block
         * device, and we can use it for /dev/disk/by-loop-ref/ symlinks, and similar, so that apps can
         * recognize their own loopback files. */

        /* Reject names that would not fit including the NUL terminator */
        if (name && strlen(name) >= sizeof(info.lo_file_name))
                return -ENOBUFS;

        if (ioctl(ASSERT_FD(d->fd), LOOP_GET_STATUS64, &info) < 0)
                return -errno;

        /* Nothing to do if the stored string already matches */
        if (strneq((char*) info.lo_file_name, strempty(name), sizeof(info.lo_file_name)))
                return 0;

        if (name) {
                /* strncpy alone does not guarantee NUL termination, hence the explicit terminator below */
                strncpy((char*) info.lo_file_name, name, sizeof(info.lo_file_name)-1);
                info.lo_file_name[sizeof(info.lo_file_name)-1] = 0;
        } else
                memzero(info.lo_file_name, sizeof(info.lo_file_name));

        if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
                return -errno;

        return 1;
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc