• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 25893429527

14 May 2026 09:08PM UTC coverage: 72.364% (-0.2%) from 72.584%
25893429527

push

github

bluca
ci: switch SUSE mkosi mirror to cdn.o.o

The cdn mirror is preferred by SUSE for clouds/CIs. There have been issues with some
mirrors, which fail to download from GHA quite often lately, so hopefully this will
make it reliable again.

328159 of 453485 relevant lines covered (72.36%)

1405869.02 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

65.22
/src/shared/loop-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#if HAVE_VALGRIND_MEMCHECK_H
4
#include <valgrind/memcheck.h>
5
#endif
6

7
#include <fcntl.h>
8
#include <linux/loop.h>
9
#include <sys/file.h>
10
#include <sys/ioctl.h>
11
#include <unistd.h>
12

13
#include "sd-device.h"
14

15
#include "alloc-util.h"
16
#include "blockdev-util.h"
17
#include "data-fd-util.h"
18
#include "device-util.h"
19
#include "devnum-util.h"
20
#include "dissect-image.h"
21
#include "env-util.h"
22
#include "errno-util.h"
23
#include "fd-util.h"
24
#include "fileio.h"
25
#include "fs-util.h"
26
#include "loop-util.h"
27
#include "parse-util.h"
28
#include "path-util.h"
29
#include "random-util.h"
30
#include "stat-util.h"
31
#include "stdio-util.h"
32
#include "string-util.h"
33
#include "time-util.h"
34

35
/* Cleanup handler for a loop device fd that already has a backing file attached: if *fd is valid, issue
 * LOOP_CLR_FD on it and then close it. Failures of either step are deliberately ignored. A negative *fd
 * is left alone. */
static void cleanup_clear_loop_close(int *fd) {
        assert(fd);

        if (*fd >= 0) {
                (void) ioctl(*fd, LOOP_CLR_FD);
                (void) safe_close(*fd);
        }
}
44

45
/* Queries the loop device referenced by fd via LOOP_GET_STATUS64.
 *
 * Returns > 0 if the query succeeds (i.e. the device is bound), 0 if the kernel answers with ENXIO
 * (not bound), and a negative errno-style error for every other failure. */
static int loop_is_bound(int fd) {
        struct loop_info64 status;

        if (ioctl(ASSERT_FD(fd), LOOP_GET_STATUS64, &status) >= 0)
                return true; /* bound! */

        /* ENXIO means "not bound", everything else is a genuine error. */
        return errno == ENXIO ? false : -errno;
}
57

58
static int open_lock_fd(int primary_fd, int operation) {
2,683✔
59
        _cleanup_close_ int lock_fd = -EBADF;
2,683✔
60

61
        assert(IN_SET(operation & ~LOCK_NB, LOCK_SH, LOCK_EX));
2,683✔
62

63
        lock_fd = fd_reopen(ASSERT_FD(primary_fd), O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2,683✔
64
        if (lock_fd < 0)
2,683✔
65
                return lock_fd;
66

67
        if (flock(lock_fd, operation) < 0)
2,683✔
68
                return -errno;
×
69

70
        return TAKE_FD(lock_fd);
71
}
72

73
/* If direct IO was requested in c->info.lo_flags, checks via LOOP_GET_STATUS64 whether the loop device
 * referenced by fd really has LO_FLAGS_DIRECT_IO set.
 *
 * Returns 0 if direct IO was not requested or is in effect, -ENOANO if it was requested but is not in
 * effect, and a negative errno-style error if the status query itself failed. */
static int loop_configure_verify_direct_io(int fd, const struct loop_config *c) {
        struct loop_info64 status;

        assert(fd >= 0);
        assert(c);

        /* Nothing to verify if direct IO was not asked for in the first place. */
        if (!FLAGS_SET(c->info.lo_flags, LO_FLAGS_DIRECT_IO))
                return 0;

        if (ioctl(fd, LOOP_GET_STATUS64, &status) < 0)
                return log_debug_errno(errno, "Failed to issue LOOP_GET_STATUS64: %m");

#if HAVE_VALGRIND_MEMCHECK_H
        VALGRIND_MAKE_MEM_DEFINED(&status, sizeof(status));
#endif

        /* Kernels up to 5.3 required the block size of the loopback device to be set to the logical
         * block size of the underlying file system. There was no nice way to query that value, hence we
         * do not bother. Newer kernels propagate the block size automatically, no intervention needed.
         * We merely check whether enabling direct IO worked, so that this is easy to debug.
         *
         * (If anyone really wants direct IO on such old kernels: it might be worth enabling direct IO
         * with iteratively larger block sizes until it eventually works.)
         *
         * On older kernels (e.g. 5.10), if this is attempted for a file stored on a dm-crypt backed
         * partition, the kernel starts returning I/O errors when the mounted loop device is accessed.
         * Hence return a recognizable error, so that the operation is restarted from scratch without
         * LO_FLAGS_DIRECT_IO. */
        if (FLAGS_SET(status.lo_flags, LO_FLAGS_DIRECT_IO))
                return 0;

        return log_debug_errno(
                        SYNTHETIC_ERRNO(ENOANO),
                        "Could not enable direct IO mode, retrying in buffered IO mode.");
}
108

109
/* Checks whether a preceding LOOP_CONFIGURE call really put the settings requested in *c into effect on
 * the loop device referenced by fd: block size, size limit, partition scanning and direct IO.
 *
 * Returns > 0 if everything that was checked is in effect, 0 if at least one setting was not honoured,
 * and a negative errno-style error if one of the queries or the direct IO check failed. */
static int loop_configure_verify(int fd, const struct loop_config *c) {
        bool honoured = true;
        int r;

        assert(fd >= 0);
        assert(c);

        if (c->block_size != 0) {
                uint32_t effective_ssz;

                r = blockdev_get_sector_size(fd, &effective_ssz);
                if (r < 0)
                        return r;

                if (effective_ssz != c->block_size) {
                        log_debug("LOOP_CONFIGURE didn't honour requested block size %" PRIu32 ", got %" PRIu32 " instead. Ignoring.", c->block_size, effective_ssz);
                        honoured = false;
                }
        }

        if (c->info.lo_sizelimit != 0) {
                /* Vanilla kernel 5.8 does not properly propagate the size limit into the block device.
                 * Hence, if a limit was requested, check right away whether it took effect, so that
                 * the caller can fall back to classic LOOP_SET_STATUS64 if it did not. */
                uint64_t effective_size;

                r = blockdev_get_device_size(fd, &effective_size);
                if (r < 0)
                        return r;

                if (effective_size != c->info.lo_sizelimit) {
                        log_debug("LOOP_CONFIGURE is broken, doesn't honour .info.lo_sizelimit. Falling back to LOOP_SET_STATUS64.");
                        honoured = false;
                }
        }

        if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_PARTSCAN)) {
                /* Vanilla kernel 5.8 does not properly propagate the partition scanning flag into the
                 * block device either, hence verify that too before we return. */

                r = blockdev_partscan_enabled_fd(fd);
                if (r < 0)
                        return r;
                if (r == 0) {
                        log_debug("LOOP_CONFIGURE is broken, doesn't honour LO_FLAGS_PARTSCAN. Falling back to LOOP_SET_STATUS64.");
                        honoured = false;
                }
        }

        r = loop_configure_verify_direct_io(fd, c);
        if (r < 0)
                return r;

        return honoured;
}
165

166
static int loop_configure_fallback(int fd, const struct loop_config *c) {
×
167
        struct loop_info64 info_copy;
×
168
        int r;
×
169

170
        assert(fd >= 0);
×
171
        assert(c);
×
172

173
        /* Only some of the flags LOOP_CONFIGURE can set are also settable via LOOP_SET_STATUS64, hence mask
174
         * them out. */
175
        info_copy = c->info;
×
176
        info_copy.lo_flags &= LOOP_SET_STATUS_SETTABLE_FLAGS;
×
177

178
        /* Since kernel commit 5db470e229e22b7eda6e23b5566e532c96fb5bc3 (kernel v5.0) the LOOP_SET_STATUS64
179
         * ioctl can return EAGAIN in case we change the info.lo_offset field, if someone else is accessing the
180
         * block device while we try to reconfigure it. This is a pretty common case, since udev might
181
         * instantly start probing the device as soon as we attach an fd to it. Hence handle it in two ways:
182
         * first, let's take the BSD lock to ensure that udev will not step in between the point in
183
         * time where we attach the fd and where we reconfigure the device. Secondly, let's wait 50ms on
184
         * EAGAIN and retry. The former should be an efficient mechanism to avoid we have to wait 50ms
185
         * needlessly if we are just racing against udev. The latter is protection against all other cases,
186
         * i.e. peers that do not take the BSD lock. */
187

188
        for (unsigned n_attempts = 0;;) {
×
189
                if (ioctl(fd, LOOP_SET_STATUS64, &info_copy) >= 0)
×
190
                        break;
191

192
                if (errno != EAGAIN || ++n_attempts >= 64)
×
193
                        return log_debug_errno(errno, "Failed to configure loopback block device: %m");
×
194

195
                /* Sleep some random time, but at least 10ms, at most 250ms. Increase the delay the more
196
                 * failed attempts we see */
197
                (void) usleep_safe(UINT64_C(10) * USEC_PER_MSEC +
×
198
                              random_u64_range(UINT64_C(240) * USEC_PER_MSEC * n_attempts/64));
×
199
        }
200

201
        /* If a block size is requested then try to configure it. If that doesn't work, ignore errors, but
202
         * afterwards, let's validate what is in effect, and if it doesn't match what we want, fail */
203
        if (c->block_size != 0) {
×
204
                uint32_t ssz;
×
205

206
                if (ioctl(fd, LOOP_SET_BLOCK_SIZE, (unsigned long) c->block_size) < 0)
×
207
                        log_debug_errno(errno, "Failed to set sector size, ignoring: %m");
×
208

209
                r = blockdev_get_sector_size(fd, &ssz);
×
210
                if (r < 0)
×
211
                        return log_debug_errno(r, "Failed to read sector size: %m");
×
212
                if (ssz != c->block_size)
×
213
                        return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Sector size of loopback device doesn't match what we requested, refusing.");
×
214
        }
215

216
        /* LO_FLAGS_DIRECT_IO is a flags we need to configure via explicit ioctls. */
217
        if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_DIRECT_IO))
×
218
                if (ioctl(fd, LOOP_SET_DIRECT_IO, 1UL) < 0)
×
219
                        log_debug_errno(errno, "Failed to enable direct IO mode, ignoring: %m");
×
220

221
        return loop_configure_verify_direct_io(fd, c);
×
222
}
223

224
/* Tries to attach the backing file described by *c to the loop device /dev/loop<nr> and, on success,
 * returns a newly allocated LoopDevice object for it in *ret.
 *
 * Steps, in order: open the device node, take an exclusive BSD lock on a separately opened fd, make sure
 * the device is not bound yet and has no left-over partitions, configure it via LOOP_CONFIGURE (or via
 * LOOP_SET_FD plus loop_configure_fallback() once LOOP_CONFIGURE is known not to work), query diskseq
 * and device size, and finally adjust the lock to what lock_op asks for (LOCK_EX, LOCK_SH or LOCK_UN,
 * optionally combined with LOCK_NB).
 *
 * Returns 0 on success. Returns a negative errno-style error otherwise, among them:
 *   -EBUSY   the device is already bound, or LOOP_CONFIGURE did not honour the requested settings
 *   -EUCLEAN left-over partitions were found and removed; the caller is expected to try again
 * plus whatever the individual steps propagate.
 *
 * If an error occurs after a backing file was attached, the cleanup handler of loop_with_fd issues
 * LOOP_CLR_FD and closes the fd. */
static int loop_configure(
2,545✔
225
                int nr,
226
                int open_flags,
227
                int lock_op,
228
                const struct loop_config *c,
229
                LoopDevice **ret) {
230

231
        /* Remembered across calls: set once LOOP_CONFIGURE turned out to be unsupported or not to honour
         * the requested settings; from then on the LOOP_SET_FD path below is used right away. */
        static bool loop_configure_broken = false;
2,545✔
232

233
        _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
2,545✔
234
        /* NOTE(review): presumably the ordering matters because cleanup handlers run in reverse
         * declaration order, i.e. lock_fd gets closed before LOOP_CLR_FD is issued — confirm. */
        _cleanup_(cleanup_clear_loop_close) int loop_with_fd = -EBADF; /* This must be declared before lock_fd. */
×
235
        _cleanup_close_ int fd = -EBADF, lock_fd = -EBADF;
5,090✔
236
        _cleanup_free_ char *node = NULL;
2,545✔
237
        uint64_t diskseq = 0;
2,545✔
238
        dev_t devno;
2,545✔
239
        int r;
2,545✔
240

241
        assert(nr >= 0);
2,545✔
242
        assert(c);
2,545✔
243
        assert(ret);
2,545✔
244

245
        if (asprintf(&node, "/dev/loop%i", nr) < 0)
2,545✔
246
                return log_oom_debug();
×
247

248
        r = sd_device_new_from_devname(&dev, node);
2,545✔
249
        if (r < 0)
2,545✔
250
                return log_debug_errno(r, "Failed to create sd_device object for \"%s\": %m", node);
×
251

252
        r = sd_device_get_devnum(dev, &devno);
2,545✔
253
        if (r < 0)
2,545✔
254
                return log_device_debug_errno(dev, r, "Failed to get devnum: %m");
×
255

256
        fd = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
2,545✔
257
        if (fd < 0)
2,545✔
258
                return log_device_debug_errno(dev, fd, "Failed to open device: %m");
×
259

260
        /* Let's lock the device before we do anything. We take the BSD lock on a second, separately opened
261
         * fd for the device. udev after all watches for close() events (specifically IN_CLOSE_WRITE) on
262
         * block devices to reprobe them, hence by having a separate fd we will later close() we can ensure
263
         * we trigger udev after everything is done. If we'd lock our own fd instead and keep it open for a
264
         * long time udev would possibly never run on it again, even though the fd is unlocked, simply
265
         * because we never close() it. It also has the nice benefit we can use the _cleanup_close_ logic to
266
         * automatically release the lock, after we are done. */
267
        lock_fd = open_lock_fd(fd, LOCK_EX);
2,545✔
268
        if (lock_fd < 0)
2,545✔
269
                return log_device_debug_errno(dev, lock_fd, "Failed to acquire lock: %m");
×
270

271
        log_device_debug(dev, "Acquired exclusive lock.");
2,577✔
272

273
        /* Let's see if backing file is really unattached. Someone may already attach a backing file without
274
         * taking BSD lock. */
275
        r = loop_is_bound(fd);
2,545✔
276
        if (r < 0)
2,545✔
277
                return log_device_debug_errno(dev, r, "Failed to check if the loopback block device is bound: %m");
×
278
        if (r > 0)
2,545✔
279
                return log_device_debug_errno(dev, SYNTHETIC_ERRNO(EBUSY),
×
280
                                              "The loopback block device is already bound, ignoring.");
281

282
        /* Let's see if the device is really detached, i.e. currently has no associated partition block
283
         * devices. On various kernels (such as 5.8) it is possible to have a loopback block device that
284
         * superficially is detached but still has partition block devices associated for it. Let's then
285
         * manually remove the partitions via BLKPG, and tell the caller we did that via EUCLEAN, so they try
286
         * again. */
287
        r = block_device_remove_all_partitions(dev, fd);
2,545✔
288
        if (r < 0)
2,545✔
289
                return log_device_debug_errno(dev, r, "Failed to remove partitions on the loopback block device: %m");
×
290
        if (r > 0)
2,545✔
291
                /* Removed all partitions. Let's report this to the caller, to try again, and count this as
292
                 * an attempt. */
293
                return log_device_debug_errno(dev, SYNTHETIC_ERRNO(EUCLEAN),
×
294
                                              "Removed partitions on the loopback block device.");
295

296
        if (!loop_configure_broken) {
2,545✔
297
                if (ioctl(fd, LOOP_CONFIGURE, c) < 0) {
2,545✔
298
                        /* Do fallback only if LOOP_CONFIGURE is not supported, propagate all other errors. */
299
                        if (!ERRNO_IS_IOCTL_NOT_SUPPORTED(errno))
×
300
                                return log_device_debug_errno(dev, errno, "ioctl(LOOP_CONFIGURE) failed: %m");
×
301

302
                        loop_configure_broken = true;
×
303
                } else {
304
                        /* A backing file is attached now: hand the fd over to loop_with_fd, so that every
                         * error path from here on detaches it again via cleanup_clear_loop_close(). */
                        loop_with_fd = TAKE_FD(fd);
2,545✔
305

306
                        r = loop_configure_verify(loop_with_fd, c);
2,545✔
307
                        if (r < 0)
2,545✔
308
                                return log_device_debug_errno(dev, r, "Failed to verify if loopback block device is correctly configured: %m");
×
309
                        if (r == 0) {
2,545✔
310
                                /* LOOP_CONFIGURE doesn't work. Remember that. */
311
                                loop_configure_broken = true;
×
312

313
                                /* We return EBUSY here instead of retrying immediately with LOOP_SET_FD,
314
                                 * because LOOP_CLR_FD is async: if the operation cannot be executed right
315
                                 * away it just sets the autoclear flag on the device. This means there's a
316
                                 * good chance we cannot actually reuse the loopback device right-away. Hence
317
                                 * let's assume it's busy, avoid the trouble and let the calling loop call us
318
                                 * again with a new, likely unused device. */
319
                                return -EBUSY;
×
320
                        }
321
                }
322
        }
323

324
        /* Reached with the flag set only if LOOP_CONFIGURE was not attempted or is not supported; in both
         * cases fd has not been handed over to loop_with_fd yet. */
        if (loop_configure_broken) {
2,545✔
325
                if (ioctl(fd, LOOP_SET_FD, c->fd) < 0)
×
326
                        return log_device_debug_errno(dev, errno, "ioctl(LOOP_SET_FD) failed: %m");
×
327

328
                loop_with_fd = TAKE_FD(fd);
×
329

330
                r = loop_configure_fallback(loop_with_fd, c);
×
331
                if (r < 0)
×
332
                        return r;
333
        }
334

335
        /* -EOPNOTSUPP is tolerated here; diskseq was initialized to 0 above. */
        r = fd_get_diskseq(loop_with_fd, &diskseq);
2,545✔
336
        if (r < 0 && r != -EOPNOTSUPP)
2,545✔
337
                return log_device_debug_errno(dev, r, "Failed to get diskseq: %m");
×
338

339
        /* We hold LOCK_EX at this point, now switch to the lock level the caller asked for. */
        switch (lock_op & ~LOCK_NB) {
2,545✔
340
        case LOCK_EX: /* Already in effect */
341
                break;
342
        case LOCK_SH: /* Downgrade */
2,410✔
343
                if (flock(lock_fd, lock_op) < 0)
2,410✔
344
                        return log_device_debug_errno(dev, errno, "Failed to downgrade lock level: %m");
×
345
                break;
346
        case LOCK_UN: /* Release */
×
347
                lock_fd = safe_close(lock_fd);
×
348
                break;
349
        default:
×
350
                assert_not_reached();
×
351
        }
352

353
        uint64_t device_size;
2,545✔
354
        r = blockdev_get_device_size(loop_with_fd, &device_size);
2,545✔
355
        if (r < 0)
2,545✔
356
                return log_device_debug_errno(dev, r, "Failed to get loopback device size: %m");
×
357

358
        LoopDevice *d = new(LoopDevice, 1);
2,545✔
359
        if (!d)
2,545✔
360
                return log_oom_debug();
×
361

362
        /* Everything worked: move ownership of the fds, the node path and the device object into the new
         * LoopDevice, which disarms the cleanup handlers declared at the top. */
        *d = (LoopDevice) {
2,545✔
363
                .n_ref = 1,
364
                .fd = TAKE_FD(loop_with_fd),
2,545✔
365
                .lock_fd = TAKE_FD(lock_fd),
2,545✔
366
                .node = TAKE_PTR(node),
2,545✔
367
                .nr = nr,
368
                .devno = devno,
369
                .dev = TAKE_PTR(dev),
2,545✔
370
                .diskseq = diskseq,
371
                .sector_size = c->block_size,
2,545✔
372
                .device_size = device_size,
373
                .created = true,
374
        };
375

376
        *ret = TAKE_PTR(d);
2,545✔
377
        return 0;
2,545✔
378
}
379

380
static int fd_get_max_discard(int fd, uint64_t *ret) {
×
381
        char sysfs_path[STRLEN("/sys/dev/block/" ":" "/queue/discard_max_bytes") + DECIMAL_STR_MAX(dev_t) * 2 + 1];
×
382
        _cleanup_free_ char *buffer = NULL;
×
383
        struct stat st;
×
384
        int r;
×
385

386
        assert(ret);
×
387

388
        if (fstat(ASSERT_FD(fd), &st) < 0)
×
389
                return -errno;
×
390

391
        r = stat_verify_block(&st);
×
392
        if (r < 0)
×
393
                return r;
394

395
        xsprintf(sysfs_path, "/sys/dev/block/" DEVNUM_FORMAT_STR "/queue/discard_max_bytes", DEVNUM_FORMAT_VAL(st.st_rdev));
×
396

397
        r = read_one_line_file(sysfs_path, &buffer);
×
398
        if (r < 0)
×
399
                return r;
400

401
        return safe_atou64(buffer, ret);
×
402
}
403

404
static int fd_set_max_discard(int fd, uint64_t max_discard) {
×
405
        char sysfs_path[STRLEN("/sys/dev/block/" ":" "/queue/discard_max_bytes") + DECIMAL_STR_MAX(dev_t) * 2 + 1];
×
406
        struct stat st;
×
407
        int r;
×
408

409
        if (fstat(ASSERT_FD(fd), &st) < 0)
×
410
                return -errno;
×
411

412
        r = stat_verify_block(&st);
×
413
        if (r < 0)
×
414
                return r;
415

416
        xsprintf(sysfs_path, "/sys/dev/block/" DEVNUM_FORMAT_STR "/queue/discard_max_bytes", DEVNUM_FORMAT_VAL(st.st_rdev));
×
417

418
        return write_string_filef(sysfs_path, WRITE_STRING_FILE_DISABLE_BUFFER, "%" PRIu64, max_discard);
×
419
}
420

421
static int probe_sector_size_harder(int fd, uint32_t *ret) {
2,431✔
422
        _cleanup_close_ int non_direct_io_fd = -EBADF;
2,431✔
423
        int probe_fd, f_flags;
2,431✔
424

425
        assert(fd >= 0);
2,431✔
426
        assert(ret);
2,431✔
427

428
        /* Wraps probe_sector_size() but handles O_DIRECT: if the fd is opened with O_DIRECT there are
429
         * strict alignment requirements for reads, so we temporarily reopen it without O_DIRECT for the
430
         * probing logic. */
431

432
        f_flags = fcntl(fd, F_GETFL);
2,431✔
433
        if (f_flags < 0)
2,431✔
434
                return -errno;
×
435

436
        if (FLAGS_SET(f_flags, O_DIRECT)) {
2,431✔
437
                non_direct_io_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
2,424✔
438
                if (non_direct_io_fd < 0)
2,424✔
439
                        return non_direct_io_fd;
440

441
                probe_fd = non_direct_io_fd;
442
        } else
443
                probe_fd = fd;
444

445
        return probe_sector_size(probe_fd, ret);
2,431✔
446
}
447

448
/* Determines whether the block device fd we were handed can be returned as is, instead of allocating a
 * real loopback device on top of it. That is the case only if all of the following hold:
 *
 *   - the whole device is covered, i.e. offset is 0 and size is either 0 or UINT64_MAX
 *   - the requested sector size equals the sector size of the device
 *   - if LO_FLAGS_PARTSCAN is requested, partition scanning is already enabled on the device
 *     (otherwise e.g. partition block devices, or loop devices created without LO_FLAGS_PARTSCAN,
 *     would be reused even though they cannot expose nested partitions)
 *
 * Returns > 0 if the shortcut is possible, 0 if not, and a negative errno-style error if the partition
 * scanning state could not be determined. */
static int loop_device_can_shortcut(
                int fd,
                uint64_t offset,
                uint64_t size,
                uint32_t sector_size,
                uint32_t device_ssz,
                uint32_t loop_flags) {

        int partscan;

        assert(fd >= 0);

        if (offset != 0 || !IN_SET(size, 0, UINT64_MAX) || sector_size != device_ssz)
                return false;

        /* No partition scanning requested, hence nothing further to check. */
        if (!FLAGS_SET(loop_flags, LO_FLAGS_PARTSCAN))
                return true;

        partscan = blockdev_partscan_enabled_fd(fd);
        if (partscan < 0)
                return partscan;

        return partscan > 0;
}
483

484
static int loop_device_make_internal(
2,566✔
485
                const char *path,
486
                int fd,
487
                int open_flags,
488
                uint64_t offset,
489
                uint64_t size,
490
                uint32_t sector_size,
491
                uint32_t loop_flags,
492
                int lock_op,
493
                LoopDevice **ret) {
494

495
        _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
×
496
        _cleanup_close_ int reopened_fd = -EBADF, control = -EBADF;
5,132✔
497
        _cleanup_free_ char *backing_file = NULL;
2,566✔
498
        struct loop_config config;
2,566✔
499
        int r, f_flags;
2,566✔
500
        struct stat st;
2,566✔
501

502
        assert(fd >= 0);
2,566✔
503
        assert(open_flags < 0 || IN_SET(open_flags, O_RDWR, O_RDONLY));
2,566✔
504
        assert(ret);
2,566✔
505

506
        /* sector_size interpretation:
507
         *   0          → use device sector size for block devices, 512 for regular files
508
         *   UINT32_MAX → probe GPT header to find the right sector size, fall back to 0 behavior
509
         *   other      → use the specified sector size explicitly */
510

511
        f_flags = fcntl(fd, F_GETFL);
2,566✔
512
        if (f_flags < 0)
2,566✔
513
                return -errno;
×
514

515
        if (open_flags < 0) {
2,566✔
516
                /* If open_flags is unset, initialize it from the open fd */
517
                if (FLAGS_SET(f_flags, O_PATH))
6✔
518
                        return log_debug_errno(SYNTHETIC_ERRNO(EBADFD), "Access mode of image file indicates O_PATH, cannot determine read/write flags.");
×
519

520
                open_flags = f_flags & O_ACCMODE_STRICT;
6✔
521
                if (!IN_SET(open_flags, O_RDWR, O_RDONLY))
6✔
522
                        return log_debug_errno(SYNTHETIC_ERRNO(EBADFD), "Access mode of image file is write only (?)");
×
523
        }
524

525
        if (sector_size == UINT32_MAX) {
2,566✔
526
                /* If sector size is specified as UINT32_MAX, we'll try to probe the right sector size
527
                 * by looking for the GPT partition header at various offsets. This of course only works
528
                 * if the image already has a disk label. */
529

530
                r = probe_sector_size_harder(fd, &sector_size);
2,431✔
531
                if (r < 0)
2,431✔
532
                        return r;
533
                if (r == 0)
2,431✔
534
                        sector_size = 0; /* If we can't probe anything, use default sector size. */
2,311✔
535
        }
536

537
        if (fstat(fd, &st) < 0)
2,566✔
538
                return -errno;
×
539

540
        if (S_ISBLK(st.st_mode)) {
2,566✔
541
                uint32_t device_ssz;
1✔
542
                r = blockdev_get_sector_size(fd, &device_ssz);
1✔
543
                if (r < 0)
1✔
544
                        return r;
1✔
545

546
                if (sector_size == 0)
1✔
547
                        sector_size = device_ssz;
×
548

549
                r = loop_device_can_shortcut(fd, offset, size, sector_size, device_ssz, loop_flags);
1✔
550
                if (r < 0)
1✔
551
                        return r;
552
                if (r > 0)
1✔
553
                        return loop_device_open_from_fd(fd, open_flags, lock_op, ret);
1✔
554
        } else {
555
                r = stat_verify_regular(&st);
2,565✔
556
                if (r < 0)
2,565✔
557
                        return r;
558

559
                if (sector_size == 0)
2,565✔
560
                        sector_size = 512;
2,311✔
561
        }
562

563
        if (path) {
2,565✔
564
                r = path_make_absolute_cwd(path, &backing_file);
344✔
565
                if (r < 0)
344✔
566
                        return r;
567

568
                path_simplify(backing_file);
344✔
569
        } else {
570
                r = fd_get_path(fd, &backing_file);
2,221✔
571
                if (r < 0)
2,221✔
572
                        return r;
573
        }
574

575
        if (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) != FLAGS_SET(f_flags, O_DIRECT)) {
2,565✔
576
                /* If LO_FLAGS_DIRECT_IO is requested, then make sure we have the fd open with O_DIRECT, as
577
                 * that's required. Conversely, if it's off require that O_DIRECT is off too (that's because
578
                 * new kernels will implicitly enable LO_FLAGS_DIRECT_IO if O_DIRECT is set).
579
                 *
580
                 * Our intention here is that LO_FLAGS_DIRECT_IO is the primary knob, and O_DIRECT derived
581
                 * from that automatically. */
582

583
                reopened_fd = fd_reopen(fd, (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0)|O_CLOEXEC|O_NONBLOCK|open_flags);
141✔
584
                if (reopened_fd < 0) {
141✔
585
                        if (!FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO))
×
586
                                return log_debug_errno(reopened_fd, "Failed to reopen file descriptor without O_DIRECT: %m");
×
587

588
                        /* Some file systems might not support O_DIRECT, let's gracefully continue without it then. */
589
                        log_debug_errno(reopened_fd, "Failed to enable O_DIRECT for backing file descriptor for loopback device. Continuing without.");
×
590
                        loop_flags &= ~LO_FLAGS_DIRECT_IO;
×
591
                } else
592
                        fd = reopened_fd; /* From now on, operate on our new O_DIRECT fd */
593
        }
594

595
        control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
2,565✔
596
        if (control < 0)
2,565✔
597
                return -errno;
20✔
598

599
        /* Strip LO_FLAGS_PARTSCAN from LOOP_CONFIGURE and enable it afterwards via
600
         * LOOP_SET_STATUS64 to work around a kernel race: LOOP_CONFIGURE sends a uevent with
601
         * GD_NEED_PART_SCAN set before calling loop_reread_partitions(). If udev opens the device in
602
         * response, blkdev_get_whole() triggers a first scan, then loop_reread_partitions() does a
603
         * second scan that briefly drops all partitions. By configuring without partscan,
604
         * GD_SUPPRESS_PART_SCAN stays set, making any concurrent open harmless. LOOP_SET_STATUS64
605
         * doesn't call disk_force_media_change() so it doesn't set GD_NEED_PART_SCAN.
606
         *
607
         * See: https://lore.kernel.org/linux-block/20260330081819.652890-1-daan@amutable.com/T/#u
608
         * Drop this workaround once the kernel fix is widely available. */
609
        bool deferred_partscan = FLAGS_SET(loop_flags, LO_FLAGS_PARTSCAN);
2,545✔
610

611
        config = (struct loop_config) {
5,090✔
612
                .fd = fd,
613
                .block_size = sector_size,
614
                .info = {
615
                        /* Use the specified flags, but configure the read-only flag from the open flags, and force autoclear */
616
                        .lo_flags = ((loop_flags & ~(LO_FLAGS_READ_ONLY|LO_FLAGS_PARTSCAN)) |
5,090✔
617
                                     ((open_flags & O_ACCMODE_STRICT) == O_RDONLY ? LO_FLAGS_READ_ONLY : 0) |
2,545✔
618
                                     LO_FLAGS_AUTOCLEAR),
619
                        .lo_offset = offset,
620
                        .lo_sizelimit = size == UINT64_MAX ? 0 : size,
2,545✔
621
                },
622
        };
623

624
        /* Loop around LOOP_CTL_GET_FREE, since at the moment we attempt to open the returned device it might
625
         * be gone already, taken by somebody else racing against us. */
626
        for (unsigned n_attempts = 0;;) {
2,545✔
627
                usec_t usec;
2,545✔
628
                int nr;
2,545✔
629

630
                /* Let's take a lock on the control device first. On a busy system, where many programs
631
                 * attempt to allocate a loopback device at the same time, we might otherwise keep looping
632
                 * around relatively heavy operations: asking for a free loopback device, then opening it,
633
                 * validating it, attaching something to it. Let's serialize this whole operation, to make
634
                 * unnecessary busywork less likely. Note that this is just something we do to optimize our
635
                 * own code (and whoever else decides to use LOCK_EX locks for this), taking this lock is not
636
                 * necessary, it just means it's less likely we have to iterate through this loop again and
637
                 * again if our own code races against our own code.
638
                 *
639
                 * Note: our lock protocol is to take the /dev/loop-control lock first, and the block device
640
                 * lock second, if both are taken, and always in this order, to avoid ABBA locking issues. */
641
                if (flock(control, LOCK_EX) < 0)
2,545✔
642
                        return -errno;
×
643

644
                nr = ioctl(control, LOOP_CTL_GET_FREE);
2,545✔
645
                if (nr < 0)
2,545✔
646
                        return -errno;
×
647

648
                r = loop_configure(nr, open_flags, lock_op, &config, &d);
2,545✔
649
                if (r >= 0)
2,545✔
650
                        break;
651

652
                /* -ENODEV or friends: Somebody might've gotten the same number from the kernel, used the
653
                 * device, and called LOOP_CTL_REMOVE on it. Let's retry with a new number.
654
                 * -EBUSY: a file descriptor is already bound to the loopback block device.
655
                 * -EUCLEAN: some left-over partition devices that were cleaned up.
656
                 * -ENOANO: we tried to use LO_FLAGS_DIRECT_IO but the kernel rejected it. */
657
                if (!ERRNO_IS_DEVICE_ABSENT(r) && !IN_SET(r, -EBUSY, -EUCLEAN, -ENOANO))
×
658
                        return r;
659

660
                /* OK, this didn't work, let's try again a bit later, but first release the lock on the
661
                 * control device */
662
                if (flock(control, LOCK_UN) < 0)
×
663
                        return -errno;
×
664

665
                if (++n_attempts >= 64) /* Give up eventually */
×
666
                        return -EBUSY;
667

668
                /* If we failed to enable direct IO mode, let's retry without it. We restart the process as
669
                 * on some combination of kernel version and storage filesystem, the kernel is very unhappy
670
                 * about a failed DIRECT_IO enablement and throws I/O errors. */
671
                if (r == -ENOANO && FLAGS_SET(config.info.lo_flags, LO_FLAGS_DIRECT_IO)) {
×
672
                        config.info.lo_flags &= ~LO_FLAGS_DIRECT_IO;
×
673
                        open_flags &= ~O_DIRECT;
×
674

675
                        int non_direct_io_fd = fd_reopen(config.fd, O_CLOEXEC|O_NONBLOCK|open_flags);
×
676
                        if (non_direct_io_fd < 0)
×
677
                                return log_debug_errno(
×
678
                                                non_direct_io_fd,
679
                                                "Failed to reopen file descriptor without O_DIRECT: %m");
680

681
                        safe_close(reopened_fd);
×
682
                        fd = config.fd = /* For cleanups */ reopened_fd = non_direct_io_fd;
×
683
                }
684

685
                /* Wait some random time, to make collision less likely. Let's pick a random time in the
686
                 * range 0ms…250ms, linearly scaled by the number of failed attempts. */
687
                usec = random_u64_range(UINT64_C(10) * USEC_PER_MSEC +
×
688
                                        UINT64_C(240) * USEC_PER_MSEC * n_attempts/64);
×
689
                log_debug("Trying again after %s.", FORMAT_TIMESPAN(usec, USEC_PER_MSEC));
×
690
                (void) usleep_safe(usec);
×
691
        }
692

693
        if (S_ISBLK(st.st_mode)) {
2,545✔
694
                /* Propagate backing device's discard byte limit to our loopback block device. We do this in
695
                 * order to avoid that (supposedly quick) discard requests on the loopback device get turned
696
                 * into (likely slow) zero-out requests on backing devices that do not support discarding
697
                 * natively, but do support zero-out. */
698
                uint64_t discard_max_bytes;
×
699

700
                r = fd_get_max_discard(fd, &discard_max_bytes);
×
701
                if (r < 0)
×
702
                        log_debug_errno(r, "Failed to read 'discard_max_bytes' of backing device, ignoring: %m");
×
703
                else {
704
                        r = fd_set_max_discard(d->fd, discard_max_bytes);
×
705
                        if (r < 0)
×
706
                                log_debug_errno(r, "Failed to write 'discard_max_bytes' of loop device, ignoring: %m");
×
707
                }
708
        }
709

710
        if (deferred_partscan) {
2,545✔
711
                /* Open+close to drain GD_NEED_PART_SCAN harmlessly (GD_SUPPRESS_PART_SCAN is still
712
                 * set so no partitions appear). Then enable partscan via LOOP_SET_STATUS64. */
713
                int tmp_fd = fd_reopen(d->fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
2,366✔
714
                if (tmp_fd < 0)
2,366✔
715
                        return log_debug_errno(tmp_fd, "Failed to reopen loop device to drain partscan flag: %m");
×
716
                safe_close(tmp_fd);
2,366✔
717

718
                struct loop_info64 info;
2,366✔
719
                if (ioctl(d->fd, LOOP_GET_STATUS64, &info) < 0)
2,366✔
720
                        return log_debug_errno(errno, "Failed to get loop device status: %m");
×
721

722
                info.lo_flags |= LO_FLAGS_PARTSCAN;
2,366✔
723

724
                if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
2,366✔
725
                        return log_debug_errno(errno, "Failed to enable partscan on loop device: %m");
×
726
        }
727

728
        d->backing_file = TAKE_PTR(backing_file);
2,545✔
729
        d->backing_inode = st.st_ino;
2,545✔
730
        d->backing_devno = st.st_dev;
2,545✔
731

732
        log_debug("Successfully acquired %s, devno=%u:%u, nr=%i, diskseq=%" PRIu64,
2,545✔
733
                  d->node,
734
                  major(d->devno), minor(d->devno),
735
                  d->nr,
736
                  d->diskseq);
737

738
        *ret = TAKE_PTR(d);
2,545✔
739
        return 0;
2,545✔
740
}
741

742
/* Applies the $SYSTEMD_LOOP_DIRECT_IO override to the given loop flags and returns the result.
 *
 * LO_FLAGS_DIRECT_IO ends up set for every getenv_bool() result other than 0, i.e. it is only
 * cleared when the variable parses as "false". A parse failure is logged at debug level and
 * otherwise ignored; -ENXIO is not logged (presumably it means the variable is unset — confirm
 * against getenv_bool()). */
static uint32_t loop_flags_mangle(uint32_t loop_flags) {
        int direct_io = getenv_bool("SYSTEMD_LOOP_DIRECT_IO");

        if (direct_io < 0 && direct_io != -ENXIO)
                log_debug_errno(direct_io, "Failed to parse $SYSTEMD_LOOP_DIRECT_IO, ignoring: %m");

        /* On by default: only an explicit "off" (direct_io == 0) clears the flag. */
        bool enable = direct_io != 0;

        return UPDATE_FLAG(loop_flags, LO_FLAGS_DIRECT_IO, enable);
}
751

752
int loop_device_make(
141✔
753
                int fd,
754
                int open_flags,
755
                uint64_t offset,
756
                uint64_t size,
757
                uint32_t sector_size,
758
                uint32_t loop_flags,
759
                int lock_op,
760
                LoopDevice **ret) {
761

762
        assert(fd >= 0);
141✔
763
        assert(ret);
141✔
764

765
        return loop_device_make_internal(
141✔
766
                        NULL,
767
                        fd,
768
                        open_flags,
769
                        offset,
770
                        size,
771
                        sector_size,
772
                        loop_flags_mangle(loop_flags),
773
                        lock_op,
774
                        ret);
775
}
776

777
int loop_device_make_by_path_at(
2,425✔
778
                int dir_fd,
779
                const char *path,
780
                int open_flags,
781
                uint32_t sector_size,
782
                uint32_t loop_flags,
783
                int lock_op,
784
                LoopDevice **ret) {
785

786
        int r, basic_flags, direct_flags, rdwr_flags;
2,425✔
787
        _cleanup_close_ int fd = -EBADF;
2,425✔
788
        bool direct = false;
2,425✔
789

790
        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
2,425✔
791
        assert(ret);
2,425✔
792
        assert(open_flags < 0 || IN_SET(open_flags, O_RDWR, O_RDONLY));
2,425✔
793

794
        /* Passing < 0 as open_flags here means we'll try to open the device writable if we can, retrying
795
         * read-only if we cannot. */
796

797
        loop_flags = loop_flags_mangle(loop_flags);
2,425✔
798

799
        /* Let's open with O_DIRECT if we can. But not all file systems support that, hence fall back to
800
         * non-O_DIRECT mode automatically, if it fails. */
801

802
        basic_flags = O_CLOEXEC|O_NONBLOCK|O_NOCTTY;
2,425✔
803
        direct_flags = FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0;
2,425✔
804
        rdwr_flags = open_flags >= 0 ? open_flags : O_RDWR;
2,425✔
805

806
        fd = xopenat(dir_fd, path, basic_flags|direct_flags|rdwr_flags);
2,425✔
807
        if (fd < 0 && direct_flags != 0) /* If we had O_DIRECT on, and things failed with that, let's immediately try again without */
2,425✔
808
                fd = xopenat(dir_fd, path, basic_flags|rdwr_flags);
1✔
809
        else
810
                direct = direct_flags != 0;
×
811
        if (fd < 0) {
2,425✔
812
                r = fd;
1✔
813

814
                /* Retry read-only? */
815
                if (open_flags >= 0 || !ERRNO_IS_NEG_FS_WRITE_REFUSED(r))
2,426✔
816
                        return r;
817

818
                fd = xopenat(dir_fd, path, basic_flags|direct_flags|O_RDONLY);
×
819
                if (fd < 0 && direct_flags != 0) /* as above */
×
820
                        fd = xopenat(dir_fd, path, basic_flags|O_RDONLY);
×
821
                else
822
                        direct = direct_flags != 0;
×
823
                if (fd < 0)
×
824
                        return r; /* Propagate original error */
825

826
                open_flags = O_RDONLY;
827
        } else if (open_flags < 0)
2,424✔
828
                open_flags = O_RDWR;
149✔
829

830
        log_debug("Opened %s in %s access mode%s, with O_DIRECT %s%s.",
11,563✔
831
                  path ?: "loop device",
832
                  open_flags == O_RDWR ? "O_RDWR" : "O_RDONLY",
833
                  open_flags != rdwr_flags ? " (O_RDWR was requested but not allowed)" : "",
834
                  direct ? "enabled" : "disabled",
835
                  direct != (direct_flags != 0) ? " (O_DIRECT was requested but not supported)" : "");
836

837
        return loop_device_make_internal(
4,503✔
838
                        dir_fd == AT_FDCWD ? path : NULL,
839
                        fd,
840
                        open_flags,
841
                        /* offset= */ 0,
842
                        /* size= */ 0,
843
                        sector_size,
844
                        loop_flags,
845
                        lock_op,
846
                        ret);
847
}
848

849
int loop_device_make_by_path_memory(
1✔
850
                const char *path,
851
                int open_flags,
852
                uint32_t sector_size,
853
                uint32_t loop_flags,
854
                int lock_op,
855
                LoopDevice **ret) {
856

857
        _cleanup_close_ int fd = -EBADF, mfd = -EBADF;
1✔
858
        _cleanup_free_ char *fn = NULL;
1✔
859
        int r;
1✔
860

861
        assert(path);
1✔
862
        assert(open_flags < 0 || IN_SET(open_flags, O_RDWR, O_RDONLY));
1✔
863
        assert(ret);
1✔
864

865
        /* memfds are always writable, so default to O_RDWR when auto-detecting. */
866
        if (open_flags < 0)
1✔
867
                open_flags = O_RDWR;
1✔
868

869
        loop_flags &= ~LO_FLAGS_DIRECT_IO; /* memfds don't support O_DIRECT, hence LO_FLAGS_DIRECT_IO can't be used either */
1✔
870

871
        fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|O_RDONLY);
1✔
872
        if (fd < 0)
1✔
873
                return -errno;
×
874

875
        r = fd_verify_regular_or_block(fd);
1✔
876
        if (r < 0)
1✔
877
                return r;
878

879
        r = path_extract_filename(path, &fn);
1✔
880
        if (r < 0)
1✔
881
                return r;
882

883
        mfd = memfd_clone_fd(fd, fn, open_flags|O_CLOEXEC);
1✔
884
        if (mfd < 0)
1✔
885
                return mfd;
886

887
        fd = safe_close(fd); /* Let's close the original early */
1✔
888

889
        return loop_device_make_internal(NULL, mfd, open_flags, 0, 0, sector_size, loop_flags, lock_op, ret);
1✔
890
}
891

892
/* Destroys a LoopDevice object: releases its file descriptors and frees its memory. If the device
 * is neither foreign (LOOP_DEVICE_IS_FOREIGN()) nor relinquished, the loop device itself is also
 * detached (LOOP_CLR_FD) and removed (LOOP_CTL_REMOVE). All failures along the way are logged at
 * debug level and otherwise ignored.
 *
 * Accepts NULL, in which case NULL is returned. Otherwise returns the result of mfree(). */
static LoopDevice* loop_device_free(LoopDevice *d) {
        _cleanup_close_ int control = -EBADF;
        int r;

        if (!d)
                return NULL;

        /* Whether tearing down the kernel device is our job. Nothing below modifies the fields this
         * depends on, hence determine it once. */
        bool clear_device = !LOOP_DEVICE_IS_FOREIGN(d) && !d->relinquished;

        /* Drop our lock on the block device before doing anything else. Lock protocol: if both the
         * control device and the block device are locked, the control device lock must be taken
         * first, to avoid ABBA deadlocks — and we are about to take the control device lock. Also,
         * LOOP_CLR_FD further down would fail while a second fd to the device is still open. */
        d->lock_fd = safe_close(d->lock_fd);

        /* Open and lock the control device early, so that releasing and deleting the block device
         * happens in a synchronized fashion, and concurrent allocators are less likely to pick up a
         * device that is just about to be deleted. */
        if (clear_device) {
                control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
                if (control < 0)
                        log_debug_errno(errno, "Failed to open loop control device, cannot remove loop device '%s', ignoring: %m", strna(d->node));
                else if (flock(control, LOCK_EX) < 0)
                        log_debug_errno(errno, "Failed to lock loop control device, ignoring: %m");
        }

        /* Next, release the loopback block device itself */
        if (d->fd >= 0) {
                /* Sync explicitly, as in-flight blocks might otherwise never hit the backing file */
                if (fsync(d->fd) < 0)
                        log_debug_errno(errno, "Failed to sync loop block device, ignoring: %m");

                if (clear_device) {
                        /* Clear the device synchronously: lock it, remove all partitions by hand,
                         * then detach the backing file. That way udev should not access the device
                         * concurrently, and once we are through the device and its partition
                         * children should be gone. The lock is taken on the primary fd (instead of
                         * a separate lock fd as during allocation) because it must be held all the
                         * way through LOOP_CLR_FD, which would fail with more than one fd open. */

                        if (flock(d->fd, LOCK_EX) < 0)
                                log_debug_errno(errno, "Failed to lock loop block device, ignoring: %m");

                        r = block_device_remove_all_partitions(d->dev, d->fd);
                        if (r < 0)
                                log_debug_errno(r, "Failed to remove partitions of loopback block device, ignoring: %m");

                        if (ioctl(d->fd, LOOP_CLR_FD) < 0)
                                log_debug_errno(errno, "Failed to clear loop device, ignoring: %m");
                }

                safe_close(d->fd);
        }

        /* With the block device released, try to remove the device node, too */
        if (control >= 0) {
                /* Back-off schedule: up to 39 attempts, sleeping after each of the first 38 failed
                 * ones. The delay starts at 5 ms and doubles on every fifth attempt, i.e.
                 * 4*5 + 5*10 + 5*20 + 5*40 + 5*80 + 5*160 + 5*320 + 4*640 = 5730 ms in total. */
                useconds_t delay = 5 * USEC_PER_MSEC;

                for (unsigned attempt = 1;; attempt++) {
                        if (ioctl(control, LOOP_CTL_REMOVE, d->nr) >= 0)
                                break;

                        /* Only EBUSY is worth retrying, and only for so long. */
                        if (errno != EBUSY || attempt > 38) {
                                log_debug_errno(errno, "Failed to remove device %s: %m", strna(d->node));
                                break;
                        }

                        if (attempt % 5 == 0) {
                                log_debug("Device is still busy after %u attempts…", attempt);
                                delay *= 2;
                        }

                        (void) usleep_safe(delay);
                }
        }

        free(d->node);
        sd_device_unref(d->dev);
        free(d->backing_file);
        return mfree(d);
}
972

973
/* NOTE(review): presumably expands to the loop_device_ref()/loop_device_unref() reference counting
 * helpers for LoopDevice, with loop_device_free() invoked once the last reference is dropped —
 * confirm against the macro definition. */
DEFINE_TRIVIAL_REF_UNREF_FUNC(LoopDevice, loop_device, loop_device_free);
8,068✔
974

975
/* Marks the loop device as relinquished: from now on freeing this object will no longer detach or
 * remove the kernel device. Cleaning up is then left to the kernel's "auto-clear" logic, which is
 * enabled when the device is created. */
void loop_device_relinquish(LoopDevice *d) {
        assert(d);

        d->relinquished = true;
}
203✔
983

984
/* Reverts loop_device_relinquish(): clears the 'relinquished' mark on the object again. */
void loop_device_unrelinquish(LoopDevice *d) {
        assert(d);

        d->relinquished = false;
}
25✔
988

989
int loop_device_open(
137✔
990
                sd_device *dev,
991
                int open_flags,
992
                int lock_op,
993
                LoopDevice **ret) {
994

995
        _cleanup_close_ int fd = -EBADF, lock_fd = -EBADF;
137✔
996
        _cleanup_free_ char *node = NULL, *backing_file = NULL;
137✔
997
        dev_t devnum, backing_devno = 0;
137✔
998
        struct loop_info64 info;
137✔
999
        ino_t backing_inode = 0;
137✔
1000
        uint64_t diskseq = 0;
137✔
1001
        LoopDevice *d;
137✔
1002
        const char *s;
137✔
1003
        int r, nr = -1;
137✔
1004

1005
        assert(dev);
137✔
1006
        assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
137✔
1007
        assert(ret);
137✔
1008

1009
        /* Even if fd is provided through the argument in loop_device_open_from_fd(), we reopen the inode
1010
         * here, instead of keeping just a dup() clone of it around, since we want to ensure that the
1011
         * O_DIRECT flag of the handle we keep is off, we have our own file index, and have the right
1012
         * read/write mode in effect. */
1013
        fd = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
137✔
1014
        if (fd < 0)
137✔
1015
                return fd;
1016

1017
        if ((lock_op & ~LOCK_NB) != LOCK_UN) {
137✔
1018
                lock_fd = open_lock_fd(fd, lock_op);
137✔
1019
                if (lock_fd < 0)
137✔
1020
                        return lock_fd;
1021
        }
1022

1023
        if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0) {
137✔
1024
#if HAVE_VALGRIND_MEMCHECK_H
1025
                /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
1026
                VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
1027
#endif
1028
                nr = info.lo_number;
32✔
1029

1030
                if (sd_device_get_sysattr_value(dev, "loop/backing_file", &s) >= 0) {
32✔
1031
                        backing_file = strdup(s);
23✔
1032
                        if (!backing_file)
23✔
1033
                                return -ENOMEM;
1034
                }
1035

1036
                backing_devno = info.lo_device;
32✔
1037
                backing_inode = info.lo_inode;
32✔
1038
        }
1039

1040
        r = fd_get_diskseq(fd, &diskseq);
137✔
1041
        if (r < 0 && r != -EOPNOTSUPP)
137✔
1042
                return r;
1043

1044
        uint32_t sector_size;
137✔
1045
        r = blockdev_get_sector_size(fd, &sector_size);
137✔
1046
        if (r < 0)
137✔
1047
                return r;
1048

1049
        uint64_t device_size;
137✔
1050
        r = blockdev_get_device_size(fd, &device_size);
137✔
1051
        if (r < 0)
137✔
1052
                return r;
1053

1054
        r = sd_device_get_devnum(dev, &devnum);
137✔
1055
        if (r < 0)
137✔
1056
                return r;
1057

1058
        r = sd_device_get_devname(dev, &s);
137✔
1059
        if (r < 0)
137✔
1060
                return r;
1061

1062
        node = strdup(s);
137✔
1063
        if (!node)
137✔
1064
                return -ENOMEM;
1065

1066
        d = new(LoopDevice, 1);
137✔
1067
        if (!d)
137✔
1068
                return -ENOMEM;
1069

1070
        *d = (LoopDevice) {
274✔
1071
                .n_ref = 1,
1072
                .fd = TAKE_FD(fd),
137✔
1073
                .lock_fd = TAKE_FD(lock_fd),
137✔
1074
                .nr = nr,
1075
                .node = TAKE_PTR(node),
137✔
1076
                .dev = sd_device_ref(dev),
137✔
1077
                .backing_file = TAKE_PTR(backing_file),
137✔
1078
                .backing_inode = backing_inode,
1079
                .backing_devno = backing_devno,
1080
                .relinquished = true, /* It's not ours, don't try to destroy it when this object is freed */
1081
                .devno = devnum,
1082
                .diskseq = diskseq,
1083
                .sector_size = sector_size,
1084
                .device_size = device_size,
1085
                .created = false,
1086
        };
1087

1088
        *ret = d;
137✔
1089
        return 0;
137✔
1090
}
1091

1092
int loop_device_open_from_fd(
6✔
1093
                int fd,
1094
                int open_flags,
1095
                int lock_op,
1096
                LoopDevice **ret) {
1097

1098
        _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
6✔
1099
        int r;
6✔
1100

1101
        r = block_device_new_from_fd(ASSERT_FD(fd), 0, &dev);
6✔
1102
        if (r < 0)
6✔
1103
                return r;
1104

1105
        return loop_device_open(dev, open_flags, lock_op, ret);
6✔
1106
}
1107

1108
int loop_device_open_from_path(
×
1109
                const char *path,
1110
                int open_flags,
1111
                int lock_op,
1112
                LoopDevice **ret) {
1113

1114
        _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
×
1115
        int r;
×
1116

1117
        assert(path);
×
1118

1119
        r = block_device_new_from_path(path, 0, &dev);
×
1120
        if (r < 0)
×
1121
                return r;
1122

1123
        return loop_device_open(dev, open_flags, lock_op, ret);
×
1124
}
1125

1126
static int resize_partition(int partition_fd, uint64_t offset, uint64_t size) {
×
1127
        char sysfs[STRLEN("/sys/dev/block/:/partition") + 2*DECIMAL_STR_MAX(dev_t) + 1];
×
1128
        _cleanup_free_ char *buffer = NULL;
×
1129
        uint64_t current_offset, current_size, partno;
×
1130
        _cleanup_close_ int whole_fd = -EBADF;
×
1131
        struct stat st;
×
1132
        dev_t devno;
×
1133
        int r;
×
1134

1135
        /* Resizes the partition the loopback device refer to (assuming it refers to one instead of an actual
1136
         * loopback device), and changes the offset, if needed. This is a fancy wrapper around
1137
         * BLKPG_RESIZE_PARTITION. */
1138

1139
        if (fstat(ASSERT_FD(partition_fd), &st) < 0)
×
1140
                return -errno;
×
1141

1142
        assert(S_ISBLK(st.st_mode));
×
1143

1144
        xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/partition", DEVNUM_FORMAT_VAL(st.st_rdev));
×
1145
        r = read_one_line_file(sysfs, &buffer);
×
1146
        if (r == -ENOENT) /* not a partition, cannot resize */
×
1147
                return -ENOTTY;
1148
        if (r < 0)
×
1149
                return r;
1150
        r = safe_atou64(buffer, &partno);
×
1151
        if (r < 0)
×
1152
                return r;
1153

1154
        xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/start", DEVNUM_FORMAT_VAL(st.st_rdev));
×
1155

1156
        buffer = mfree(buffer);
×
1157
        r = read_one_line_file(sysfs, &buffer);
×
1158
        if (r < 0)
×
1159
                return r;
1160
        r = safe_atou64(buffer, &current_offset);
×
1161
        if (r < 0)
×
1162
                return r;
1163
        if (current_offset > UINT64_MAX/512U)
×
1164
                return -EINVAL;
1165
        current_offset *= 512U;
×
1166

1167
        r = blockdev_get_device_size(partition_fd, &current_size);
×
1168
        if (r < 0)
×
1169
                return r;
1170

1171
        if (size == UINT64_MAX && offset == UINT64_MAX)
×
1172
                return 0;
1173
        if (current_size == size && current_offset == offset)
×
1174
                return 0;
1175

1176
        xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/../dev", DEVNUM_FORMAT_VAL(st.st_rdev));
×
1177

1178
        buffer = mfree(buffer);
×
1179
        r = read_one_line_file(sysfs, &buffer);
×
1180
        if (r < 0)
×
1181
                return r;
1182
        r = parse_devnum(buffer, &devno);
×
1183
        if (r < 0)
×
1184
                return r;
1185

1186
        whole_fd = r = device_open_from_devnum(S_IFBLK, devno, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY, NULL);
×
1187
        if (r < 0)
×
1188
                return r;
1189

1190
        return block_device_resize_partition(
×
1191
                        whole_fd,
1192
                        partno,
1193
                        offset == UINT64_MAX ? current_offset : offset,
1194
                        size == UINT64_MAX ? current_size : size);
1195
}
1196

1197
/* Updates the offset and/or size limit of the loop device, relative to the start of the backing
 * file or block device. If the object does not refer to a loop device (nr < 0) the request is
 * forwarded to resize_partition() instead.
 *
 * Passing UINT64_MAX for offset or size leaves that parameter unchanged.
 *
 * Returns 0 if nothing had to be changed, a negative errno if one of the ioctls failed, and
 * otherwise the (non-negative) result of the LOOP_SET_STATUS64 ioctl. */
int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size) {
        struct loop_info64 info;

        assert(d);
        assert(d->fd >= 0);

        if (d->nr < 0) /* not a loopback device */
                return resize_partition(d->fd, offset, size);

        if (ioctl(d->fd, LOOP_GET_STATUS64, &info) < 0)
                return -errno;

#if HAVE_VALGRIND_MEMCHECK_H
        /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
        VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
#endif

        /* A parameter needs no update if the caller did not specify it, or if the kernel already
         * has the requested value. */
        bool size_settled = size == UINT64_MAX || info.lo_sizelimit == size;
        bool offset_settled = offset == UINT64_MAX || info.lo_offset == offset;

        if (size_settled && offset_settled)
                return 0;

        if (size != UINT64_MAX)
                info.lo_sizelimit = size;
        if (offset != UINT64_MAX)
                info.lo_offset = offset;

        return RET_NERRNO(ioctl(d->fd, LOOP_SET_STATUS64, &info));
}
1231

1232
/* Changes the BSD file lock held on the loop device. 'operation' is LOCK_UN, LOCK_SH or LOCK_EX,
 * optionally combined with LOCK_NB.
 *
 * The lock lives on a separate lock fd: unlocking closes that fd, locking creates it on demand
 * via open_lock_fd(), or changes the lock mode on it if it already exists.
 *
 * Returns 0 on success when unlocking or when a new lock fd was created, the negative error from
 * open_lock_fd() if creating it failed, and RET_NERRNO() of flock() when an existing lock fd is
 * re-locked. */
int loop_device_flock(LoopDevice *d, int operation) {
        assert(IN_SET(operation & ~LOCK_NB, LOCK_UN, LOCK_SH, LOCK_EX));
        assert(d);

        int mode = operation & ~LOCK_NB;

        /* Unlocking simply means getting rid of the lock fd */
        if (mode == LOCK_UN) {
                d->lock_fd = safe_close(d->lock_fd);
                return 0;
        }

        /* A lock fd exists already: just switch the lock mode on it */
        if (d->lock_fd >= 0)
                return RET_NERRNO(flock(d->lock_fd, operation));

        /* No lock fd yet: open one, which takes the lock right away */
        d->lock_fd = open_lock_fd(ASSERT_FD(d->fd), operation);
        if (d->lock_fd < 0)
                return d->lock_fd;

        return 0;
}
1254

1255
/* Calls fsync() on the loop device fd. Returns RET_NERRNO() of the fsync() call.
 *
 * loop_device_free() issues the same sync implicitly, but ignores failures; calling this function
 * explicitly allows the caller to check the result. */
int loop_device_sync(LoopDevice *d) {
        assert(d);

        int fd = ASSERT_FD(d->fd);

        return RET_NERRNO(fsync(fd));
}
1263

1264
/* Turns the LO_FLAGS_AUTOCLEAR flag of the loop device on or off.
 *
 * Returns 1 if the flag was changed, 0 if nothing had to be done (the device is foreign per
 * LOOP_DEVICE_IS_FOREIGN(), or the flag already had the requested state), and a negative errno if
 * one of the ioctls failed. */
int loop_device_set_autoclear(LoopDevice *d, bool autoclear) {
        struct loop_info64 info;

        assert(d);

        if (LOOP_DEVICE_IS_FOREIGN(d))
                return 0;

        if (ioctl(ASSERT_FD(d->fd), LOOP_GET_STATUS64, &info) < 0)
                return -errno;

        /* Already in the requested state? Then leave the device alone. */
        bool currently_set = FLAGS_SET(info.lo_flags, LO_FLAGS_AUTOCLEAR);
        if (currently_set == autoclear)
                return 0;

        SET_FLAG(info.lo_flags, LO_FLAGS_AUTOCLEAR, autoclear);

        if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
                return -errno;

        return 1;
}
1285

1286
/* Sets the .lo_file_name field of the loop device. A NULL name clears the field.
 *
 * The field is meant to carry the path of the backing file, but the kernel accepts any free-form
 * string. Tools interested in the real backing file generally read the kernel-generated
 * loop/backing_file sysfs attribute instead (which is subject to file system namespaces and the
 * like). Because userspace may pick .lo_file_name freely when setting up a device, it is useful
 * for things like /dev/disk/by-loop-ref/ symlinks, allowing applications to recognize their own
 * loopback devices.
 *
 * Returns 1 if the name was changed, 0 if it already had the requested value, -ENOBUFS if the
 * name does not fit into the field (including the trailing NUL), and a negative errno if one of
 * the ioctls failed. */
int loop_device_set_filename(LoopDevice *d, const char *name) {
        struct loop_info64 info;

        assert(d);

        if (name && strlen(name) >= sizeof(info.lo_file_name))
                return -ENOBUFS;

        if (ioctl(ASSERT_FD(d->fd), LOOP_GET_STATUS64, &info) < 0)
                return -errno;

        /* Nothing to do if the kernel already has this name (NULL compares as the empty string) */
        if (strneq((char*) info.lo_file_name, strempty(name), sizeof(info.lo_file_name)))
                return 0;

        if (!name)
                memzero(info.lo_file_name, sizeof(info.lo_file_name));
        else {
                /* Copy the name and make sure the last byte of the field is NUL. */
                strncpy((char*) info.lo_file_name, name, sizeof(info.lo_file_name)-1);
                info.lo_file_name[sizeof(info.lo_file_name)-1] = 0;
        }

        if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
                return -errno;

        return 1;
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc