• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 20417900562

21 Dec 2025 07:31PM UTC coverage: 72.5% (-0.2%) from 72.701%
20417900562

push

github

DaanDeMeyer
mkosi: Use initrd as exitrd

Let's speed up image builds by avoiding building
an exitrd and instead reusing the initrd image for
the same purpose.

309142 of 426400 relevant lines covered (72.5%)

1141502.27 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

63.52
/src/shared/loop-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#if HAVE_VALGRIND_MEMCHECK_H
4
#include <valgrind/memcheck.h>
5
#endif
6

7
#include <fcntl.h>
8
#include <linux/loop.h>
9
#include <sys/file.h>
10
#include <sys/ioctl.h>
11
#include <unistd.h>
12

13
#include "sd-device.h"
14

15
#include "alloc-util.h"
16
#include "blockdev-util.h"
17
#include "data-fd-util.h"
18
#include "device-util.h"
19
#include "devnum-util.h"
20
#include "dissect-image.h"
21
#include "env-util.h"
22
#include "errno-util.h"
23
#include "fd-util.h"
24
#include "fileio.h"
25
#include "fs-util.h"
26
#include "loop-util.h"
27
#include "parse-util.h"
28
#include "path-util.h"
29
#include "random-util.h"
30
#include "stat-util.h"
31
#include "stdio-util.h"
32
#include "string-util.h"
33
#include "time-util.h"
34

35
/* Cleanup handler for an fd of a loopback block device that already had a backing file attached: if the
 * fd is valid, issue LOOP_CLR_FD on it (result deliberately ignored) and close it afterwards. A negative
 * fd means there is nothing to undo. */
static void cleanup_clear_loop_close(int *fd) {
        if (*fd >= 0) {
                (void) ioctl(*fd, LOOP_CLR_FD);
                (void) safe_close(*fd);
        }
}
42

43
/* Checks whether the loopback block device referenced by 'fd' currently has a backing file.
 *
 * Returns true if LOOP_GET_STATUS64 succeeds (i.e. bound), false if it fails with ENXIO (i.e. not bound),
 * and a negative errno-style error for any other failure. */
static int loop_is_bound(int fd) {
        struct loop_info64 status;

        if (ioctl(ASSERT_FD(fd), LOOP_GET_STATUS64, &status) >= 0)
                return true; /* bound! */

        /* ENXIO is how the ioctl reports "not bound"; everything else is a genuine error. */
        return errno == ENXIO ? false : -errno;
}
55

56
static int open_lock_fd(int primary_fd, int operation) {
2,099✔
57
        _cleanup_close_ int lock_fd = -EBADF;
2,099✔
58

59
        assert(IN_SET(operation & ~LOCK_NB, LOCK_SH, LOCK_EX));
2,099✔
60

61
        lock_fd = fd_reopen(ASSERT_FD(primary_fd), O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2,099✔
62
        if (lock_fd < 0)
2,099✔
63
                return lock_fd;
64

65
        if (flock(lock_fd, operation) < 0)
2,099✔
66
                return -errno;
×
67

68
        return TAKE_FD(lock_fd);
69
}
70

71
/* Verifies that direct IO is really in effect on the loopback device 'fd' if the configuration 'c' asked
 * for it.
 *
 * Returns 0 if direct IO was not requested or is enabled, -ENOANO if it was requested but is not enabled,
 * and a negative errno-style error if the device status cannot be queried. */
static int loop_configure_verify_direct_io(int fd, const struct loop_config *c) {
        struct loop_info64 status;

        assert(fd >= 0);
        assert(c);

        if (!FLAGS_SET(c->info.lo_flags, LO_FLAGS_DIRECT_IO))
                return 0; /* Not requested, nothing to verify. */

        if (ioctl(fd, LOOP_GET_STATUS64, &status) < 0)
                return log_debug_errno(errno, "Failed to issue LOOP_GET_STATUS64: %m");

#if HAVE_VALGRIND_MEMCHECK_H
        VALGRIND_MAKE_MEM_DEFINED(&status, sizeof(status));
#endif

        /* Background: kernels <= 5.3 required the block size of the loopback device to be set to the
         * logical block size of the underlying file system before direct IO could work. There was no
         * nice way to query that value, hence we never bothered. Newer kernels propagate the block size
         * on their own. All we do here is check whether enabling direct IO had an effect, so that
         * failures are easy to debug.
         *
         * (If somebody really needs direct IO on such old kernels: enabling it with iteratively larger
         * block sizes until it works might be an option.)
         *
         * Also note that on older kernels (e.g. 5.10) the kernel starts returning I/O errors when
         * accessing the mounted loop device if this is attempted on a file stored on a dm-crypt backed
         * partition. Hence report a recognizable error here, which makes the caller start over without
         * LO_FLAGS_DIRECT_IO. */
        if (FLAGS_SET(status.lo_flags, LO_FLAGS_DIRECT_IO))
                return 0;

        return log_debug_errno(
                        SYNTHETIC_ERRNO(ENOANO),
                        "Could not enable direct IO mode, retrying in buffered IO mode.");
}
106

107
/* Validates that the settings requested in 'c' actually took effect on the loopback device 'fd' after
 * LOOP_CONFIGURE was issued.
 *
 * Returns > 0 if everything checked out, 0 if at least one setting (block size, size limit, partition
 * scanning) was not honoured, and a negative errno-style error if a check itself failed or direct IO
 * could not be enabled. */
static int loop_configure_verify(int fd, const struct loop_config *c) {
        bool honoured = true;
        int r;

        assert(fd >= 0);
        assert(c);

        /* 1. Requested block size */
        if (c->block_size != 0) {
                uint32_t effective_ssz;

                r = blockdev_get_sector_size(fd, &effective_ssz);
                if (r < 0)
                        return r;

                if (effective_ssz != c->block_size) {
                        log_debug("LOOP_CONFIGURE didn't honour requested block size %" PRIu32 ", got %" PRIu32 " instead. Ignoring.", c->block_size, effective_ssz);
                        honoured = false;
                }
        }

        /* 2. Requested size limit. Vanilla kernel 5.8 does not propagate the size limit into the block
         * device properly, hence check right away whether it had the desired effect. If it didn't, the
         * classic LOOP_SET_STATUS64 shall be used instead. */
        if (c->info.lo_sizelimit != 0) {
                uint64_t effective_size;

                r = blockdev_get_device_size(fd, &effective_size);
                if (r < 0)
                        return r;

                if (effective_size != c->info.lo_sizelimit) {
                        log_debug("LOOP_CONFIGURE is broken, doesn't honour .info.lo_sizelimit. Falling back to LOOP_SET_STATUS64.");
                        honoured = false;
                }
        }

        /* 3. Requested partition scanning. Vanilla kernel 5.8 does not propagate this flag into the
         * block device properly either, hence verify it before returning. */
        if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_PARTSCAN)) {
                r = blockdev_partscan_enabled_fd(fd);
                if (r < 0)
                        return r;
                if (r == 0) {
                        log_debug("LOOP_CONFIGURE is broken, doesn't honour LO_FLAGS_PARTSCAN. Falling back to LOOP_SET_STATUS64.");
                        honoured = false;
                }
        }

        /* 4. Requested direct IO mode */
        r = loop_configure_verify_direct_io(fd, c);
        if (r < 0)
                return r;

        return honoured;
}
163

164
static int loop_configure_fallback(int fd, const struct loop_config *c) {
×
165
        struct loop_info64 info_copy;
×
166
        int r;
×
167

168
        assert(fd >= 0);
×
169
        assert(c);
×
170

171
        /* Only some of the flags LOOP_CONFIGURE can set are also settable via LOOP_SET_STATUS64, hence mask
172
         * them out. */
173
        info_copy = c->info;
×
174
        info_copy.lo_flags &= LOOP_SET_STATUS_SETTABLE_FLAGS;
×
175

176
        /* Since kernel commit 5db470e229e22b7eda6e23b5566e532c96fb5bc3 (kernel v5.0) the LOOP_SET_STATUS64
177
         * ioctl can return EAGAIN in case we change the info.lo_offset field, if someone else is accessing the
178
         * block device while we try to reconfigure it. This is a pretty common case, since udev might
179
         * instantly start probing the device as soon as we attach an fd to it. Hence handle it in two ways:
180
         * first, let's take the BSD lock to ensure that udev will not step in between the point in
181
         * time where we attach the fd and where we reconfigure the device. Secondly, let's wait 50ms on
182
         * EAGAIN and retry. The former should be an efficient mechanism to avoid we have to wait 50ms
183
         * needlessly if we are just racing against udev. The latter is protection against all other cases,
184
         * i.e. peers that do not take the BSD lock. */
185

186
        for (unsigned n_attempts = 0;;) {
×
187
                if (ioctl(fd, LOOP_SET_STATUS64, &info_copy) >= 0)
×
188
                        break;
189

190
                if (errno != EAGAIN || ++n_attempts >= 64)
×
191
                        return log_debug_errno(errno, "Failed to configure loopback block device: %m");
×
192

193
                /* Sleep some random time, but at least 10ms, at most 250ms. Increase the delay the more
194
                 * failed attempts we see */
195
                (void) usleep_safe(UINT64_C(10) * USEC_PER_MSEC +
×
196
                              random_u64_range(UINT64_C(240) * USEC_PER_MSEC * n_attempts/64));
×
197
        }
198

199
        /* If a block size is requested then try to configure it. If that doesn't work, ignore errors, but
200
         * afterwards, let's validate what is in effect, and if it doesn't match what we want, fail */
201
        if (c->block_size != 0) {
×
202
                uint32_t ssz;
×
203

204
                if (ioctl(fd, LOOP_SET_BLOCK_SIZE, (unsigned long) c->block_size) < 0)
×
205
                        log_debug_errno(errno, "Failed to set sector size, ignoring: %m");
×
206

207
                r = blockdev_get_sector_size(fd, &ssz);
×
208
                if (r < 0)
×
209
                        return log_debug_errno(r, "Failed to read sector size: %m");
×
210
                if (ssz != c->block_size)
×
211
                        return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Sector size of loopback device doesn't match what we requested, refusing.");
×
212
        }
213

214
        /* LO_FLAGS_DIRECT_IO is a flags we need to configure via explicit ioctls. */
215
        if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_DIRECT_IO))
×
216
                if (ioctl(fd, LOOP_SET_DIRECT_IO, 1UL) < 0)
×
217
                        log_debug_errno(errno, "Failed to enable direct IO mode, ignoring: %m");
×
218

219
        return loop_configure_verify_direct_io(fd, c);
×
220
}
221

222
static int loop_configure(
1,958✔
223
                int nr,
224
                int open_flags,
225
                int lock_op,
226
                const struct loop_config *c,
227
                LoopDevice **ret) {
228

229
        static bool loop_configure_broken = false;
1,958✔
230

231
        _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
1,958✔
232
        _cleanup_(cleanup_clear_loop_close) int loop_with_fd = -EBADF; /* This must be declared before lock_fd. */
×
233
        _cleanup_close_ int fd = -EBADF, lock_fd = -EBADF;
3,916✔
234
        _cleanup_free_ char *node = NULL;
1,958✔
235
        uint64_t diskseq = 0;
1,958✔
236
        dev_t devno;
1,958✔
237
        int r;
1,958✔
238

239
        assert(nr >= 0);
1,958✔
240
        assert(c);
1,958✔
241
        assert(ret);
1,958✔
242

243
        if (asprintf(&node, "/dev/loop%i", nr) < 0)
1,958✔
244
                return log_oom_debug();
×
245

246
        r = sd_device_new_from_devname(&dev, node);
1,958✔
247
        if (r < 0)
1,958✔
248
                return log_debug_errno(r, "Failed to create sd_device object for \"%s\": %m", node);
×
249

250
        r = sd_device_get_devnum(dev, &devno);
1,958✔
251
        if (r < 0)
1,958✔
252
                return log_device_debug_errno(dev, r, "Failed to get devnum: %m");
×
253

254
        fd = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
1,958✔
255
        if (fd < 0)
1,958✔
256
                return log_device_debug_errno(dev, fd, "Failed to open device: %m");
×
257

258
        /* Let's lock the device before we do anything. We take the BSD lock on a second, separately opened
259
         * fd for the device. udev after all watches for close() events (specifically IN_CLOSE_WRITE) on
260
         * block devices to reprobe them, hence by having a separate fd we will later close() we can ensure
261
         * we trigger udev after everything is done. If we'd lock our own fd instead and keep it open for a
262
         * long time udev would possibly never run on it again, even though the fd is unlocked, simply
263
         * because we never close() it. It also has the nice benefit we can use the _cleanup_close_ logic to
264
         * automatically release the lock, after we are done. */
265
        lock_fd = open_lock_fd(fd, LOCK_EX);
1,958✔
266
        if (lock_fd < 0)
1,958✔
267
                return log_device_debug_errno(dev, lock_fd, "Failed to acquire lock: %m");
×
268

269
        log_device_debug(dev, "Acquired exclusive lock.");
1,985✔
270

271
        /* Let's see if backing file is really unattached. Someone may already attach a backing file without
272
         * taking BSD lock. */
273
        r = loop_is_bound(fd);
1,958✔
274
        if (r < 0)
1,958✔
275
                return log_device_debug_errno(dev, r, "Failed to check if the loopback block device is bound: %m");
×
276
        if (r > 0)
1,958✔
277
                return log_device_debug_errno(dev, SYNTHETIC_ERRNO(EBUSY),
×
278
                                              "The loopback block device is already bound, ignoring.");
279

280
        /* Let's see if the device is really detached, i.e. currently has no associated partition block
281
         * devices. On various kernels (such as 5.8) it is possible to have a loopback block device that
282
         * superficially is detached but still has partition block devices associated for it. Let's then
283
         * manually remove the partitions via BLKPG, and tell the caller we did that via EUCLEAN, so they try
284
         * again. */
285
        r = block_device_remove_all_partitions(dev, fd);
1,958✔
286
        if (r < 0)
1,958✔
287
                return log_device_debug_errno(dev, r, "Failed to remove partitions on the loopback block device: %m");
×
288
        if (r > 0)
1,958✔
289
                /* Removed all partitions. Let's report this to the caller, to try again, and count this as
290
                 * an attempt. */
291
                return log_device_debug_errno(dev, SYNTHETIC_ERRNO(EUCLEAN),
×
292
                                              "Removed partitions on the loopback block device.");
293

294
        if (!loop_configure_broken) {
1,958✔
295
                if (ioctl(fd, LOOP_CONFIGURE, c) < 0) {
1,958✔
296
                        /* Do fallback only if LOOP_CONFIGURE is not supported, propagate all other errors. */
297
                        if (!ERRNO_IS_IOCTL_NOT_SUPPORTED(errno))
×
298
                                return log_device_debug_errno(dev, errno, "ioctl(LOOP_CONFIGURE) failed: %m");
×
299

300
                        loop_configure_broken = true;
×
301
                } else {
302
                        loop_with_fd = TAKE_FD(fd);
1,958✔
303

304
                        r = loop_configure_verify(loop_with_fd, c);
1,958✔
305
                        if (r < 0)
1,958✔
306
                                return log_device_debug_errno(dev, r, "Failed to verify if loopback block device is correctly configured: %m");
×
307
                        if (r == 0) {
1,958✔
308
                                /* LOOP_CONFIGURE doesn't work. Remember that. */
309
                                loop_configure_broken = true;
×
310

311
                                /* We return EBUSY here instead of retrying immediately with LOOP_SET_FD,
312
                                 * because LOOP_CLR_FD is async: if the operation cannot be executed right
313
                                 * away it just sets the autoclear flag on the device. This means there's a
314
                                 * good chance we cannot actually reuse the loopback device right-away. Hence
315
                                 * let's assume it's busy, avoid the trouble and let the calling loop call us
316
                                 * again with a new, likely unused device. */
317
                                return -EBUSY;
×
318
                        }
319
                }
320
        }
321

322
        if (loop_configure_broken) {
1,958✔
323
                if (ioctl(fd, LOOP_SET_FD, c->fd) < 0)
×
324
                        return log_device_debug_errno(dev, errno, "ioctl(LOOP_SET_FD) failed: %m");
×
325

326
                loop_with_fd = TAKE_FD(fd);
×
327

328
                r = loop_configure_fallback(loop_with_fd, c);
×
329
                if (r < 0)
×
330
                        return r;
331
        }
332

333
        r = fd_get_diskseq(loop_with_fd, &diskseq);
1,958✔
334
        if (r < 0 && r != -EOPNOTSUPP)
1,958✔
335
                return log_device_debug_errno(dev, r, "Failed to get diskseq: %m");
×
336

337
        switch (lock_op & ~LOCK_NB) {
1,958✔
338
        case LOCK_EX: /* Already in effect */
339
                break;
340
        case LOCK_SH: /* Downgrade */
1,836✔
341
                if (flock(lock_fd, lock_op) < 0)
1,836✔
342
                        return log_device_debug_errno(dev, errno, "Failed to downgrade lock level: %m");
×
343
                break;
344
        case LOCK_UN: /* Release */
×
345
                lock_fd = safe_close(lock_fd);
×
346
                break;
347
        default:
×
348
                assert_not_reached();
×
349
        }
350

351
        uint64_t device_size;
1,958✔
352
        r = blockdev_get_device_size(loop_with_fd, &device_size);
1,958✔
353
        if (r < 0)
1,958✔
354
                return log_device_debug_errno(dev, r, "Failed to get loopback device size: %m");
×
355

356
        LoopDevice *d = new(LoopDevice, 1);
1,958✔
357
        if (!d)
1,958✔
358
                return log_oom_debug();
×
359

360
        *d = (LoopDevice) {
1,958✔
361
                .n_ref = 1,
362
                .fd = TAKE_FD(loop_with_fd),
1,958✔
363
                .lock_fd = TAKE_FD(lock_fd),
1,958✔
364
                .node = TAKE_PTR(node),
1,958✔
365
                .nr = nr,
366
                .devno = devno,
367
                .dev = TAKE_PTR(dev),
1,958✔
368
                .diskseq = diskseq,
369
                .sector_size = c->block_size,
1,958✔
370
                .device_size = device_size,
371
                .created = true,
372
        };
373

374
        *ret = TAKE_PTR(d);
1,958✔
375
        return 0;
1,958✔
376
}
377

378
static int fd_get_max_discard(int fd, uint64_t *ret) {
×
379
        struct stat st;
×
380
        char sysfs_path[STRLEN("/sys/dev/block/" ":" "/queue/discard_max_bytes") + DECIMAL_STR_MAX(dev_t) * 2 + 1];
×
381
        _cleanup_free_ char *buffer = NULL;
×
382
        int r;
×
383

384
        assert(ret);
×
385

386
        if (fstat(ASSERT_FD(fd), &st) < 0)
×
387
                return -errno;
×
388

389
        if (!S_ISBLK(st.st_mode))
×
390
                return -ENOTBLK;
391

392
        xsprintf(sysfs_path, "/sys/dev/block/" DEVNUM_FORMAT_STR "/queue/discard_max_bytes", DEVNUM_FORMAT_VAL(st.st_rdev));
×
393

394
        r = read_one_line_file(sysfs_path, &buffer);
×
395
        if (r < 0)
×
396
                return r;
397

398
        return safe_atou64(buffer, ret);
×
399
}
400

401
static int fd_set_max_discard(int fd, uint64_t max_discard) {
×
402
        struct stat st;
×
403
        char sysfs_path[STRLEN("/sys/dev/block/" ":" "/queue/discard_max_bytes") + DECIMAL_STR_MAX(dev_t) * 2 + 1];
×
404

405
        if (fstat(ASSERT_FD(fd), &st) < 0)
×
406
                return -errno;
×
407

408
        if (!S_ISBLK(st.st_mode))
×
409
                return -ENOTBLK;
410

411
        xsprintf(sysfs_path, "/sys/dev/block/" DEVNUM_FORMAT_STR "/queue/discard_max_bytes", DEVNUM_FORMAT_VAL(st.st_rdev));
×
412

413
        return write_string_filef(sysfs_path, WRITE_STRING_FILE_DISABLE_BUFFER, "%" PRIu64, max_discard);
×
414
}
415

416
static int loop_device_make_internal(
4,106✔
417
                const char *path,
418
                int fd,
419
                int open_flags,
420
                uint64_t offset,
421
                uint64_t size,
422
                uint32_t sector_size,
423
                uint32_t loop_flags,
424
                int lock_op,
425
                LoopDevice **ret) {
426

427
        _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
×
428
        _cleanup_close_ int reopened_fd = -EBADF, control = -EBADF;
8,212✔
429
        _cleanup_free_ char *backing_file = NULL;
4,106✔
430
        struct loop_config config;
4,106✔
431
        int r, f_flags;
4,106✔
432
        struct stat st;
4,106✔
433

434
        assert(ret);
4,106✔
435
        assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
4,106✔
436

437
        if (fstat(ASSERT_FD(fd), &st) < 0)
4,106✔
438
                return -errno;
×
439

440
        if (S_ISBLK(st.st_mode)) {
4,106✔
441
                if (offset == 0 && IN_SET(size, 0, UINT64_MAX))
×
442
                        /* If this is already a block device and we are supposed to cover the whole of it
443
                         * then store an fd to the original open device node — and do not actually create an
444
                         * unnecessary loopback device for it. */
445
                        return loop_device_open_from_fd(fd, open_flags, lock_op, ret);
×
446
        } else {
447
                r = stat_verify_regular(&st);
4,106✔
448
                if (r < 0)
4,106✔
449
                        return r;
450
        }
451

452
        if (path) {
1,978✔
453
                r = path_make_absolute_cwd(path, &backing_file);
1,855✔
454
                if (r < 0)
1,855✔
455
                        return r;
456

457
                path_simplify(backing_file);
1,855✔
458
        } else {
459
                r = fd_get_path(fd, &backing_file);
123✔
460
                if (r < 0)
123✔
461
                        return r;
462
        }
463

464
        f_flags = fcntl(fd, F_GETFL);
1,978✔
465
        if (f_flags < 0)
1,978✔
466
                return -errno;
×
467

468
        if (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) != FLAGS_SET(f_flags, O_DIRECT)) {
1,978✔
469
                /* If LO_FLAGS_DIRECT_IO is requested, then make sure we have the fd open with O_DIRECT, as
470
                 * that's required. Conversely, if it's off require that O_DIRECT is off too (that's because
471
                 * new kernels will implicitly enable LO_FLAGS_DIRECT_IO if O_DIRECT is set).
472
                 *
473
                 * Our intention here is that LO_FLAGS_DIRECT_IO is the primary knob, and O_DIRECT derived
474
                 * from that automatically. */
475

476
                reopened_fd = fd_reopen(fd, (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0)|O_CLOEXEC|O_NONBLOCK|open_flags);
122✔
477
                if (reopened_fd < 0) {
122✔
478
                        if (!FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO))
×
479
                                return log_debug_errno(reopened_fd, "Failed to reopen file descriptor without O_DIRECT: %m");
×
480

481
                        /* Some file systems might not support O_DIRECT, let's gracefully continue without it then. */
482
                        log_debug_errno(reopened_fd, "Failed to enable O_DIRECT for backing file descriptor for loopback device. Continuing without.");
×
483
                        loop_flags &= ~LO_FLAGS_DIRECT_IO;
×
484
                } else
485
                        fd = reopened_fd; /* From now on, operate on our new O_DIRECT fd */
486
        }
487

488
        control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
1,978✔
489
        if (control < 0)
1,978✔
490
                return -errno;
20✔
491

492
        if (sector_size == 0)
1,958✔
493
                /* If no sector size is specified, default to the classic default */
494
                sector_size = 512;
×
495
        else if (sector_size == UINT32_MAX) {
1,958✔
496

497
                if (S_ISBLK(st.st_mode))
1,836✔
498
                        /* If the sector size is specified as UINT32_MAX we'll propagate the sector size of
499
                         * the underlying block device. */
500
                        r = blockdev_get_sector_size(fd, &sector_size);
×
501
                else {
502
                        _cleanup_close_ int non_direct_io_fd = -EBADF;
4,106✔
503
                        int probe_fd;
1,836✔
504

505
                        assert(S_ISREG(st.st_mode));
1,836✔
506

507
                        /* If sector size is specified as UINT32_MAX, we'll try to probe the right sector
508
                         * size of the image in question by looking for the GPT partition header at various
509
                         * offsets. This of course only works if the image already has a disk label.
510
                         *
511
                         * So here we actually want to read the file contents ourselves. This is quite likely
512
                         * not going to work if we managed to enable O_DIRECT, because in such a case there
513
                         * are some pretty strict alignment requirements to offset, size and target, but
514
                         * there's no way to query what alignment specifically is actually required. Hence,
515
                         * let's avoid the mess, and temporarily open an fd without O_DIRECT for the probing
516
                         * logic. */
517

518
                        if (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO)) {
1,836✔
519
                                non_direct_io_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
1,835✔
520
                                if (non_direct_io_fd < 0)
1,835✔
521
                                        return non_direct_io_fd;
×
522

523
                                probe_fd = non_direct_io_fd;
524
                        } else
525
                                probe_fd = fd;
526

527
                        r = probe_sector_size(probe_fd, &sector_size);
1,836✔
528
                }
529
                if (r < 0)
1,836✔
530
                        return r;
531
        }
532

533
        config = (struct loop_config) {
3,916✔
534
                .fd = fd,
535
                .block_size = sector_size,
536
                .info = {
537
                        /* Use the specified flags, but configure the read-only flag from the open flags, and force autoclear */
538
                        .lo_flags = (loop_flags & ~LO_FLAGS_READ_ONLY) | ((open_flags & O_ACCMODE_STRICT) == O_RDONLY ? LO_FLAGS_READ_ONLY : 0) | LO_FLAGS_AUTOCLEAR,
1,958✔
539
                        .lo_offset = offset,
540
                        .lo_sizelimit = size == UINT64_MAX ? 0 : size,
1,958✔
541
                },
542
        };
543

544
        /* Loop around LOOP_CTL_GET_FREE, since at the moment we attempt to open the returned device it might
545
         * be gone already, taken by somebody else racing against us. */
546
        for (unsigned n_attempts = 0;;) {
1,958✔
547
                usec_t usec;
1,958✔
548
                int nr;
1,958✔
549

550
                /* Let's take a lock on the control device first. On a busy system, where many programs
551
                 * attempt to allocate a loopback device at the same time, we might otherwise keep looping
552
                 * around relatively heavy operations: asking for a free loopback device, then opening it,
553
                 * validating it, attaching something to it. Let's serialize this whole operation, to make
554
                 * unnecessary busywork less likely. Note that this is just something we do to optimize our
555
                 * own code (and whoever else decides to use LOCK_EX locks for this), taking this lock is not
556
                 * necessary, it just means it's less likely we have to iterate through this loop again and
557
                 * again if our own code races against our own code.
558
                 *
559
                 * Note: our lock protocol is to take the /dev/loop-control lock first, and the block device
560
                 * lock second, if both are taken, and always in this order, to avoid ABBA locking issues. */
561
                if (flock(control, LOCK_EX) < 0)
1,958✔
562
                        return -errno;
×
563

564
                nr = ioctl(control, LOOP_CTL_GET_FREE);
1,958✔
565
                if (nr < 0)
1,958✔
566
                        return -errno;
×
567

568
                r = loop_configure(nr, open_flags, lock_op, &config, &d);
1,958✔
569
                if (r >= 0)
1,958✔
570
                        break;
571

572
                /* -ENODEV or friends: Somebody might've gotten the same number from the kernel, used the
573
                 * device, and called LOOP_CTL_REMOVE on it. Let's retry with a new number.
574
                 * -EBUSY: a file descriptor is already bound to the loopback block device.
575
                 * -EUCLEAN: some left-over partition devices that were cleaned up.
576
                 * -ENOANO: we tried to use LO_FLAGS_DIRECT_IO but the kernel rejected it. */
577
                if (!ERRNO_IS_DEVICE_ABSENT(r) && !IN_SET(r, -EBUSY, -EUCLEAN, -ENOANO))
×
578
                        return r;
579

580
                /* OK, this didn't work, let's try again a bit later, but first release the lock on the
581
                 * control device */
582
                if (flock(control, LOCK_UN) < 0)
×
583
                        return -errno;
×
584

585
                if (++n_attempts >= 64) /* Give up eventually */
×
586
                        return -EBUSY;
587

588
                /* If we failed to enable direct IO mode, let's retry without it. We restart the process as
589
                 * on some combination of kernel version and storage filesystem, the kernel is very unhappy
590
                 * about a failed DIRECT_IO enablement and throws I/O errors. */
591
                if (r == -ENOANO && FLAGS_SET(config.info.lo_flags, LO_FLAGS_DIRECT_IO)) {
×
592
                        config.info.lo_flags &= ~LO_FLAGS_DIRECT_IO;
×
593
                        open_flags &= ~O_DIRECT;
×
594

595
                        int non_direct_io_fd = fd_reopen(config.fd, O_CLOEXEC|O_NONBLOCK|open_flags);
×
596
                        if (non_direct_io_fd < 0)
×
597
                                return log_debug_errno(
×
598
                                                non_direct_io_fd,
599
                                                "Failed to reopen file descriptor without O_DIRECT: %m");
600

601
                        safe_close(reopened_fd);
×
602
                        fd = config.fd = /* For cleanups */ reopened_fd = non_direct_io_fd;
×
603
                }
604

605
                /* Wait some random time, to make collision less likely. Let's pick a random time in the
606
                 * range 0ms…250ms, linearly scaled by the number of failed attempts. */
607
                usec = random_u64_range(UINT64_C(10) * USEC_PER_MSEC +
×
608
                                        UINT64_C(240) * USEC_PER_MSEC * n_attempts/64);
×
609
                log_debug("Trying again after %s.", FORMAT_TIMESPAN(usec, USEC_PER_MSEC));
×
610
                (void) usleep_safe(usec);
×
611
        }
612

613
        if (S_ISBLK(st.st_mode)) {
1,958✔
614
                /* Propagate backing device's discard byte limit to our loopback block device. We do this in
615
                 * order to avoid that (supposedly quick) discard requests on the loopback device get turned
616
                 * into (likely slow) zero-out requests on backing devices that do not support discarding
617
                 * natively, but do support zero-out. */
618
                uint64_t discard_max_bytes;
×
619

620
                r = fd_get_max_discard(fd, &discard_max_bytes);
×
621
                if (r < 0)
×
622
                        log_debug_errno(r, "Failed to read 'discard_max_bytes' of backing device, ignoring: %m");
×
623
                else {
624
                        r = fd_set_max_discard(d->fd, discard_max_bytes);
×
625
                        if (r < 0)
×
626
                                log_debug_errno(r, "Failed to write 'discard_max_bytes' of loop device, ignoring: %m");
×
627
                }
628
        }
629

630
        d->backing_file = TAKE_PTR(backing_file);
1,958✔
631
        d->backing_inode = st.st_ino;
1,958✔
632
        d->backing_devno = st.st_dev;
1,958✔
633

634
        log_debug("Successfully acquired %s, devno=%u:%u, nr=%i, diskseq=%" PRIu64,
1,958✔
635
                  d->node,
636
                  major(d->devno), minor(d->devno),
637
                  d->nr,
638
                  d->diskseq);
639

640
        *ret = TAKE_PTR(d);
1,958✔
641
        return 0;
1,958✔
642
}
643

644
static uint32_t loop_flags_mangle(uint32_t loop_flags) {
        /* Adjusts LO_FLAGS_DIRECT_IO in the caller's flags according to $SYSTEMD_LOOP_DIRECT_IO and
         * returns the result. The flag is cleared only if the variable parses to false; in every other
         * case (true, parse failure, or -ENXIO, which is silently accepted here — presumably "not
         * set") the flag is turned on. */
        int direct_io;

        direct_io = getenv_bool("SYSTEMD_LOOP_DIRECT_IO");
        if (direct_io < 0 && direct_io != -ENXIO)
                log_debug_errno(direct_io, "Failed to parse $SYSTEMD_LOOP_DIRECT_IO, ignoring: %m");

        /* Direct IO is the default: only an explicit "off" (i.e. 0) disables it. */
        bool enable = direct_io != 0;

        return UPDATE_FLAG(loop_flags, LO_FLAGS_DIRECT_IO, enable);
}
653

654
int loop_device_make(
122✔
655
                int fd,
656
                int open_flags,
657
                uint64_t offset,
658
                uint64_t size,
659
                uint32_t sector_size,
660
                uint32_t loop_flags,
661
                int lock_op,
662
                LoopDevice **ret) {
663

664
        assert(fd >= 0);
122✔
665
        assert(ret);
122✔
666

667
        return loop_device_make_internal(
122✔
668
                        NULL,
669
                        fd,
670
                        open_flags,
671
                        offset,
672
                        size,
673
                        sector_size,
674
                        loop_flags_mangle(loop_flags),
675
                        lock_op,
676
                        ret);
677
}
678

679
int loop_device_make_by_path_at(
3,984✔
680
                int dir_fd,
681
                const char *path,
682
                int open_flags,
683
                uint32_t sector_size,
684
                uint32_t loop_flags,
685
                int lock_op,
686
                LoopDevice **ret) {
687

688
        int r, basic_flags, direct_flags, rdwr_flags;
3,984✔
689
        _cleanup_close_ int fd = -EBADF;
3,984✔
690
        bool direct = false;
3,984✔
691

692
        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
3,984✔
693
        assert(path);
3,984✔
694
        assert(ret);
3,984✔
695
        assert(open_flags < 0 || IN_SET(open_flags, O_RDWR, O_RDONLY));
3,984✔
696

697
        /* Passing < 0 as open_flags here means we'll try to open the device writable if we can, retrying
698
         * read-only if we cannot. */
699

700
        loop_flags = loop_flags_mangle(loop_flags);
3,984✔
701

702
        /* Let's open with O_DIRECT if we can. But not all file systems support that, hence fall back to
703
         * non-O_DIRECT mode automatically, if it fails. */
704

705
        basic_flags = O_CLOEXEC|O_NONBLOCK|O_NOCTTY;
3,984✔
706
        direct_flags = FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0;
3,984✔
707
        rdwr_flags = open_flags >= 0 ? open_flags : O_RDWR;
3,984✔
708

709
        fd = xopenat(dir_fd, path, basic_flags|direct_flags|rdwr_flags);
3,984✔
710
        if (fd < 0 && direct_flags != 0) /* If we had O_DIRECT on, and things failed with that, let's immediately try again without */
3,984✔
711
                fd = xopenat(dir_fd, path, basic_flags|rdwr_flags);
2,129✔
712
        else
713
                direct = direct_flags != 0;
×
714
        if (fd < 0) {
3,984✔
715
                r = fd;
1✔
716

717
                /* Retry read-only? */
718
                if (open_flags >= 0 || !ERRNO_IS_NEG_FS_WRITE_REFUSED(r))
3,985✔
719
                        return r;
720

721
                fd = xopenat(dir_fd, path, basic_flags|direct_flags|O_RDONLY);
×
722
                if (fd < 0 && direct_flags != 0) /* as above */
×
723
                        fd = xopenat(dir_fd, path, basic_flags|O_RDONLY);
×
724
                else
725
                        direct = direct_flags != 0;
×
726
                if (fd < 0)
×
727
                        return r; /* Propagate original error */
728

729
                open_flags = O_RDONLY;
730
        } else if (open_flags < 0)
3,983✔
731
                open_flags = O_RDWR;
81✔
732

733
        log_debug("Opened '%s' in %s access mode%s, with O_DIRECT %s%s.",
15,742✔
734
                  path,
735
                  open_flags == O_RDWR ? "O_RDWR" : "O_RDONLY",
736
                  open_flags != rdwr_flags ? " (O_RDWR was requested but not allowed)" : "",
737
                  direct ? "enabled" : "disabled",
738
                  direct != (direct_flags != 0) ? " (O_DIRECT was requested but not supported)" : "");
739

740
        return loop_device_make_internal(
3,983✔
741
                        dir_fd == AT_FDCWD ? path : NULL,
742
                        fd,
743
                        open_flags,
744
                        /* offset= */ 0,
745
                        /* size= */ 0,
746
                        sector_size,
747
                        loop_flags,
748
                        lock_op,
749
                        ret);
750
}
751

752
int loop_device_make_by_path_memory(
1✔
753
                const char *path,
754
                int open_flags,
755
                uint32_t sector_size,
756
                uint32_t loop_flags,
757
                int lock_op,
758
                LoopDevice **ret) {
759

760
        _cleanup_close_ int fd = -EBADF, mfd = -EBADF;
1✔
761
        _cleanup_free_ char *fn = NULL;
1✔
762
        struct stat st;
1✔
763
        int r;
1✔
764

765
        assert(path);
1✔
766
        assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
1✔
767
        assert(ret);
1✔
768

769
        loop_flags &= ~LO_FLAGS_DIRECT_IO; /* memfds don't support O_DIRECT, hence LO_FLAGS_DIRECT_IO can't be used either */
1✔
770

771
        fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|O_RDONLY);
1✔
772
        if (fd < 0)
1✔
773
                return -errno;
×
774

775
        if (fstat(fd, &st) < 0)
1✔
776
                return -errno;
×
777

778
        if (!S_ISREG(st.st_mode) && !S_ISBLK(st.st_mode))
1✔
779
                return -EBADF;
780

781
        r = path_extract_filename(path, &fn);
1✔
782
        if (r < 0)
1✔
783
                return r;
784

785
        mfd = memfd_clone_fd(fd, fn, open_flags|O_CLOEXEC);
1✔
786
        if (mfd < 0)
1✔
787
                return mfd;
788

789
        fd = safe_close(fd); /* Let's close the original early */
1✔
790

791
        return loop_device_make_internal(NULL, mfd, open_flags, 0, 0, sector_size, loop_flags, lock_op, ret);
1✔
792
}
793

794
static LoopDevice* loop_device_free(LoopDevice *d) {
        /* Destroys a LoopDevice object. Unless the device is foreign or was relinquished, this also
         * detaches the backing file from the loop device and tries to remove the device node via
         * /dev/loop-control. All failures are logged at debug level and otherwise ignored. Always
         * returns NULL. */

        _cleanup_close_ int control_fd = -EBADF;
        int r;

        if (!d)
                return NULL;

        /* Drop any lock we hold on the device first. Below we open and lock /dev/loop-control, and the
         * lock protocol demands that when both locks are held the control lock is taken first and the
         * block device lock second, to avoid ABBA deadlocks. In addition, LOOP_CLR_FD further down would
         * fail if another fd to the device were still open. */
        d->lock_fd = safe_close(d->lock_fd);

        /* Open and lock the control device early, so that releasing and deleting the block device
         * happens in a synchronized fashion and allocators don't needlessly see the device as free
         * while we are about to delete it. */
        if (!LOOP_DEVICE_IS_FOREIGN(d) && !d->relinquished) {
                control_fd = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
                if (control_fd < 0)
                        log_debug_errno(errno, "Failed to open loop control device, cannot remove loop device '%s', ignoring: %m", strna(d->node));
                else if (flock(control_fd, LOCK_EX) < 0)
                        log_debug_errno(errno, "Failed to lock loop control device, ignoring: %m");
        }

        /* Next, release the loopback block device itself */
        if (d->fd >= 0) {
                /* Sync implicitly, as in-flight blocks might otherwise not get written */
                if (fsync(d->fd) < 0)
                        log_debug_errno(errno, "Failed to sync loop block device, ignoring: %m");

                if (!LOOP_DEVICE_IS_FOREIGN(d) && !d->relinquished) {
                        /* The device is ours to clear. Do so synchronously: lock it, remove all
                         * partitions by hand, then clear it. That way udev should not access the device
                         * concurrently, and once we are done the device is cleared and its partition
                         * children are gone. The lock is taken on the primary fd rather than on a
                         * separate lock fd (as done during allocation), because it must be held all the
                         * way through LOOP_CLR_FD, which would fail with more than one fd open. */

                        if (flock(d->fd, LOCK_EX) < 0)
                                log_debug_errno(errno, "Failed to lock loop block device, ignoring: %m");

                        r = block_device_remove_all_partitions(d->dev, d->fd);
                        if (r < 0)
                                log_debug_errno(r, "Failed to remove partitions of loopback block device, ignoring: %m");

                        if (ioctl(d->fd, LOOP_CLR_FD) < 0)
                                log_debug_errno(errno, "Failed to clear loop device, ignoring: %m");
                }

                safe_close(d->fd);
        }

        /* With the block device released, try to remove it too */
        if (control_fd >= 0) {
                /* Retry while the kernel reports EBUSY. The delay starts at 5 ms and doubles on every
                 * fifth attempt; we give up on the 39th attempt. That makes 38 sleeps, i.e.
                 * (4*5 + 5*10 + 5*20 + … + 5*320 + 4*640) = 5730 ms in total. */
                useconds_t delay = 5 * USEC_PER_MSEC;
                unsigned attempt = 1;

                while (ioctl(control_fd, LOOP_CTL_REMOVE, d->nr) < 0) {
                        if (errno != EBUSY || attempt > 38) {
                                log_debug_errno(errno, "Failed to remove device %s: %m", strna(d->node));
                                break;
                        }

                        if (attempt % 5 == 0) {
                                log_debug("Device is still busy after %u attempts…", attempt);
                                delay *= 2;
                        }

                        (void) usleep_safe(delay);
                        attempt++;
                }
        }

        free(d->node);
        sd_device_unref(d->dev);
        free(d->backing_file);
        return mfree(d);
}
874

875
/* NOTE(review): presumably expands to the loop_device_ref()/loop_device_unref() pair, with
 * loop_device_free() invoked when the last reference is dropped — confirm against the macro definition. */
DEFINE_TRIVIAL_REF_UNREF_FUNC(LoopDevice, loop_device, loop_device_free);
6,179✔
876

877
void loop_device_relinquish(LoopDevice *d) {
        /* Marks the device as relinquished: from now on freeing this object no longer clears or removes
         * the loop device. Cleaning up is left to the kernel, via the loop device "auto-clear" logic
         * that was enabled when the device was created. */

        assert(d);

        d->relinquished = true;
}
160✔
885

886
void loop_device_unrelinquish(LoopDevice *d) {
        /* Reverses loop_device_relinquish(): clears the 'relinquished' mark again. */

        assert(d);

        d->relinquished = false;
}
22✔
890

891
int loop_device_open(
140✔
892
                sd_device *dev,
893
                int open_flags,
894
                int lock_op,
895
                LoopDevice **ret) {
896

897
        _cleanup_close_ int fd = -EBADF, lock_fd = -EBADF;
140✔
898
        _cleanup_free_ char *node = NULL, *backing_file = NULL;
140✔
899
        dev_t devnum, backing_devno = 0;
140✔
900
        struct loop_info64 info;
140✔
901
        ino_t backing_inode = 0;
140✔
902
        uint64_t diskseq = 0;
140✔
903
        LoopDevice *d;
140✔
904
        const char *s;
140✔
905
        int r, nr = -1;
140✔
906

907
        assert(dev);
140✔
908
        assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
140✔
909
        assert(ret);
140✔
910

911
        /* Even if fd is provided through the argument in loop_device_open_from_fd(), we reopen the inode
912
         * here, instead of keeping just a dup() clone of it around, since we want to ensure that the
913
         * O_DIRECT flag of the handle we keep is off, we have our own file index, and have the right
914
         * read/write mode in effect. */
915
        fd = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
140✔
916
        if (fd < 0)
140✔
917
                return fd;
918

919
        if ((lock_op & ~LOCK_NB) != LOCK_UN) {
140✔
920
                lock_fd = open_lock_fd(fd, lock_op);
140✔
921
                if (lock_fd < 0)
140✔
922
                        return lock_fd;
923
        }
924

925
        if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0) {
140✔
926
#if HAVE_VALGRIND_MEMCHECK_H
927
                /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
928
                VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
929
#endif
930
                nr = info.lo_number;
28✔
931

932
                if (sd_device_get_sysattr_value(dev, "loop/backing_file", &s) >= 0) {
28✔
933
                        backing_file = strdup(s);
19✔
934
                        if (!backing_file)
19✔
935
                                return -ENOMEM;
936
                }
937

938
                backing_devno = info.lo_device;
28✔
939
                backing_inode = info.lo_inode;
28✔
940
        }
941

942
        r = fd_get_diskseq(fd, &diskseq);
140✔
943
        if (r < 0 && r != -EOPNOTSUPP)
140✔
944
                return r;
945

946
        uint32_t sector_size;
140✔
947
        r = blockdev_get_sector_size(fd, &sector_size);
140✔
948
        if (r < 0)
140✔
949
                return r;
950

951
        uint64_t device_size;
140✔
952
        r = blockdev_get_device_size(fd, &device_size);
140✔
953
        if (r < 0)
140✔
954
                return r;
955

956
        r = sd_device_get_devnum(dev, &devnum);
140✔
957
        if (r < 0)
140✔
958
                return r;
959

960
        r = sd_device_get_devname(dev, &s);
140✔
961
        if (r < 0)
140✔
962
                return r;
963

964
        node = strdup(s);
140✔
965
        if (!node)
140✔
966
                return -ENOMEM;
967

968
        d = new(LoopDevice, 1);
140✔
969
        if (!d)
140✔
970
                return -ENOMEM;
971

972
        *d = (LoopDevice) {
280✔
973
                .n_ref = 1,
974
                .fd = TAKE_FD(fd),
140✔
975
                .lock_fd = TAKE_FD(lock_fd),
140✔
976
                .nr = nr,
977
                .node = TAKE_PTR(node),
140✔
978
                .dev = sd_device_ref(dev),
140✔
979
                .backing_file = TAKE_PTR(backing_file),
140✔
980
                .backing_inode = backing_inode,
981
                .backing_devno = backing_devno,
982
                .relinquished = true, /* It's not ours, don't try to destroy it when this object is freed */
983
                .devno = devnum,
984
                .diskseq = diskseq,
985
                .sector_size = sector_size,
986
                .device_size = device_size,
987
                .created = false,
988
        };
989

990
        *ret = d;
140✔
991
        return 0;
140✔
992
}
993

994
int loop_device_open_from_fd(
2✔
995
                int fd,
996
                int open_flags,
997
                int lock_op,
998
                LoopDevice **ret) {
999

1000
        _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
2✔
1001
        int r;
2✔
1002

1003
        r = block_device_new_from_fd(ASSERT_FD(fd), 0, &dev);
2✔
1004
        if (r < 0)
2✔
1005
                return r;
1006

1007
        return loop_device_open(dev, open_flags, lock_op, ret);
2✔
1008
}
1009

1010
int loop_device_open_from_path(
×
1011
                const char *path,
1012
                int open_flags,
1013
                int lock_op,
1014
                LoopDevice **ret) {
1015

1016
        _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
×
1017
        int r;
×
1018

1019
        assert(path);
×
1020

1021
        r = block_device_new_from_path(path, 0, &dev);
×
1022
        if (r < 0)
×
1023
                return r;
1024

1025
        return loop_device_open(dev, open_flags, lock_op, ret);
×
1026
}
1027

1028
static int resize_partition(int partition_fd, uint64_t offset, uint64_t size) {
×
1029
        char sysfs[STRLEN("/sys/dev/block/:/partition") + 2*DECIMAL_STR_MAX(dev_t) + 1];
×
1030
        _cleanup_free_ char *buffer = NULL;
×
1031
        uint64_t current_offset, current_size, partno;
×
1032
        _cleanup_close_ int whole_fd = -EBADF;
×
1033
        struct stat st;
×
1034
        dev_t devno;
×
1035
        int r;
×
1036

1037
        /* Resizes the partition the loopback device refer to (assuming it refers to one instead of an actual
1038
         * loopback device), and changes the offset, if needed. This is a fancy wrapper around
1039
         * BLKPG_RESIZE_PARTITION. */
1040

1041
        if (fstat(ASSERT_FD(partition_fd), &st) < 0)
×
1042
                return -errno;
×
1043

1044
        assert(S_ISBLK(st.st_mode));
×
1045

1046
        xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/partition", DEVNUM_FORMAT_VAL(st.st_rdev));
×
1047
        r = read_one_line_file(sysfs, &buffer);
×
1048
        if (r == -ENOENT) /* not a partition, cannot resize */
×
1049
                return -ENOTTY;
1050
        if (r < 0)
×
1051
                return r;
1052
        r = safe_atou64(buffer, &partno);
×
1053
        if (r < 0)
×
1054
                return r;
1055

1056
        xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/start", DEVNUM_FORMAT_VAL(st.st_rdev));
×
1057

1058
        buffer = mfree(buffer);
×
1059
        r = read_one_line_file(sysfs, &buffer);
×
1060
        if (r < 0)
×
1061
                return r;
1062
        r = safe_atou64(buffer, &current_offset);
×
1063
        if (r < 0)
×
1064
                return r;
1065
        if (current_offset > UINT64_MAX/512U)
×
1066
                return -EINVAL;
1067
        current_offset *= 512U;
×
1068

1069
        r = blockdev_get_device_size(partition_fd, &current_size);
×
1070
        if (r < 0)
×
1071
                return r;
1072

1073
        if (size == UINT64_MAX && offset == UINT64_MAX)
×
1074
                return 0;
1075
        if (current_size == size && current_offset == offset)
×
1076
                return 0;
1077

1078
        xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/../dev", DEVNUM_FORMAT_VAL(st.st_rdev));
×
1079

1080
        buffer = mfree(buffer);
×
1081
        r = read_one_line_file(sysfs, &buffer);
×
1082
        if (r < 0)
×
1083
                return r;
1084
        r = parse_devnum(buffer, &devno);
×
1085
        if (r < 0)
×
1086
                return r;
1087

1088
        whole_fd = r = device_open_from_devnum(S_IFBLK, devno, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY, NULL);
×
1089
        if (r < 0)
×
1090
                return r;
1091

1092
        return block_device_resize_partition(
×
1093
                        whole_fd,
1094
                        partno,
1095
                        offset == UINT64_MAX ? current_offset : offset,
1096
                        size == UINT64_MAX ? current_size : size);
1097
}
1098

1099
int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size) {
        /* Adjusts the offset and/or size limit of the loop device relative to the start of the
         * underlying file or block device. If the object does not refer to a loopback device
         * (d->nr < 0), the request is forwarded to resize_partition() instead.
         *
         * Passing UINT64_MAX for offset or size leaves that parameter unchanged.
         *
         * Returns 0 if nothing had to change, otherwise the result of the LOOP_SET_STATUS64 ioctl
         * converted with RET_NERRNO(), or a negative errno-style error. */

        struct loop_info64 info;
        bool size_settled, offset_settled;

        assert(d);
        assert(d->fd >= 0);

        if (d->nr < 0) /* not a loopback device */
                return resize_partition(d->fd, offset, size);

        if (ioctl(d->fd, LOOP_GET_STATUS64, &info) < 0)
                return -errno;

#if HAVE_VALGRIND_MEMCHECK_H
        /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
        VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
#endif

        /* A parameter is "settled" if the caller doesn't want to touch it or it already has the
         * requested value. If both are settled there's nothing to do. */
        size_settled = size == UINT64_MAX || info.lo_sizelimit == size;
        offset_settled = offset == UINT64_MAX || info.lo_offset == offset;
        if (size_settled && offset_settled)
                return 0;

        if (offset != UINT64_MAX)
                info.lo_offset = offset;
        if (size != UINT64_MAX)
                info.lo_sizelimit = size;

        return RET_NERRNO(ioctl(d->fd, LOOP_SET_STATUS64, &info));
}
1133

1134
int loop_device_flock(LoopDevice *d, int operation) {
        /* Takes, changes or releases the lock on the loop device, using the separate lock fd stored in
         * the object. 'operation' is LOCK_UN, LOCK_SH or LOCK_EX, optionally combined with LOCK_NB.
         *
         * Returns 0 on success (or, when changing an existing lock, the flock() result converted with
         * RET_NERRNO()), and a negative errno-style error on failure. */

        int mode = operation & ~LOCK_NB;

        assert(IN_SET(mode, LOCK_UN, LOCK_SH, LOCK_EX));
        assert(d);

        /* Unlocking simply means closing the lock fd */
        if (mode == LOCK_UN) {
                d->lock_fd = safe_close(d->lock_fd);
                return 0;
        }

        /* A lock fd exists already: just switch the lock mode on it */
        if (d->lock_fd >= 0)
                return RET_NERRNO(flock(d->lock_fd, operation));

        /* No lock fd so far: create one, which also locks it right away */
        d->lock_fd = open_lock_fd(ASSERT_FD(d->fd), operation);
        if (d->lock_fd < 0)
                return d->lock_fd;

        return 0;
}
1156

1157
int loop_device_sync(LoopDevice *d) {
        /* Explicitly fsync()s the loop block device. The same is done implicitly when the object is
         * freed, but there the result is ignored; calling this allows the caller to check it.
         *
         * Returns the fsync() result converted with RET_NERRNO(). */

        int fd;

        assert(d);

        fd = ASSERT_FD(d->fd);

        return RET_NERRNO(fsync(fd));
}
1165

1166
int loop_device_set_autoclear(LoopDevice *d, bool autoclear) {
        /* Turns the LO_FLAGS_AUTOCLEAR flag of the loop device on or off.
         *
         * Returns 0 if the flag already had the requested state, 1 if it was changed, and a negative
         * errno if one of the ioctls failed. */

        struct loop_info64 info;
        bool currently_set;

        assert(d);

        if (ioctl(ASSERT_FD(d->fd), LOOP_GET_STATUS64, &info) < 0)
                return -errno;

        currently_set = FLAGS_SET(info.lo_flags, LO_FLAGS_AUTOCLEAR);
        if (currently_set == autoclear)
                return 0; /* nothing to change */

        SET_FLAG(info.lo_flags, LO_FLAGS_AUTOCLEAR, autoclear);

        if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
                return -errno;

        return 1;
}
1184

1185
int loop_device_set_filename(LoopDevice *d, const char *name) {
        /* Updates the .lo_file_name field of the loopback device. Nominally this holds the path of the
         * backing file, but in practice it is a free-form string passed to the kernel. Tools that care
         * about the real backing file path mostly use the sysfs attribute loop/backing_file instead,
         * which is generated by the kernel and subject to file system namespaces and such.
         *
         * Since userspace may pick .lo_file_name freely when creating a loopback block device, it is
         * useful for /dev/disk/by-loop-ref/ symlinks and similar, allowing apps to recognize their own
         * loopback files.
         *
         * A NULL name clears the field. Returns 0 if the field already matched, 1 if it was changed,
         * -ENOBUFS if the name does not fit, and a negative errno if one of the ioctls failed. */

        struct loop_info64 info;
        const size_t capacity = sizeof(info.lo_file_name);

        assert(d);

        /* The name must fit including its trailing NUL byte */
        if (name && strlen(name) >= capacity)
                return -ENOBUFS;

        if (ioctl(ASSERT_FD(d->fd), LOOP_GET_STATUS64, &info) < 0)
                return -errno;

        /* Nothing to do if the field matches already (NULL is compared like the empty string) */
        if (strneq((char*) info.lo_file_name, strempty(name), capacity))
                return 0;

        if (!name)
                memzero(info.lo_file_name, capacity);
        else {
                strncpy((char*) info.lo_file_name, name, capacity-1);
                info.lo_file_name[capacity-1] = 0;
        }

        if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
                return -errno;

        return 1;
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc