• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 21269554371

22 Jan 2026 06:13PM UTC coverage: 72.571% (-0.2%) from 72.798%
21269554371

push

github

YHNdnzj
pam_systemd: fix regression introduced in v258 by preserving the FIFO fd

Upstream commit 3180c4d introduced a version incompatibility between
pam_systemd.so v258 and logind v257. This is problematic because such version
mismatches can occur in practice: logind still cannot be restarted during a
systemd package upgrade (it's a long-standing limitation, see
https://github.com/systemd/systemd/issues/17308).

When pam_systemd requests a new session, logind v257 returns a FIFO
fd. pam_systemd.so v258 ignores this fd and closes it. logind interprets the
closure as the session leader exiting and immediately terminates the session.

This patch partially reverts commit 3180c4d and restores the handling of the
FIFO fd in pam_systemd. The change is limited to the D-Bus APIs, since the
varlink API was only introduced in logind v258.

Follow-up for 3180c4d46.

0 of 9 new or added lines in 1 file covered. (0.0%)

1194 existing lines in 35 files now uncovered.

310232 of 427490 relevant lines covered (72.57%)

1165572.69 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

65.15
/src/shared/loop-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#if HAVE_VALGRIND_MEMCHECK_H
4
#include <valgrind/memcheck.h>
5
#endif
6

7
#include <fcntl.h>
8
#include <linux/loop.h>
9
#include <sys/file.h>
10
#include <sys/ioctl.h>
11
#include <unistd.h>
12

13
#include "sd-device.h"
14

15
#include "alloc-util.h"
16
#include "blockdev-util.h"
17
#include "data-fd-util.h"
18
#include "device-util.h"
19
#include "devnum-util.h"
20
#include "dissect-image.h"
21
#include "env-util.h"
22
#include "errno-util.h"
23
#include "fd-util.h"
24
#include "fileio.h"
25
#include "fs-util.h"
26
#include "loop-util.h"
27
#include "parse-util.h"
28
#include "path-util.h"
29
#include "random-util.h"
30
#include "stat-util.h"
31
#include "stdio-util.h"
32
#include "string-util.h"
33
#include "time-util.h"
34

35
/* Cleanup handler for an fd of a loop device we attached something to: issues LOOP_CLR_FD on it and then
 * closes it. A negative (i.e. unset or already taken) fd is left untouched. Failures of either step are
 * deliberately ignored. */
static void cleanup_clear_loop_close(int *fd) {
        if (*fd >= 0) {
                (void) ioctl(*fd, LOOP_CLR_FD);
                (void) safe_close(*fd);
        }
}
42

43
/* Queries the loop device status via LOOP_GET_STATUS64.
 *
 * Returns > 0 if the query succeeds (device is bound), 0 if it fails with ENXIO (not bound), and a
 * negative errno-style value on any other failure. */
static int loop_is_bound(int fd) {
        struct loop_info64 status;

        if (ioctl(ASSERT_FD(fd), LOOP_GET_STATUS64, &status) >= 0)
                return true; /* bound! */

        if (errno != ENXIO)
                return -errno;

        return false; /* not bound! */
}
55

56
static int open_lock_fd(int primary_fd, int operation) {
2,155✔
57
        _cleanup_close_ int lock_fd = -EBADF;
2,155✔
58

59
        assert(IN_SET(operation & ~LOCK_NB, LOCK_SH, LOCK_EX));
2,155✔
60

61
        lock_fd = fd_reopen(ASSERT_FD(primary_fd), O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
2,155✔
62
        if (lock_fd < 0)
2,155✔
63
                return lock_fd;
64

65
        if (flock(lock_fd, operation) < 0)
2,155✔
66
                return -errno;
×
67

68
        return TAKE_FD(lock_fd);
69
}
70

71
/* If LO_FLAGS_DIRECT_IO was requested in 'c', reads back the loop device status and checks that the flag
 * is actually in effect.
 *
 * Returns 0 if direct IO was not requested or is in effect, -ENOANO if it was requested but is not in
 * effect, and a negative errno-style value if the status cannot be queried. */
static int loop_configure_verify_direct_io(int fd, const struct loop_config *c) {
        struct loop_info64 status;

        assert(fd >= 0);
        assert(c);

        /* Nothing to verify if direct IO wasn't asked for in the first place. */
        if (!FLAGS_SET(c->info.lo_flags, LO_FLAGS_DIRECT_IO))
                return 0;

        if (ioctl(fd, LOOP_GET_STATUS64, &status) < 0)
                return log_debug_errno(errno, "Failed to issue LOOP_GET_STATUS64: %m");

#if HAVE_VALGRIND_MEMCHECK_H
        VALGRIND_MAKE_MEM_DEFINED(&status, sizeof(status));
#endif

        /* Old kernels (<= 5.3) required the loopback block size to be set to the logical block size of
         * the underlying file system. There is no nice way to query that value, so we don't bother. Newer
         * kernels propagate the block size automatically. We merely check here whether enabling direct IO
         * worked, so that this remains easy to debug.
         *
         * (If somebody really wants direct IO on old kernels: enabling it with iteratively larger block
         * sizes until it eventually works might be an option.)
         *
         * On older kernels (e.g.: 5.10), attempting this on a file stored on a dm-crypt backed partition
         * makes the kernel return I/O errors when the mounted loop device is accessed. Hence report a
         * recognizable error, so that the operation is restarted from scratch without
         * LO_FLAGS_DIRECT_IO. */
        if (FLAGS_SET(status.lo_flags, LO_FLAGS_DIRECT_IO))
                return 0;

        return log_debug_errno(
                        SYNTHETIC_ERRNO(ENOANO),
                        "Could not enable direct IO mode, retrying in buffered IO mode.");
}
106

107
/* Checks whether a preceding LOOP_CONFIGURE call actually put the settings requested in 'c' into effect on
 * the loop device referenced by 'fd'. The following properties are verified, each only if requested:
 *
 *   - the sector size (c->block_size != 0)
 *   - the size limit (c->info.lo_sizelimit != 0)
 *   - partition scanning (LO_FLAGS_PARTSCAN)
 *   - direct IO (LO_FLAGS_DIRECT_IO), via loop_configure_verify_direct_io()
 *
 * Returns > 0 if everything checked out, 0 if at least one of the first three properties does not match
 * (i.e. LOOP_CONFIGURE has to be considered broken), and a negative errno-style value if a property could
 * not be read back or the direct IO check failed. */
static int loop_configure_verify(int fd, const struct loop_config *c) {
        bool broken = false;
        int r;

        assert(fd >= 0);
        assert(c);

        if (c->block_size != 0) {
                uint32_t ssz;

                r = blockdev_get_sector_size(fd, &ssz);
                if (r < 0)
                        return r;

                if (ssz != c->block_size) {
                        /* A mismatch is not ignored: it marks LOOP_CONFIGURE as broken just like the
                         * other checks below, so say so in the message. */
                        log_debug("LOOP_CONFIGURE didn't honour requested block size %" PRIu32 ", got %" PRIu32 " instead. Falling back to LOOP_SET_BLOCK_SIZE.", c->block_size, ssz);
                        broken = true;
                }
        }

        if (c->info.lo_sizelimit != 0) {
                /* Kernel 5.8 vanilla doesn't properly propagate the size limit into the
                 * block device. If it's used, let's immediately check if it had the desired
                 * effect hence. And if not use classic LOOP_SET_STATUS64. */
                uint64_t z;

                r = blockdev_get_device_size(fd, &z);
                if (r < 0)
                        return r;

                if (z != c->info.lo_sizelimit) {
                        log_debug("LOOP_CONFIGURE is broken, doesn't honour .info.lo_sizelimit. Falling back to LOOP_SET_STATUS64.");
                        broken = true;
                }
        }

        if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_PARTSCAN)) {
                /* Kernel 5.8 vanilla doesn't properly propagate the partition scanning flag
                 * into the block device. Let's hence verify if things work correctly here
                 * before returning. */

                r = blockdev_partscan_enabled_fd(fd);
                if (r < 0)
                        return r;
                if (r == 0) {
                        log_debug("LOOP_CONFIGURE is broken, doesn't honour LO_FLAGS_PARTSCAN. Falling back to LOOP_SET_STATUS64.");
                        broken = true;
                }
        }

        /* A direct IO failure is reported as an error (rather than via 'broken'), and takes precedence
         * over any mismatch detected above. */
        r = loop_configure_verify_direct_io(fd, c);
        if (r < 0)
                return r;

        return !broken;
}
163

164
static int loop_configure_fallback(int fd, const struct loop_config *c) {
×
165
        struct loop_info64 info_copy;
×
166
        int r;
×
167

168
        assert(fd >= 0);
×
169
        assert(c);
×
170

171
        /* Only some of the flags LOOP_CONFIGURE can set are also settable via LOOP_SET_STATUS64, hence mask
172
         * them out. */
173
        info_copy = c->info;
×
174
        info_copy.lo_flags &= LOOP_SET_STATUS_SETTABLE_FLAGS;
×
175

176
        /* Since kernel commit 5db470e229e22b7eda6e23b5566e532c96fb5bc3 (kernel v5.0) the LOOP_SET_STATUS64
177
         * ioctl can return EAGAIN in case we change the info.lo_offset field, if someone else is accessing the
178
         * block device while we try to reconfigure it. This is a pretty common case, since udev might
179
         * instantly start probing the device as soon as we attach an fd to it. Hence handle it in two ways:
180
         * first, let's take the BSD lock to ensure that udev will not step in between the point in
181
         * time where we attach the fd and where we reconfigure the device. Secondly, let's wait 50ms on
182
         * EAGAIN and retry. The former should be an efficient mechanism to avoid we have to wait 50ms
183
         * needlessly if we are just racing against udev. The latter is protection against all other cases,
184
         * i.e. peers that do not take the BSD lock. */
185

186
        for (unsigned n_attempts = 0;;) {
×
187
                if (ioctl(fd, LOOP_SET_STATUS64, &info_copy) >= 0)
×
188
                        break;
189

190
                if (errno != EAGAIN || ++n_attempts >= 64)
×
191
                        return log_debug_errno(errno, "Failed to configure loopback block device: %m");
×
192

193
                /* Sleep some random time, but at least 10ms, at most 250ms. Increase the delay the more
194
                 * failed attempts we see */
195
                (void) usleep_safe(UINT64_C(10) * USEC_PER_MSEC +
×
196
                              random_u64_range(UINT64_C(240) * USEC_PER_MSEC * n_attempts/64));
×
197
        }
198

199
        /* If a block size is requested then try to configure it. If that doesn't work, ignore errors, but
200
         * afterwards, let's validate what is in effect, and if it doesn't match what we want, fail */
201
        if (c->block_size != 0) {
×
202
                uint32_t ssz;
×
203

204
                if (ioctl(fd, LOOP_SET_BLOCK_SIZE, (unsigned long) c->block_size) < 0)
×
205
                        log_debug_errno(errno, "Failed to set sector size, ignoring: %m");
×
206

207
                r = blockdev_get_sector_size(fd, &ssz);
×
208
                if (r < 0)
×
209
                        return log_debug_errno(r, "Failed to read sector size: %m");
×
210
                if (ssz != c->block_size)
×
211
                        return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Sector size of loopback device doesn't match what we requested, refusing.");
×
212
        }
213

214
        /* LO_FLAGS_DIRECT_IO is a flags we need to configure via explicit ioctls. */
215
        if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_DIRECT_IO))
×
216
                if (ioctl(fd, LOOP_SET_DIRECT_IO, 1UL) < 0)
×
217
                        log_debug_errno(errno, "Failed to enable direct IO mode, ignoring: %m");
×
218

219
        return loop_configure_verify_direct_io(fd, c);
×
220
}
221

222
static int loop_configure(
2,020✔
223
                int nr,
224
                int open_flags,
225
                int lock_op,
226
                const struct loop_config *c,
227
                LoopDevice **ret) {
228

229
        static bool loop_configure_broken = false;
2,020✔
230

231
        _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
2,020✔
232
        _cleanup_(cleanup_clear_loop_close) int loop_with_fd = -EBADF; /* This must be declared before lock_fd. */
×
233
        _cleanup_close_ int fd = -EBADF, lock_fd = -EBADF;
4,040✔
234
        _cleanup_free_ char *node = NULL;
2,020✔
235
        uint64_t diskseq = 0;
2,020✔
236
        dev_t devno;
2,020✔
237
        int r;
2,020✔
238

239
        assert(nr >= 0);
2,020✔
240
        assert(c);
2,020✔
241
        assert(ret);
2,020✔
242

243
        if (asprintf(&node, "/dev/loop%i", nr) < 0)
2,020✔
244
                return log_oom_debug();
×
245

246
        r = sd_device_new_from_devname(&dev, node);
2,020✔
247
        if (r < 0)
2,020✔
248
                return log_debug_errno(r, "Failed to create sd_device object for \"%s\": %m", node);
×
249

250
        r = sd_device_get_devnum(dev, &devno);
2,020✔
251
        if (r < 0)
2,020✔
252
                return log_device_debug_errno(dev, r, "Failed to get devnum: %m");
×
253

254
        fd = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
2,020✔
255
        if (fd < 0)
2,020✔
256
                return log_device_debug_errno(dev, fd, "Failed to open device: %m");
×
257

258
        /* Let's lock the device before we do anything. We take the BSD lock on a second, separately opened
259
         * fd for the device. udev after all watches for close() events (specifically IN_CLOSE_WRITE) on
260
         * block devices to reprobe them, hence by having a separate fd we will later close() we can ensure
261
         * we trigger udev after everything is done. If we'd lock our own fd instead and keep it open for a
262
         * long time udev would possibly never run on it again, even though the fd is unlocked, simply
263
         * because we never close() it. It also has the nice benefit we can use the _cleanup_close_ logic to
264
         * automatically release the lock, after we are done. */
265
        lock_fd = open_lock_fd(fd, LOCK_EX);
2,020✔
266
        if (lock_fd < 0)
2,020✔
267
                return log_device_debug_errno(dev, lock_fd, "Failed to acquire lock: %m");
×
268

269
        log_device_debug(dev, "Acquired exclusive lock.");
2,047✔
270

271
        /* Let's see if backing file is really unattached. Someone may already attach a backing file without
272
         * taking BSD lock. */
273
        r = loop_is_bound(fd);
2,020✔
274
        if (r < 0)
2,020✔
275
                return log_device_debug_errno(dev, r, "Failed to check if the loopback block device is bound: %m");
×
276
        if (r > 0)
2,020✔
277
                return log_device_debug_errno(dev, SYNTHETIC_ERRNO(EBUSY),
53✔
278
                                              "The loopback block device is already bound, ignoring.");
279

280
        /* Let's see if the device is really detached, i.e. currently has no associated partition block
281
         * devices. On various kernels (such as 5.8) it is possible to have a loopback block device that
282
         * superficially is detached but still has partition block devices associated for it. Let's then
283
         * manually remove the partitions via BLKPG, and tell the caller we did that via EUCLEAN, so they try
284
         * again. */
285
        r = block_device_remove_all_partitions(dev, fd);
1,967✔
286
        if (r < 0)
1,967✔
287
                return log_device_debug_errno(dev, r, "Failed to remove partitions on the loopback block device: %m");
×
288
        if (r > 0)
1,967✔
289
                /* Removed all partitions. Let's report this to the caller, to try again, and count this as
290
                 * an attempt. */
291
                return log_device_debug_errno(dev, SYNTHETIC_ERRNO(EUCLEAN),
×
292
                                              "Removed partitions on the loopback block device.");
293

294
        if (!loop_configure_broken) {
1,967✔
295
                if (ioctl(fd, LOOP_CONFIGURE, c) < 0) {
1,967✔
296
                        /* Do fallback only if LOOP_CONFIGURE is not supported, propagate all other errors. */
UNCOV
297
                        if (!ERRNO_IS_IOCTL_NOT_SUPPORTED(errno))
×
UNCOV
298
                                return log_device_debug_errno(dev, errno, "ioctl(LOOP_CONFIGURE) failed: %m");
×
299

300
                        loop_configure_broken = true;
×
301
                } else {
302
                        loop_with_fd = TAKE_FD(fd);
1,967✔
303

304
                        r = loop_configure_verify(loop_with_fd, c);
1,967✔
305
                        if (r < 0)
1,967✔
306
                                return log_device_debug_errno(dev, r, "Failed to verify if loopback block device is correctly configured: %m");
×
307
                        if (r == 0) {
1,967✔
308
                                /* LOOP_CONFIGURE doesn't work. Remember that. */
309
                                loop_configure_broken = true;
×
310

311
                                /* We return EBUSY here instead of retrying immediately with LOOP_SET_FD,
312
                                 * because LOOP_CLR_FD is async: if the operation cannot be executed right
313
                                 * away it just sets the autoclear flag on the device. This means there's a
314
                                 * good chance we cannot actually reuse the loopback device right-away. Hence
315
                                 * let's assume it's busy, avoid the trouble and let the calling loop call us
316
                                 * again with a new, likely unused device. */
317
                                return -EBUSY;
×
318
                        }
319
                }
320
        }
321

322
        if (loop_configure_broken) {
1,967✔
323
                if (ioctl(fd, LOOP_SET_FD, c->fd) < 0)
×
324
                        return log_device_debug_errno(dev, errno, "ioctl(LOOP_SET_FD) failed: %m");
×
325

326
                loop_with_fd = TAKE_FD(fd);
×
327

328
                r = loop_configure_fallback(loop_with_fd, c);
×
329
                if (r < 0)
×
330
                        return r;
331
        }
332

333
        r = fd_get_diskseq(loop_with_fd, &diskseq);
1,967✔
334
        if (r < 0 && r != -EOPNOTSUPP)
1,967✔
335
                return log_device_debug_errno(dev, r, "Failed to get diskseq: %m");
×
336

337
        switch (lock_op & ~LOCK_NB) {
1,967✔
338
        case LOCK_EX: /* Already in effect */
339
                break;
340
        case LOCK_SH: /* Downgrade */
1,841✔
341
                if (flock(lock_fd, lock_op) < 0)
1,841✔
342
                        return log_device_debug_errno(dev, errno, "Failed to downgrade lock level: %m");
×
343
                break;
344
        case LOCK_UN: /* Release */
×
345
                lock_fd = safe_close(lock_fd);
×
346
                break;
347
        default:
×
348
                assert_not_reached();
×
349
        }
350

351
        uint64_t device_size;
1,967✔
352
        r = blockdev_get_device_size(loop_with_fd, &device_size);
1,967✔
353
        if (r < 0)
1,967✔
354
                return log_device_debug_errno(dev, r, "Failed to get loopback device size: %m");
×
355

356
        LoopDevice *d = new(LoopDevice, 1);
1,967✔
357
        if (!d)
1,967✔
358
                return log_oom_debug();
×
359

360
        *d = (LoopDevice) {
1,967✔
361
                .n_ref = 1,
362
                .fd = TAKE_FD(loop_with_fd),
1,967✔
363
                .lock_fd = TAKE_FD(lock_fd),
1,967✔
364
                .node = TAKE_PTR(node),
1,967✔
365
                .nr = nr,
366
                .devno = devno,
367
                .dev = TAKE_PTR(dev),
1,967✔
368
                .diskseq = diskseq,
369
                .sector_size = c->block_size,
1,967✔
370
                .device_size = device_size,
371
                .created = true,
372
        };
373

374
        *ret = TAKE_PTR(d);
1,967✔
375
        return 0;
1,967✔
376
}
377

378
static int fd_get_max_discard(int fd, uint64_t *ret) {
×
379
        struct stat st;
×
380
        char sysfs_path[STRLEN("/sys/dev/block/" ":" "/queue/discard_max_bytes") + DECIMAL_STR_MAX(dev_t) * 2 + 1];
×
381
        _cleanup_free_ char *buffer = NULL;
×
382
        int r;
×
383

384
        assert(ret);
×
385

386
        if (fstat(ASSERT_FD(fd), &st) < 0)
×
387
                return -errno;
×
388

389
        if (!S_ISBLK(st.st_mode))
×
390
                return -ENOTBLK;
391

392
        xsprintf(sysfs_path, "/sys/dev/block/" DEVNUM_FORMAT_STR "/queue/discard_max_bytes", DEVNUM_FORMAT_VAL(st.st_rdev));
×
393

394
        r = read_one_line_file(sysfs_path, &buffer);
×
395
        if (r < 0)
×
396
                return r;
397

398
        return safe_atou64(buffer, ret);
×
399
}
400

401
static int fd_set_max_discard(int fd, uint64_t max_discard) {
×
402
        struct stat st;
×
403
        char sysfs_path[STRLEN("/sys/dev/block/" ":" "/queue/discard_max_bytes") + DECIMAL_STR_MAX(dev_t) * 2 + 1];
×
404

405
        if (fstat(ASSERT_FD(fd), &st) < 0)
×
406
                return -errno;
×
407

408
        if (!S_ISBLK(st.st_mode))
×
409
                return -ENOTBLK;
410

411
        xsprintf(sysfs_path, "/sys/dev/block/" DEVNUM_FORMAT_STR "/queue/discard_max_bytes", DEVNUM_FORMAT_VAL(st.st_rdev));
×
412

413
        return write_string_filef(sysfs_path, WRITE_STRING_FILE_DISABLE_BUFFER, "%" PRIu64, max_discard);
×
414
}
415

416
static int loop_device_make_internal(
1,987✔
417
                const char *path,
418
                int fd,
419
                int open_flags,
420
                uint64_t offset,
421
                uint64_t size,
422
                uint32_t sector_size,
423
                uint32_t loop_flags,
424
                int lock_op,
425
                LoopDevice **ret) {
426

427
        _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
×
428
        _cleanup_close_ int reopened_fd = -EBADF, control = -EBADF;
3,974✔
429
        _cleanup_free_ char *backing_file = NULL;
1,987✔
430
        struct loop_config config;
1,987✔
431
        int r, f_flags;
1,987✔
432
        struct stat st;
1,987✔
433

434
        assert(ret);
1,987✔
435
        assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
1,987✔
436

437
        if (fstat(ASSERT_FD(fd), &st) < 0)
1,987✔
438
                return -errno;
×
439

440
        if (S_ISBLK(st.st_mode)) {
1,987✔
441
                if (offset == 0 && IN_SET(size, 0, UINT64_MAX))
×
442
                        /* If this is already a block device and we are supposed to cover the whole of it
443
                         * then store an fd to the original open device node — and do not actually create an
444
                         * unnecessary loopback device for it. */
445
                        return loop_device_open_from_fd(fd, open_flags, lock_op, ret);
×
446
        } else {
447
                r = stat_verify_regular(&st);
1,987✔
448
                if (r < 0)
1,987✔
449
                        return r;
450
        }
451

452
        if (path) {
1,987✔
453
                r = path_make_absolute_cwd(path, &backing_file);
265✔
454
                if (r < 0)
265✔
455
                        return r;
456

457
                path_simplify(backing_file);
265✔
458
        } else {
459
                r = fd_get_path(fd, &backing_file);
1,722✔
460
                if (r < 0)
1,722✔
461
                        return r;
462
        }
463

464
        f_flags = fcntl(fd, F_GETFL);
1,987✔
465
        if (f_flags < 0)
1,987✔
466
                return -errno;
×
467

468
        if (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) != FLAGS_SET(f_flags, O_DIRECT)) {
1,987✔
469
                /* If LO_FLAGS_DIRECT_IO is requested, then make sure we have the fd open with O_DIRECT, as
470
                 * that's required. Conversely, if it's off require that O_DIRECT is off too (that's because
471
                 * new kernels will implicitly enable LO_FLAGS_DIRECT_IO if O_DIRECT is set).
472
                 *
473
                 * Our intention here is that LO_FLAGS_DIRECT_IO is the primary knob, and O_DIRECT derived
474
                 * from that automatically. */
475

476
                reopened_fd = fd_reopen(fd, (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0)|O_CLOEXEC|O_NONBLOCK|open_flags);
126✔
477
                if (reopened_fd < 0) {
126✔
478
                        if (!FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO))
×
479
                                return log_debug_errno(reopened_fd, "Failed to reopen file descriptor without O_DIRECT: %m");
×
480

481
                        /* Some file systems might not support O_DIRECT, let's gracefully continue without it then. */
482
                        log_debug_errno(reopened_fd, "Failed to enable O_DIRECT for backing file descriptor for loopback device. Continuing without.");
×
483
                        loop_flags &= ~LO_FLAGS_DIRECT_IO;
×
484
                } else
485
                        fd = reopened_fd; /* From now on, operate on our new O_DIRECT fd */
486
        }
487

488
        control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
1,987✔
489
        if (control < 0)
1,987✔
490
                return -errno;
20✔
491

492
        if (sector_size == 0)
1,967✔
493
                /* If no sector size is specified, default to the classic default */
494
                sector_size = 512;
×
495
        else if (sector_size == UINT32_MAX) {
1,967✔
496

497
                if (S_ISBLK(st.st_mode))
1,841✔
498
                        /* If the sector size is specified as UINT32_MAX we'll propagate the sector size of
499
                         * the underlying block device. */
500
                        r = blockdev_get_sector_size(fd, &sector_size);
×
501
                else {
502
                        _cleanup_close_ int non_direct_io_fd = -EBADF;
1,987✔
503
                        int probe_fd;
1,841✔
504

505
                        assert(S_ISREG(st.st_mode));
1,841✔
506

507
                        /* If sector size is specified as UINT32_MAX, we'll try to probe the right sector
508
                         * size of the image in question by looking for the GPT partition header at various
509
                         * offsets. This of course only works if the image already has a disk label.
510
                         *
511
                         * So here we actually want to read the file contents ourselves. This is quite likely
512
                         * not going to work if we managed to enable O_DIRECT, because in such a case there
513
                         * are some pretty strict alignment requirements to offset, size and target, but
514
                         * there's no way to query what alignment specifically is actually required. Hence,
515
                         * let's avoid the mess, and temporarily open an fd without O_DIRECT for the probing
516
                         * logic. */
517

518
                        if (FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO)) {
1,841✔
519
                                non_direct_io_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
1,840✔
520
                                if (non_direct_io_fd < 0)
1,840✔
521
                                        return non_direct_io_fd;
×
522

523
                                probe_fd = non_direct_io_fd;
524
                        } else
525
                                probe_fd = fd;
526

527
                        r = probe_sector_size(probe_fd, &sector_size);
1,841✔
528
                }
529
                if (r < 0)
1,841✔
530
                        return r;
531
        }
532

533
        config = (struct loop_config) {
3,934✔
534
                .fd = fd,
535
                .block_size = sector_size,
536
                .info = {
537
                        /* Use the specified flags, but configure the read-only flag from the open flags, and force autoclear */
538
                        .lo_flags = (loop_flags & ~LO_FLAGS_READ_ONLY) | ((open_flags & O_ACCMODE_STRICT) == O_RDONLY ? LO_FLAGS_READ_ONLY : 0) | LO_FLAGS_AUTOCLEAR,
1,967✔
539
                        .lo_offset = offset,
540
                        .lo_sizelimit = size == UINT64_MAX ? 0 : size,
1,967✔
541
                },
542
        };
543

544
        /* Loop around LOOP_CTL_GET_FREE, since at the moment we attempt to open the returned device it might
545
         * be gone already, taken by somebody else racing against us. */
546
        for (unsigned n_attempts = 0;;) {
1,967✔
547
                usec_t usec;
2,020✔
548
                int nr;
2,020✔
549

550
                /* Let's take a lock on the control device first. On a busy system, where many programs
551
                 * attempt to allocate a loopback device at the same time, we might otherwise keep looping
552
                 * around relatively heavy operations: asking for a free loopback device, then opening it,
553
                 * validating it, attaching something to it. Let's serialize this whole operation, to make
554
                 * unnecessary busywork less likely. Note that this is just something we do to optimize our
555
                 * own code (and whoever else decides to use LOCK_EX locks for this), taking this lock is not
556
                 * necessary, it just means it's less likely we have to iterate through this loop again and
557
                 * again if our own code races against our own code.
558
                 *
559
                 * Note: our lock protocol is to take the /dev/loop-control lock first, and the block device
560
                 * lock second, if both are taken, and always in this order, to avoid ABBA locking issues. */
561
                if (flock(control, LOCK_EX) < 0)
2,020✔
562
                        return -errno;
×
563

564
                nr = ioctl(control, LOOP_CTL_GET_FREE);
2,020✔
565
                if (nr < 0)
2,020✔
566
                        return -errno;
×
567

568
                r = loop_configure(nr, open_flags, lock_op, &config, &d);
2,020✔
569
                if (r >= 0)
2,020✔
570
                        break;
571

572
                /* -ENODEV or friends: Somebody might've gotten the same number from the kernel, used the
573
                 * device, and called LOOP_CTL_REMOVE on it. Let's retry with a new number.
574
                 * -EBUSY: a file descriptor is already bound to the loopback block device.
575
                 * -EUCLEAN: some left-over partition devices that were cleaned up.
576
                 * -ENOANO: we tried to use LO_FLAGS_DIRECT_IO but the kernel rejected it. */
577
                if (!ERRNO_IS_DEVICE_ABSENT(r) && !IN_SET(r, -EBUSY, -EUCLEAN, -ENOANO))
53✔
578
                        return r;
579

580
                /* OK, this didn't work, let's try again a bit later, but first release the lock on the
581
                 * control device */
582
                if (flock(control, LOCK_UN) < 0)
53✔
583
                        return -errno;
×
584

585
                if (++n_attempts >= 64) /* Give up eventually */
53✔
586
                        return -EBUSY;
587

588
                /* If we failed to enable direct IO mode, let's retry without it. We restart the process as
589
                 * on some combination of kernel version and storage filesystem, the kernel is very unhappy
590
                 * about a failed DIRECT_IO enablement and throws I/O errors. */
591
                if (r == -ENOANO && FLAGS_SET(config.info.lo_flags, LO_FLAGS_DIRECT_IO)) {
53✔
592
                        config.info.lo_flags &= ~LO_FLAGS_DIRECT_IO;
×
593
                        open_flags &= ~O_DIRECT;
×
594

595
                        int non_direct_io_fd = fd_reopen(config.fd, O_CLOEXEC|O_NONBLOCK|open_flags);
×
596
                        if (non_direct_io_fd < 0)
×
597
                                return log_debug_errno(
×
598
                                                non_direct_io_fd,
599
                                                "Failed to reopen file descriptor without O_DIRECT: %m");
600

601
                        safe_close(reopened_fd);
×
602
                        fd = config.fd = /* For cleanups */ reopened_fd = non_direct_io_fd;
×
603
                }
604

605
                /* Wait some random time, to make collision less likely. Let's pick a random time in the
606
                 * range 0ms…250ms, linearly scaled by the number of failed attempts. */
607
                usec = random_u64_range(UINT64_C(10) * USEC_PER_MSEC +
106✔
608
                                        UINT64_C(240) * USEC_PER_MSEC * n_attempts/64);
53✔
609
                log_debug("Trying again after %s.", FORMAT_TIMESPAN(usec, USEC_PER_MSEC));
53✔
610
                (void) usleep_safe(usec);
53✔
611
        }
612

613
        if (S_ISBLK(st.st_mode)) {
1,967✔
614
                /* Propagate backing device's discard byte limit to our loopback block device. We do this in
615
                 * order to avoid that (supposedly quick) discard requests on the loopback device get turned
616
                 * into (likely slow) zero-out requests on backing devices that do not support discarding
617
                 * natively, but do support zero-out. */
618
                uint64_t discard_max_bytes;
×
619

620
                r = fd_get_max_discard(fd, &discard_max_bytes);
×
621
                if (r < 0)
×
622
                        log_debug_errno(r, "Failed to read 'discard_max_bytes' of backing device, ignoring: %m");
×
623
                else {
624
                        r = fd_set_max_discard(d->fd, discard_max_bytes);
×
625
                        if (r < 0)
×
626
                                log_debug_errno(r, "Failed to write 'discard_max_bytes' of loop device, ignoring: %m");
×
627
                }
628
        }
629

630
        d->backing_file = TAKE_PTR(backing_file);
1,967✔
631
        d->backing_inode = st.st_ino;
1,967✔
632
        d->backing_devno = st.st_dev;
1,967✔
633

634
        log_debug("Successfully acquired %s, devno=%u:%u, nr=%i, diskseq=%" PRIu64,
1,967✔
635
                  d->node,
636
                  major(d->devno), minor(d->devno),
637
                  d->nr,
638
                  d->diskseq);
639

640
        *ret = TAKE_PTR(d);
1,967✔
641
        return 0;
1,967✔
642
}
643

644
/* Applies the $SYSTEMD_LOOP_DIRECT_IO override to the caller's loop flags and returns the result.
 *
 * LO_FLAGS_DIRECT_IO ends up set for every getenv_bool() result other than 0, i.e. it is on by
 * default and is only cleared when the variable is explicitly configured to off. A parse failure is
 * logged at debug level and otherwise treated like "on". */
static uint32_t loop_flags_mangle(uint32_t loop_flags) {
        int direct_io;
        bool enable;

        direct_io = getenv_bool("SYSTEMD_LOOP_DIRECT_IO");

        /* -ENXIO is deliberately not reported; every other negative result is logged and then ignored. */
        if (direct_io < 0 && direct_io != -ENXIO)
                log_debug_errno(direct_io, "Failed to parse $SYSTEMD_LOOP_DIRECT_IO, ignoring: %m");

        enable = direct_io != 0;

        return UPDATE_FLAG(loop_flags, LO_FLAGS_DIRECT_IO, enable);
}
653

654
int loop_device_make(
126✔
655
                int fd,
656
                int open_flags,
657
                uint64_t offset,
658
                uint64_t size,
659
                uint32_t sector_size,
660
                uint32_t loop_flags,
661
                int lock_op,
662
                LoopDevice **ret) {
663

664
        assert(fd >= 0);
126✔
665
        assert(ret);
126✔
666

667
        return loop_device_make_internal(
126✔
668
                        NULL,
669
                        fd,
670
                        open_flags,
671
                        offset,
672
                        size,
673
                        sector_size,
674
                        loop_flags_mangle(loop_flags),
675
                        lock_op,
676
                        ret);
677
}
678

679
int loop_device_make_by_path_at(
1,861✔
680
                int dir_fd,
681
                const char *path,
682
                int open_flags,
683
                uint32_t sector_size,
684
                uint32_t loop_flags,
685
                int lock_op,
686
                LoopDevice **ret) {
687

688
        int r, basic_flags, direct_flags, rdwr_flags;
1,861✔
689
        _cleanup_close_ int fd = -EBADF;
1,861✔
690
        bool direct = false;
1,861✔
691

692
        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
1,861✔
693
        assert(ret);
1,861✔
694
        assert(open_flags < 0 || IN_SET(open_flags, O_RDWR, O_RDONLY));
1,861✔
695

696
        /* Passing < 0 as open_flags here means we'll try to open the device writable if we can, retrying
697
         * read-only if we cannot. */
698

699
        loop_flags = loop_flags_mangle(loop_flags);
1,861✔
700

701
        /* Let's open with O_DIRECT if we can. But not all file systems support that, hence fall back to
702
         * non-O_DIRECT mode automatically, if it fails. */
703

704
        basic_flags = O_CLOEXEC|O_NONBLOCK|O_NOCTTY;
1,861✔
705
        direct_flags = FLAGS_SET(loop_flags, LO_FLAGS_DIRECT_IO) ? O_DIRECT : 0;
1,861✔
706
        rdwr_flags = open_flags >= 0 ? open_flags : O_RDWR;
1,861✔
707

708
        fd = xopenat(dir_fd, path, basic_flags|direct_flags|rdwr_flags);
1,861✔
709
        if (fd < 0 && direct_flags != 0) /* If we had O_DIRECT on, and things failed with that, let's immediately try again without */
1,861✔
710
                fd = xopenat(dir_fd, path, basic_flags|rdwr_flags);
1✔
711
        else
712
                direct = direct_flags != 0;
×
713
        if (fd < 0) {
1,861✔
714
                r = fd;
1✔
715

716
                /* Retry read-only? */
717
                if (open_flags >= 0 || !ERRNO_IS_NEG_FS_WRITE_REFUSED(r))
1,862✔
718
                        return r;
719

720
                fd = xopenat(dir_fd, path, basic_flags|direct_flags|O_RDONLY);
×
721
                if (fd < 0 && direct_flags != 0) /* as above */
×
722
                        fd = xopenat(dir_fd, path, basic_flags|O_RDONLY);
×
723
                else
724
                        direct = direct_flags != 0;
×
725
                if (fd < 0)
×
726
                        return r; /* Propagate original error */
727

728
                open_flags = O_RDONLY;
729
        } else if (open_flags < 0)
1,860✔
730
                open_flags = O_RDWR;
80✔
731

732
        log_debug("Opened %s in %s access mode%s, with O_DIRECT %s%s.",
8,846✔
733
                  path ?: "loop device",
734
                  open_flags == O_RDWR ? "O_RDWR" : "O_RDONLY",
735
                  open_flags != rdwr_flags ? " (O_RDWR was requested but not allowed)" : "",
736
                  direct ? "enabled" : "disabled",
737
                  direct != (direct_flags != 0) ? " (O_DIRECT was requested but not supported)" : "");
738

739
        return loop_device_make_internal(
3,455✔
740
                        dir_fd == AT_FDCWD ? path : NULL,
741
                        fd,
742
                        open_flags,
743
                        /* offset= */ 0,
744
                        /* size= */ 0,
745
                        sector_size,
746
                        loop_flags,
747
                        lock_op,
748
                        ret);
749
}
750

751
int loop_device_make_by_path_memory(
1✔
752
                const char *path,
753
                int open_flags,
754
                uint32_t sector_size,
755
                uint32_t loop_flags,
756
                int lock_op,
757
                LoopDevice **ret) {
758

759
        _cleanup_close_ int fd = -EBADF, mfd = -EBADF;
1✔
760
        _cleanup_free_ char *fn = NULL;
1✔
761
        struct stat st;
1✔
762
        int r;
1✔
763

764
        assert(path);
1✔
765
        assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
1✔
766
        assert(ret);
1✔
767

768
        loop_flags &= ~LO_FLAGS_DIRECT_IO; /* memfds don't support O_DIRECT, hence LO_FLAGS_DIRECT_IO can't be used either */
1✔
769

770
        fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|O_RDONLY);
1✔
771
        if (fd < 0)
1✔
772
                return -errno;
×
773

774
        if (fstat(fd, &st) < 0)
1✔
775
                return -errno;
×
776

777
        if (!S_ISREG(st.st_mode) && !S_ISBLK(st.st_mode))
1✔
778
                return -EBADF;
779

780
        r = path_extract_filename(path, &fn);
1✔
781
        if (r < 0)
1✔
782
                return r;
783

784
        mfd = memfd_clone_fd(fd, fn, open_flags|O_CLOEXEC);
1✔
785
        if (mfd < 0)
1✔
786
                return mfd;
787

788
        fd = safe_close(fd); /* Let's close the original early */
1✔
789

790
        return loop_device_make_internal(NULL, mfd, open_flags, 0, 0, sector_size, loop_flags, lock_op, ret);
1✔
791
}
792

793
/* Destroys a LoopDevice object and releases everything it owns. Always returns NULL; a NULL argument
 * is accepted and ignored.
 *
 * If the device is ours (neither foreign nor relinquished) the loopback device is additionally torn
 * down synchronously: partitions are removed, the backing fd is detached with LOOP_CLR_FD, and the
 * device node is removed through /dev/loop-control. Every failure on the way is logged at debug level
 * and ignored, since there is nothing sensible the caller could do about it. */
static LoopDevice* loop_device_free(LoopDevice *d) {
        _cleanup_close_ int control_fd = -EBADF;
        bool destroy;
        int r;

        if (!d)
                return NULL;

        /* Drop our device lock before anything else. The lock protocol requires that the control device
         * lock is taken before the block device lock (otherwise: ABBA deadlocks), and LOOP_CLR_FD below
         * would fail if a second fd to the block device were still open. */
        d->lock_fd = safe_close(d->lock_fd);

        destroy = !LOOP_DEVICE_IS_FOREIGN(d) && !d->relinquished;

        /* Take the control device lock early, so that releasing and deleting the block device happens in
         * a synchronized fashion and allocators do not see the device as free while we are about to
         * delete it. */
        if (destroy) {
                control_fd = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
                if (control_fd < 0)
                        log_debug_errno(errno, "Failed to open loop control device, cannot remove loop device '%s', ignoring: %m", strna(d->node));
                else if (flock(control_fd, LOCK_EX) < 0)
                        log_debug_errno(errno, "Failed to lock loop control device, ignoring: %m");
        }

        /* Release the loopback block device itself */
        if (d->fd >= 0) {
                /* Sync implicitly, otherwise in-flight blocks might never reach the backing file */
                if (fsync(d->fd) < 0)
                        log_debug_errno(errno, "Failed to sync loop block device, ignoring: %m");

                if (destroy) {
                        /* Clear the device synchronously: lock it, remove all partitions, then detach
                         * the backing file. This keeps udev from accessing the device concurrently. The
                         * lock is taken on the primary fd (not on a separate lock fd as during
                         * allocation), because it has to be held across LOOP_CLR_FD, and that call
                         * would fail if more than one fd were open. */
                        if (flock(d->fd, LOCK_EX) < 0)
                                log_debug_errno(errno, "Failed to lock loop block device, ignoring: %m");

                        r = block_device_remove_all_partitions(d->dev, d->fd);
                        if (r < 0)
                                log_debug_errno(r, "Failed to remove partitions of loopback block device, ignoring: %m");

                        if (ioctl(d->fd, LOOP_CLR_FD) < 0)
                                log_debug_errno(errno, "Failed to clear loop device, ignoring: %m");
                }

                safe_close(d->fd);
        }

        /* With the block device released, try to remove the device node too */
        if (control_fd >= 0) {
                /* Up to 39 removal attempts. The delay starts at 5 ms and doubles on every fifth
                 * attempt, which adds up to 38 sleeps:
                 * 4*5 + 5*10 + 5*20 + 5*40 + 5*80 + 5*160 + 5*320 + 4*640 = 5730 ms. */
                useconds_t delay = 5 * USEC_PER_MSEC;
                unsigned attempt = 1;

                while (ioctl(control_fd, LOOP_CTL_REMOVE, d->nr) < 0) {
                        if (errno != EBUSY || attempt > 38) {
                                log_debug_errno(errno, "Failed to remove device %s: %m", strna(d->node));
                                break;
                        }

                        if (attempt % 5 == 0) {
                                log_debug("Device is still busy after %u attempts…", attempt);
                                delay *= 2;
                        }

                        (void) usleep_safe(delay);
                        attempt++;
                }
        }

        free(d->node);
        sd_device_unref(d->dev);
        free(d->backing_file);
        return mfree(d);
}
873

874
/* Instantiates the reference counting helpers for LoopDevice through the project's
 * DEFINE_TRIVIAL_REF_UNREF_FUNC() macro, with loop_device_free() as the destructor. Presumably this
 * expands to loop_device_ref()/loop_device_unref() — confirm against the macro definition. */
DEFINE_TRIVIAL_REF_UNREF_FUNC(LoopDevice, loop_device, loop_device_free);
6,296✔
875

876
/* Marks the device as relinquished: from now on loop_device_free() neither clears nor removes the
 * loopback device when the object is destroyed. The clean-up is left to the kernel's loop device
 * "auto-clear" logic, which is turned on when the device is created. */
void loop_device_relinquish(LoopDevice *d) {
        assert(d);

        d->relinquished = true;
}
161✔
884

885
/* Reverts loop_device_relinquish(): clears the 'relinquished' flag, so that loop_device_free() is
 * allowed again to tear down the loopback device when the object is destroyed. */
void loop_device_unrelinquish(LoopDevice *d) {
        assert(d);

        d->relinquished = false;
}
22✔
889

890
int loop_device_open(
134✔
891
                sd_device *dev,
892
                int open_flags,
893
                int lock_op,
894
                LoopDevice **ret) {
895

896
        _cleanup_close_ int fd = -EBADF, lock_fd = -EBADF;
134✔
897
        _cleanup_free_ char *node = NULL, *backing_file = NULL;
134✔
898
        dev_t devnum, backing_devno = 0;
134✔
899
        struct loop_info64 info;
134✔
900
        ino_t backing_inode = 0;
134✔
901
        uint64_t diskseq = 0;
134✔
902
        LoopDevice *d;
134✔
903
        const char *s;
134✔
904
        int r, nr = -1;
134✔
905

906
        assert(dev);
134✔
907
        assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
134✔
908
        assert(ret);
134✔
909

910
        /* Even if fd is provided through the argument in loop_device_open_from_fd(), we reopen the inode
911
         * here, instead of keeping just a dup() clone of it around, since we want to ensure that the
912
         * O_DIRECT flag of the handle we keep is off, we have our own file index, and have the right
913
         * read/write mode in effect. */
914
        fd = sd_device_open(dev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
134✔
915
        if (fd < 0)
134✔
916
                return fd;
917

918
        if ((lock_op & ~LOCK_NB) != LOCK_UN) {
134✔
919
                lock_fd = open_lock_fd(fd, lock_op);
134✔
920
                if (lock_fd < 0)
134✔
921
                        return lock_fd;
922
        }
923

924
        if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0) {
134✔
925
#if HAVE_VALGRIND_MEMCHECK_H
926
                /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
927
                VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
928
#endif
929
                nr = info.lo_number;
28✔
930

931
                if (sd_device_get_sysattr_value(dev, "loop/backing_file", &s) >= 0) {
28✔
932
                        backing_file = strdup(s);
19✔
933
                        if (!backing_file)
19✔
934
                                return -ENOMEM;
935
                }
936

937
                backing_devno = info.lo_device;
28✔
938
                backing_inode = info.lo_inode;
28✔
939
        }
940

941
        r = fd_get_diskseq(fd, &diskseq);
134✔
942
        if (r < 0 && r != -EOPNOTSUPP)
134✔
943
                return r;
944

945
        uint32_t sector_size;
134✔
946
        r = blockdev_get_sector_size(fd, &sector_size);
134✔
947
        if (r < 0)
134✔
948
                return r;
949

950
        uint64_t device_size;
134✔
951
        r = blockdev_get_device_size(fd, &device_size);
134✔
952
        if (r < 0)
134✔
953
                return r;
954

955
        r = sd_device_get_devnum(dev, &devnum);
134✔
956
        if (r < 0)
134✔
957
                return r;
958

959
        r = sd_device_get_devname(dev, &s);
134✔
960
        if (r < 0)
134✔
961
                return r;
962

963
        node = strdup(s);
134✔
964
        if (!node)
134✔
965
                return -ENOMEM;
966

967
        d = new(LoopDevice, 1);
134✔
968
        if (!d)
134✔
969
                return -ENOMEM;
970

971
        *d = (LoopDevice) {
268✔
972
                .n_ref = 1,
973
                .fd = TAKE_FD(fd),
134✔
974
                .lock_fd = TAKE_FD(lock_fd),
134✔
975
                .nr = nr,
976
                .node = TAKE_PTR(node),
134✔
977
                .dev = sd_device_ref(dev),
134✔
978
                .backing_file = TAKE_PTR(backing_file),
134✔
979
                .backing_inode = backing_inode,
980
                .backing_devno = backing_devno,
981
                .relinquished = true, /* It's not ours, don't try to destroy it when this object is freed */
982
                .devno = devnum,
983
                .diskseq = diskseq,
984
                .sector_size = sector_size,
985
                .device_size = device_size,
986
                .created = false,
987
        };
988

989
        *ret = d;
134✔
990
        return 0;
134✔
991
}
992

993
int loop_device_open_from_fd(
2✔
994
                int fd,
995
                int open_flags,
996
                int lock_op,
997
                LoopDevice **ret) {
998

999
        _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
2✔
1000
        int r;
2✔
1001

1002
        r = block_device_new_from_fd(ASSERT_FD(fd), 0, &dev);
2✔
1003
        if (r < 0)
2✔
1004
                return r;
1005

1006
        return loop_device_open(dev, open_flags, lock_op, ret);
2✔
1007
}
1008

1009
int loop_device_open_from_path(
×
1010
                const char *path,
1011
                int open_flags,
1012
                int lock_op,
1013
                LoopDevice **ret) {
1014

1015
        _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
×
1016
        int r;
×
1017

1018
        assert(path);
×
1019

1020
        r = block_device_new_from_path(path, 0, &dev);
×
1021
        if (r < 0)
×
1022
                return r;
1023

1024
        return loop_device_open(dev, open_flags, lock_op, ret);
×
1025
}
1026

1027
static int resize_partition(int partition_fd, uint64_t offset, uint64_t size) {
×
1028
        char sysfs[STRLEN("/sys/dev/block/:/partition") + 2*DECIMAL_STR_MAX(dev_t) + 1];
×
1029
        _cleanup_free_ char *buffer = NULL;
×
1030
        uint64_t current_offset, current_size, partno;
×
1031
        _cleanup_close_ int whole_fd = -EBADF;
×
1032
        struct stat st;
×
1033
        dev_t devno;
×
1034
        int r;
×
1035

1036
        /* Resizes the partition the loopback device refer to (assuming it refers to one instead of an actual
1037
         * loopback device), and changes the offset, if needed. This is a fancy wrapper around
1038
         * BLKPG_RESIZE_PARTITION. */
1039

1040
        if (fstat(ASSERT_FD(partition_fd), &st) < 0)
×
1041
                return -errno;
×
1042

1043
        assert(S_ISBLK(st.st_mode));
×
1044

1045
        xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/partition", DEVNUM_FORMAT_VAL(st.st_rdev));
×
1046
        r = read_one_line_file(sysfs, &buffer);
×
1047
        if (r == -ENOENT) /* not a partition, cannot resize */
×
1048
                return -ENOTTY;
1049
        if (r < 0)
×
1050
                return r;
1051
        r = safe_atou64(buffer, &partno);
×
1052
        if (r < 0)
×
1053
                return r;
1054

1055
        xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/start", DEVNUM_FORMAT_VAL(st.st_rdev));
×
1056

1057
        buffer = mfree(buffer);
×
1058
        r = read_one_line_file(sysfs, &buffer);
×
1059
        if (r < 0)
×
1060
                return r;
1061
        r = safe_atou64(buffer, &current_offset);
×
1062
        if (r < 0)
×
1063
                return r;
1064
        if (current_offset > UINT64_MAX/512U)
×
1065
                return -EINVAL;
1066
        current_offset *= 512U;
×
1067

1068
        r = blockdev_get_device_size(partition_fd, &current_size);
×
1069
        if (r < 0)
×
1070
                return r;
1071

1072
        if (size == UINT64_MAX && offset == UINT64_MAX)
×
1073
                return 0;
1074
        if (current_size == size && current_offset == offset)
×
1075
                return 0;
1076

1077
        xsprintf(sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/../dev", DEVNUM_FORMAT_VAL(st.st_rdev));
×
1078

1079
        buffer = mfree(buffer);
×
1080
        r = read_one_line_file(sysfs, &buffer);
×
1081
        if (r < 0)
×
1082
                return r;
1083
        r = parse_devnum(buffer, &devno);
×
1084
        if (r < 0)
×
1085
                return r;
1086

1087
        whole_fd = r = device_open_from_devnum(S_IFBLK, devno, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY, NULL);
×
1088
        if (r < 0)
×
1089
                return r;
1090

1091
        return block_device_resize_partition(
×
1092
                        whole_fd,
1093
                        partno,
1094
                        offset == UINT64_MAX ? current_offset : offset,
1095
                        size == UINT64_MAX ? current_size : size);
1096
}
1097

1098
/* Changes the offset and/or size limit of the loop device relative to the beginning of the underlying
 * file or block device. If the object does not refer to a loopback device (nr < 0), the request is
 * forwarded to resize_partition(), which adjusts the partition geometry instead.
 *
 * If either 'offset' or 'size' is UINT64_MAX that parameter is left unchanged.
 *
 * Returns 0 if nothing had to be changed, a negative errno if one of the ioctls fails, and otherwise
 * the result of LOOP_SET_STATUS64. */
int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size) {
        struct loop_info64 info;
        bool keep_size, keep_offset;

        assert(d);
        assert(d->fd >= 0);

        if (d->nr < 0) /* not a loopback device */
                return resize_partition(d->fd, offset, size);

        if (ioctl(d->fd, LOOP_GET_STATUS64, &info) < 0)
                return -errno;

#if HAVE_VALGRIND_MEMCHECK_H
        /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
        VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
#endif

        /* A parameter needs no update if the caller did not specify it, or if it is in effect already. */
        keep_size = size == UINT64_MAX || info.lo_sizelimit == size;
        keep_offset = offset == UINT64_MAX || info.lo_offset == offset;

        if (keep_size && keep_offset)
                return 0;

        if (size != UINT64_MAX)
                info.lo_sizelimit = size;
        if (offset != UINT64_MAX)
                info.lo_offset = offset;

        return RET_NERRNO(ioctl(d->fd, LOOP_SET_STATUS64, &info));
}
1132

1133
/* Takes, changes or releases the BSD file lock on the loop device. 'operation' is LOCK_SH, LOCK_EX or
 * LOCK_UN, optionally combined with LOCK_NB.
 *
 * The lock lives on a dedicated lock fd: unlocking closes that fd, locking creates it on demand via
 * open_lock_fd(), and if it exists already the lock mode is changed with flock() on it.
 *
 * Returns 0 on success and a negative error on failure. */
int loop_device_flock(LoopDevice *d, int operation) {
        assert(IN_SET(operation & ~LOCK_NB, LOCK_UN, LOCK_SH, LOCK_EX));
        assert(d);

        /* Unlocking is implemented by simply closing the lock fd */
        if ((operation & ~LOCK_NB) == LOCK_UN) {
                d->lock_fd = safe_close(d->lock_fd);
                return 0;
        }

        /* There is a lock fd already: just switch the lock mode on it */
        if (d->lock_fd >= 0)
                return RET_NERRNO(flock(d->lock_fd, operation));

        /* No lock fd so far: create one, which locks it right away */
        d->lock_fd = open_lock_fd(ASSERT_FD(d->fd), operation);

        return d->lock_fd < 0 ? d->lock_fd : 0;
}
1155

1156
/* Explicitly syncs the loop block device with fsync(). loop_device_free() does the same implicitly,
 * but ignores failures; calling this function allows the caller to actually check the result.
 *
 * Returns the fsync() result on success, or a negative errno on failure. */
int loop_device_sync(LoopDevice *d) {
        int r;

        assert(d);

        r = fsync(ASSERT_FD(d->fd));

        return RET_NERRNO(r);
}
1164

1165
/* Turns the LO_FLAGS_AUTOCLEAR flag of the loop device on or off.
 *
 * Returns 0 if the flag already had the requested state, 1 if it was changed, and a negative errno if
 * reading or writing the loop device status fails. */
int loop_device_set_autoclear(LoopDevice *d, bool autoclear) {
        struct loop_info64 info;

        assert(d);

        if (ioctl(ASSERT_FD(d->fd), LOOP_GET_STATUS64, &info) < 0)
                return -errno;

#if HAVE_VALGRIND_MEMCHECK_H
        /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does. Without this the
         * flag check below reads memory Valgrind considers uninitialized. */
        VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
#endif

        if (autoclear == FLAGS_SET(info.lo_flags, LO_FLAGS_AUTOCLEAR))
                return 0;

        SET_FLAG(info.lo_flags, LO_FLAGS_AUTOCLEAR, autoclear);

        /* Write back the structure as read, with only the auto-clear flag modified */
        if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
                return -errno;

        return 1;
}
1183

1184
/* Sets the .lo_file_name of the loopback device. This is supposed to contain the path to the file
 * backing the block device, but is actually just a free-form string you can pass to the kernel. Most
 * tools that actually care for the backing file path use the sysfs attribute file loop/backing_file
 * which is a kernel generated string, subject to file system namespaces and such.
 *
 * .lo_file_name is useful since userspace can select it freely when creating a loopback block
 * device, and we can use it for /dev/disk/by-loop-ref/ symlinks, and similar, so that apps can
 * recognize their own loopback files.
 *
 * 'name' may be NULL, which clears the field.
 *
 * Returns 0 if the field already had the requested value, 1 if it was changed, -ENOBUFS if 'name'
 * does not fit into the field (including the trailing NUL), and a negative errno if reading or
 * writing the loop device status fails. */
int loop_device_set_filename(LoopDevice *d, const char *name) {
        struct loop_info64 info;

        assert(d);

        if (name && strlen(name) >= sizeof(info.lo_file_name))
                return -ENOBUFS;

        if (ioctl(ASSERT_FD(d->fd), LOOP_GET_STATUS64, &info) < 0)
                return -errno;

#if HAVE_VALGRIND_MEMCHECK_H
        /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does. Without this the
         * string comparison below reads memory Valgrind considers uninitialized. */
        VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
#endif

        if (strneq((char*) info.lo_file_name, strempty(name), sizeof(info.lo_file_name)))
                return 0;

        if (name) {
                /* strncpy() is used on purpose: it zero-fills the remainder of the field, so that no
                 * residue of a previous, longer name survives. The length was validated above, the
                 * explicit termination is just for safety. */
                strncpy((char*) info.lo_file_name, name, sizeof(info.lo_file_name)-1);
                info.lo_file_name[sizeof(info.lo_file_name)-1] = 0;
        } else
                memzero(info.lo_file_name, sizeof(info.lo_file_name));

        if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
                return -errno;

        return 1;
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc