• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 18436453786

10 Oct 2025 10:13PM UTC coverage: 72.292% (-0.04%) from 72.329%
18436453786

push

github

YHNdnzj
pidfd-util: add missing trailing argument for ioctl(PIDFD_GET_*_NAMESPACE)

Otherwise, it always fails with EINVAL.

1 of 1 new or added line in 1 file covered. (100.0%)

330 existing lines in 38 files now uncovered.

303595 of 419954 relevant lines covered (72.29%)

1064618.45 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

79.21
/src/basic/namespace-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <fcntl.h>
4
#include <linux/magic.h>
5
#include <linux/nsfs.h>
6
#include <sched.h>
7
#include <sys/ioctl.h>
8
#include <sys/mount.h>
9
#include <unistd.h>
10

11
#include "errno-util.h"
12
#include "fd-util.h"
13
#include "fileio.h"
14
#include "log.h"
15
#include "mountpoint-util.h"
16
#include "namespace-util.h"
17
#include "parse-util.h"
18
#include "pidfd-util.h"
19
#include "pidref.h"
20
#include "process-util.h"
21
#include "stat-util.h"
22
#include "stdio-util.h"
23
#include "uid-range.h"
24
#include "user-util.h"
25

26
/* Per-type namespace table, indexed by NamespaceType. Going by how the fields are read elsewhere in
 * this file, the columns after the first string are: the path below the per-process procfs directory
 * (.proc_path), the CLONE_NEW* flag (.clone_flag), the pidfd ioctl command used to obtain a namespace
 * fd (.pidfd_get_ns_ioctl_cmd) and the inode number compared against by namespace_is_init()
 * (.root_inode; 0 makes namespace_is_init() return -EBADR). The first string is presumably the short
 * name of the namespace type — TODO confirm against namespace-util.h. */
const struct namespace_info namespace_info[_NAMESPACE_TYPE_MAX + 1] = {
27
        [NAMESPACE_CGROUP] =  { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, PIDFD_GET_CGROUP_NAMESPACE, PROC_CGROUP_INIT_INO },
28
        [NAMESPACE_IPC]    =  { "ipc",    "ns/ipc",    CLONE_NEWIPC,    PIDFD_GET_IPC_NAMESPACE,    PROC_IPC_INIT_INO    },
29
        [NAMESPACE_NET]    =  { "net",    "ns/net",    CLONE_NEWNET,    PIDFD_GET_NET_NAMESPACE,    0                    },
30
        /* So, the mount namespace flag is called CLONE_NEWNS for historical
31
         * reasons. Let's expose it here under a more explanatory name: "mnt".
32
         * This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */
33
        [NAMESPACE_MOUNT]  =  { "mnt",    "ns/mnt",    CLONE_NEWNS,     PIDFD_GET_MNT_NAMESPACE,    0                    },
34
        [NAMESPACE_PID]    =  { "pid",    "ns/pid",    CLONE_NEWPID,    PIDFD_GET_PID_NAMESPACE,    PROC_PID_INIT_INO    },
35
        [NAMESPACE_USER]   =  { "user",   "ns/user",   CLONE_NEWUSER,   PIDFD_GET_USER_NAMESPACE,   PROC_USER_INIT_INO   },
36
        [NAMESPACE_UTS]    =  { "uts",    "ns/uts",    CLONE_NEWUTS,    PIDFD_GET_UTS_NAMESPACE,    PROC_UTS_INIT_INO    },
37
        [NAMESPACE_TIME]   =  { "time",   "ns/time",   CLONE_NEWTIME,   PIDFD_GET_TIME_NAMESPACE,   PROC_TIME_INIT_INO   },
38
        {}, /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */
39
};
40

41
/* Builds the procfs path of the namespace of the given type for the given PID, by handing the table's
 * .proc_path to procfs_file_alloca(). Callers in this file pass 0 as PID when they mean the calling
 * process. NOTE(review): judging by its name procfs_file_alloca() allocates on the caller's stack, so
 * the result should only be used within the invoking function — confirm. */
#define pid_namespace_path(pid, type) procfs_file_alloca(pid, namespace_info[type].proc_path)
42

43
NamespaceType clone_flag_to_namespace_type(unsigned long clone_flag) {
144✔
44
        for (NamespaceType t = 0; t < _NAMESPACE_TYPE_MAX; t++)
670✔
45
                if (((namespace_info[t].clone_flag ^ clone_flag) & (CLONE_NEWCGROUP|CLONE_NEWIPC|CLONE_NEWNET|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUSER|CLONE_NEWUTS|CLONE_NEWTIME)) == 0)
670✔
46
                        return t;
47

48
        return _NAMESPACE_TYPE_INVALID;
49
}
50

51
bool namespace_type_supported(NamespaceType type) {
        const char *ns_path;

        /* Reports whether the procfs entry for the specified namespace type exists for the calling
         * process. */

        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        ns_path = pid_namespace_path(0, type);
        if (access(ns_path, F_OK) < 0)
                return false;

        return true;
}
57

58
static int pidref_namespace_open_by_type_internal(const PidRef *pidref, NamespaceType type, bool *need_verify) {
        int r;

        /* Opens the namespace of the specified type of the specified process and returns an fd for it,
         * or a negative errno-style error. The pidfd ioctl is tried first if the PidRef carries an fd;
         * if that reports "not supported" (or there is no fd) the procfs path is opened instead.
         *
         * If need_verify is non-NULL it is set to true whenever the procfs route is taken, and the
         * caller is expected to call pidref_verify() itself. If it is NULL, the verification is done
         * here. */

        assert(pidref_is_set(pidref));
        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        if (pidref_is_remote(pidref))
                return -EREMOTE;

        if (pidref->fd >= 0) {
                /* Preferred route: ask the pidfd directly */
                r = pidfd_get_namespace(pidref->fd, namespace_info[type].pidfd_get_ns_ioctl_cmd);
                if (!ERRNO_IS_NEG_NOT_SUPPORTED(r))
                        return r;
        }

        /* Fallback route via procfs. Tell the caller that it has to verify the PidRef afterwards. */
        if (need_verify)
                *need_verify = true;

        _cleanup_close_ int fd = -EBADF;
        const char *path = pid_namespace_path(pidref->pid, type);

        fd = RET_NERRNO(open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC));
        if (fd < 0) {
                if (fd != -ENOENT)
                        return fd;

                r = proc_mounted();
                if (r == 0)
                        return -ENOSYS;  /* /proc/ is not available or not set up properly, we're most likely
                                            in some chroot environment. */
                if (r > 0)
                        return -ENOPKG;  /* If /proc/ is definitely around then this means the namespace type is not supported */

                /* Cannot tell whether /proc/ is mounted, hence return the original error */
                return fd;
        }

        if (!need_verify) {
                /* Nobody else is going to verify, hence do it ourselves */
                r = pidref_verify(pidref);
                if (r < 0)
                        return r;
        }

        return TAKE_FD(fd);
}
102

103
int pidref_namespace_open_by_type(const PidRef *pidref, NamespaceType type) {
        /* Public entry point: passing NULL as third argument makes the internal helper do the
         * pidref_verify() step itself whenever it has to go via procfs. */
        bool *deferred_verify = NULL;

        return pidref_namespace_open_by_type_internal(pidref, type, deferred_verify);
}
106

107
int namespace_open_by_type(NamespaceType type) {
        _cleanup_(pidref_done) PidRef own = PIDREF_NULL;
        int r;

        /* Same as pidref_namespace_open_by_type(), but for the calling process. */

        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        r = pidref_set_self(&own);

        return r < 0 ? r : pidref_namespace_open_by_type(&own, type);
}
119

120
int pidref_namespace_open(
223✔
121
                const PidRef *pidref,
122
                int *ret_pidns_fd,
123
                int *ret_mntns_fd,
124
                int *ret_netns_fd,
125
                int *ret_userns_fd,
126
                int *ret_root_fd) {
127

128
        _cleanup_close_ int pidns_fd = -EBADF, mntns_fd = -EBADF, netns_fd = -EBADF,
446✔
129
                userns_fd = -EBADF, root_fd = -EBADF;
446✔
130
        bool need_verify = false;
223✔
131
        int r;
223✔
132

133
        assert(pidref_is_set(pidref));
223✔
134

135
        if (pidref_is_remote(pidref))
446✔
136
                return -EREMOTE;
137

138
        if (ret_pidns_fd) {
223✔
139
                pidns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_PID, &need_verify);
112✔
140
                if (pidns_fd < 0)
112✔
141
                        return pidns_fd;
142
        }
143

144
        if (ret_mntns_fd) {
223✔
145
                mntns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_MOUNT, &need_verify);
113✔
146
                if (mntns_fd < 0)
113✔
147
                        return mntns_fd;
148
        }
149

150
        if (ret_netns_fd) {
223✔
151
                netns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_NET, &need_verify);
113✔
152
                if (netns_fd < 0)
113✔
153
                        return netns_fd;
154
        }
155

156
        if (ret_userns_fd) {
223✔
157
                userns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_USER, &need_verify);
38✔
158
                if (userns_fd < 0 && userns_fd != -ENOPKG)
38✔
159
                        return userns_fd;
160
        }
161

162
        if (ret_root_fd) {
223✔
163
                const char *root;
113✔
164

165
                root = procfs_file_alloca(pidref->pid, "root");
113✔
166
                root_fd = RET_NERRNO(open(root, O_CLOEXEC|O_DIRECTORY));
113✔
167
                if (root_fd == -ENOENT && proc_mounted() == 0)
×
168
                        return -ENOSYS;
169
                if (root_fd < 0)
113✔
170
                        return root_fd;
171

172
                need_verify = true;
113✔
173
        }
174

175
        if (need_verify) {
223✔
176
                r = pidref_verify(pidref);
113✔
177
                if (r < 0)
113✔
178
                        return r;
179
        }
180

181
        if (ret_pidns_fd)
223✔
182
                *ret_pidns_fd = TAKE_FD(pidns_fd);
112✔
183

184
        if (ret_mntns_fd)
223✔
185
                *ret_mntns_fd = TAKE_FD(mntns_fd);
113✔
186

187
        if (ret_netns_fd)
223✔
188
                *ret_netns_fd = TAKE_FD(netns_fd);
113✔
189

190
        if (ret_userns_fd)
223✔
191
                *ret_userns_fd = TAKE_FD(userns_fd);
38✔
192

193
        if (ret_root_fd)
223✔
194
                *ret_root_fd = TAKE_FD(root_fd);
113✔
195

196
        return 0;
197
}
198

199
int namespace_open(
36✔
200
                pid_t pid,
201
                int *ret_pidns_fd,
202
                int *ret_mntns_fd,
203
                int *ret_netns_fd,
204
                int *ret_userns_fd,
205
                int *ret_root_fd) {
206

207
        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
36✔
208
        int r;
36✔
209

210
        r = pidref_set_pid(&pidref, pid);
36✔
211
        if (r < 0)
36✔
212
                return r;
213

214
        return pidref_namespace_open(&pidref, ret_pidns_fd, ret_mntns_fd, ret_netns_fd, ret_userns_fd, ret_root_fd);
36✔
215
}
216

217
int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
428✔
218
        int r;
428✔
219

220
        if (userns_fd >= 0) {
428✔
221
                /* Can't setns to your own userns, since then you could escalate from non-root to root in
222
                 * your own namespace, so check if namespaces are equal before attempting to enter. */
223

224
                r = is_our_namespace(userns_fd, NAMESPACE_USER);
2✔
225
                if (r < 0)
2✔
226
                        return r;
227
                if (r > 0)
2✔
228
                        userns_fd = -EBADF;
2✔
229
        }
230

231
        if (pidns_fd >= 0)
428✔
232
                if (setns(pidns_fd, CLONE_NEWPID) < 0)
59✔
233
                        return -errno;
×
234

235
        if (mntns_fd >= 0)
428✔
236
                if (setns(mntns_fd, CLONE_NEWNS) < 0)
301✔
237
                        return -errno;
×
238

239
        if (netns_fd >= 0)
428✔
240
                if (setns(netns_fd, CLONE_NEWNET) < 0)
128✔
241
                        return -errno;
×
242

243
        if (userns_fd >= 0)
428✔
244
                if (setns(userns_fd, CLONE_NEWUSER) < 0)
×
245
                        return -errno;
×
246

247
        if (root_fd >= 0) {
428✔
248
                if (fchdir(root_fd) < 0)
60✔
249
                        return -errno;
×
250

251
                if (chroot(".") < 0)
60✔
252
                        return -errno;
×
253
        }
254

255
        if (userns_fd >= 0)
428✔
256
                return reset_uid_gid();
×
257

258
        return 0;
259
}
260

261
int fd_is_namespace(int fd, NamespaceType type) {
        /* Checks whether the specified file descriptor refers to a namespace. If type is not negative
         * (i.e. not _NAMESPACE_TYPE_INVALID) the namespace additionally has to be of that type. Returns
         * > 0 if so, 0 if not, negative errno-style error on failure. */

        assert(fd >= 0);
        assert(type < _NAMESPACE_TYPE_MAX);

        int r = fd_is_fs_type(fd, NSFS_MAGIC);
        if (r <= 0)
                return r;

        if (type < 0) /* Caller doesn't care about the type */
                return true;

        int nstype = ioctl(fd, NS_GET_NSTYPE);
        if (nstype < 0)
                return -errno;

        NamespaceType actual = clone_flag_to_namespace_type(nstype);
        if (actual < 0)
                return -EBADF; /* Uh? Unknown namespace type? */

        return actual == type;
}
286

287
int is_our_namespace(int fd, NamespaceType type) {
        /* Checks whether the namespace fd refers to the namespace of the specified type the calling
         * process is in, by comparing it via fd_inode_same() against a freshly opened fd of our own
         * namespace. Returns -EUCLEAN if the fd is not a namespace fd of the right type. */

        assert(fd >= 0);
        assert(type < _NAMESPACE_TYPE_MAX);

        int r = fd_is_namespace(fd, type);
        if (r == 0) /* Not a namespace or not of the right type? */
                return -EUCLEAN;
        if (r < 0)
                return r;

        _cleanup_close_ int own_fd = namespace_open_by_type(type);
        if (own_fd < 0)
                return own_fd;

        return fd_inode_same(fd, own_fd);
}
305

306
int namespace_is_init(NamespaceType type) {
        const char *ns_path;
        struct stat st;
        int r;

        /* Checks whether the calling process is in the initial namespace of the specified type, by
         * comparing the inode number of its namespace file with the one recorded in the table.
         * Returns > 0 if so, 0 if not, -EBADR if the table has no inode number for this type. */

        assert(type >= 0);
        assert(type < _NAMESPACE_TYPE_MAX);

        if (namespace_info[type].root_inode == 0)
                return -EBADR; /* Cannot answer this question */

        ns_path = pid_namespace_path(0, type);

        r = RET_NERRNO(stat(ns_path, &st));
        if (r >= 0)
                return st.st_ino == namespace_info[type].root_inode;
        if (r != -ENOENT)
                return r;

        /* If the namespace file is missing while /proc/ itself is mounted, then the namespace type is
         * turned off in the kernel and we hence are in the init namespace. */
        r = proc_mounted();
        if (r < 0)
                return -ENOENT; /* If we can't determine if /proc/ is mounted propagate original error */
        if (r == 0)
                return -ENOSYS;

        return true;
}
332

333
int pidref_in_same_namespace(PidRef *pid1, PidRef *pid2, NamespaceType type) {
        _cleanup_close_ int first_ns = -EBADF, second_ns = -EBADF;

        /* Checks whether the two processes are in the same namespace of the specified type. A NULL
         * PidRef stands for the calling process. Returns > 0 if so, 0 if not, negative errno-style
         * error on failure. */

        assert(!pid1 || pidref_is_set(pid1));
        assert(!pid2 || pidref_is_set(pid2));
        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        if (pidref_equal(pid1, pid2))
                return true;

        first_ns = pid1 ? pidref_namespace_open_by_type(pid1, type) : namespace_open_by_type(type);
        if (first_ns < 0)
                return first_ns;

        second_ns = pid2 ? pidref_namespace_open_by_type(pid2, type) : namespace_open_by_type(type);
        if (second_ns < 0)
                return second_ns;

        return fd_inode_same(first_ns, second_ns);
}
361

362
int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type) {
        /* PID based wrapper around pidref_in_same_namespace(). A PID of 0 refers to the calling
         * process, and is passed on as NULL. */

        assert(pid1 >= 0);
        assert(pid2 >= 0);

        PidRef first = PIDREF_MAKE_FROM_PID(pid1), second = PIDREF_MAKE_FROM_PID(pid2);
        PidRef *a = pid1 == 0 ? NULL : &first;
        PidRef *b = pid2 == 0 ? NULL : &second;

        return pidref_in_same_namespace(a, b, type);
}
369

370
int namespace_get_leader(PidRef *pidref, NamespaceType type, PidRef *ret) {
        int r;

        /* Walks up the chain of parent processes, starting at the specified process, until a process
         * is found whose parent lives in a different namespace of the specified type. That process is
         * returned in *ret. Returns 0 on success, negative errno-style error otherwise.
         *
         * Note: we don't bother with pidref_is_set()/pidref_is_remote() here, as the first call we do,
         * pidref_get_ppid_as_pidref() calls those anyway */

        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);
        assert(ret);

        _cleanup_(pidref_done) PidRef current = PIDREF_NULL;
        PidRef *cursor = pidref;

        for (;;) {
                _cleanup_(pidref_done) PidRef parent = PIDREF_NULL;

                r = pidref_get_ppid_as_pidref(cursor, &parent);
                if (r < 0)
                        return r;

                r = pidref_in_same_namespace(cursor, &parent, type);
                if (r < 0)
                        return r;
                if (r > 0) {
                        /* Parent shares the namespace with the child, hence continue one level up */
                        pidref_done(&current);
                        current = TAKE_PIDREF(parent);
                        cursor = &current;
                        continue;
                }

                /* If the parent and the child are not in the same namespace, then the child is
                 * the leader we are looking for. */

                if (pidref_is_set(&current)) {
                        *ret = TAKE_PIDREF(current);
                        return 0;
                }

                /* We never moved up, i.e. the process passed in is the leader. It is owned by the
                 * caller, hence hand out a copy. */
                r = pidref_copy(cursor, ret);
                if (r < 0)
                        return r;

                return 0;
        }
}
412

413
int detach_mount_namespace(void) {
193✔
414
        /* Detaches the mount namespace, disabling propagation from our namespace to the host. Sets
415
         * propagation first to MS_SLAVE for all mounts (disabling propagation), and then back to MS_SHARED
416
         * (so that we create a new peer group).  */
417

418
        if (unshare(CLONE_NEWNS) < 0)
193✔
419
                return log_debug_errno(errno, "Failed to acquire mount namespace: %m");
×
420

421
        if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
193✔
422
                return log_debug_errno(errno, "Failed to set mount propagation to MS_SLAVE for all mounts: %m");
×
423

424
        if (mount(NULL, "/", NULL, MS_SHARED | MS_REC, NULL) < 0)
193✔
425
                return log_debug_errno(errno, "Failed to set mount propagation back to MS_SHARED for all mounts: %m");
×
426

427
        return 0;
428
}
429

430
int detach_mount_namespace_harder(uid_t target_uid, gid_t target_gid) {
        int r;

        /* Tries detach_mount_namespace() first. If that doesn't work due to permissions (-EPERM), opens
         * up an unprivileged user namespace with a mapping of the originating UID/GID to the specified
         * target UID/GID, and then tries detach_mount_namespace() once more.
         *
         * In short: puts in much more effort to get a mount namespace, making use of unprivileged user
         * namespaces if need be.
         *
         * Note that after this function completed:
         *
         *    → if we had privs, afterwards uids/gids on files and processes are as before
         *
         *    → if we had no privs, our own id and all our files will show up owned by target_uid/target_gid,
         *    and everything else owned by nobody.
         *
         * Yes, that's quite a difference. */

        if (!uid_is_valid(target_uid) || !gid_is_valid(target_gid))
                return -EINVAL;

        r = detach_mount_namespace();
        if (r != -EPERM)
                return r;

        /* Query our IDs before entering the new user namespace */
        uid_t from_uid = getuid();
        gid_t from_gid = getgid();

        if (unshare(CLONE_NEWUSER) < 0)
                return log_debug_errno(errno, "Failed to acquire user namespace: %m");

        r = write_string_filef("/proc/self/uid_map", 0,
                               UID_FMT " " UID_FMT " 1\n", target_uid, from_uid);
        if (r < 0)
                return log_debug_errno(r, "Failed to write uid map: %m");

        /* "setgroups" is written between the UID map and the GID map */
        r = write_string_file("/proc/self/setgroups", "deny", 0);
        if (r < 0)
                return log_debug_errno(r, "Failed to write setgroups file: %m");

        r = write_string_filef("/proc/self/gid_map", 0,
                               GID_FMT " " GID_FMT " 1\n", target_gid, from_gid);
        if (r < 0)
                return log_debug_errno(r, "Failed to write gid map: %m");

        return detach_mount_namespace();
}
482

483
int detach_mount_namespace_userns(int userns_fd) {
2✔
484
        int r;
2✔
485

486
        assert(userns_fd >= 0);
2✔
487

488
        if (setns(userns_fd, CLONE_NEWUSER) < 0)
2✔
489
                return log_debug_errno(errno, "Failed to join user namespace: %m");
×
490

491
        r = reset_uid_gid();
2✔
492
        if (r < 0)
2✔
493
                return log_debug_errno(r, "Failed to become root in user namespace: %m");
×
494

495
        return detach_mount_namespace();
2✔
496
}
497

498
int parse_userns_uid_range(const char *s, uid_t *ret_uid_shift, uid_t *ret_uid_range) {
2✔
499
        _cleanup_free_ char *buffer = NULL;
2✔
500
        const char *range, *shift;
2✔
501
        int r;
2✔
502
        uid_t uid_shift, uid_range = 65536;
2✔
503

504
        assert(s);
2✔
505

506
        range = strchr(s, ':');
2✔
507
        if (range) {
2✔
508
                buffer = strndup(s, range - s);
×
509
                if (!buffer)
×
510
                        return log_oom();
×
511
                shift = buffer;
×
512

513
                range++;
×
514
                r = safe_atou32(range, &uid_range);
×
515
                if (r < 0)
×
516
                        return log_error_errno(r, "Failed to parse UID range \"%s\": %m", range);
×
517
        } else
518
                shift = s;
519

520
        r = parse_uid(shift, &uid_shift);
2✔
521
        if (r < 0)
2✔
522
                return log_error_errno(r, "Failed to parse UID \"%s\": %m", s);
2✔
523

524
        if (!userns_shift_range_valid(uid_shift, uid_range))
×
525
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "UID range cannot be empty or go beyond " UID_FMT ".", UID_INVALID);
×
526

527
        if (ret_uid_shift)
×
528
                *ret_uid_shift = uid_shift;
×
529

530
        if (ret_uid_range)
×
531
                *ret_uid_range = uid_range;
×
532

533
        return 0;
534
}
535

536
int userns_acquire_empty(void) {
22✔
537
        _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL;
22✔
538
        int r;
22✔
539

540
        r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS|FORK_FREEZE, &pid);
22✔
541
        if (r < 0)
22✔
542
                return r;
543
        assert(r > 0);
22✔
544

545
        return pidref_namespace_open_by_type(&pid, NAMESPACE_USER);
22✔
546
}
547

548
int userns_acquire(const char *uid_map, const char *gid_map, bool setgroups_deny) {
3,775✔
549
        char path[STRLEN("/proc//setgroups") + DECIMAL_STR_MAX(pid_t) + 1];
3,775✔
550
        _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL;
3,775✔
551
        int r;
3,775✔
552

553
        assert(uid_map);
3,775✔
554
        assert(gid_map);
3,775✔
555

556
        /* Forks off a process in a new userns, configures the specified uidmap/gidmap, acquires an fd to it,
557
         * and then kills the process again. This way we have a userns fd that is not bound to any
558
         * process. We can use that for file system mounts and similar. */
559

560
        r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS|FORK_FREEZE, &pid);
3,775✔
561
        if (r < 0)
3,775✔
562
                return r;
563
        assert(r > 0);
3,775✔
564

565
        xsprintf(path, "/proc/" PID_FMT "/uid_map", pid.pid);
3,775✔
566
        r = write_string_file(path, uid_map, WRITE_STRING_FILE_DISABLE_BUFFER);
3,775✔
567
        if (r < 0)
3,775✔
568
                return log_debug_errno(r, "Failed to write UID map: %m");
×
569

570
        if (setgroups_deny) {
3,775✔
571
                xsprintf(path, "/proc/" PID_FMT "/setgroups", pid.pid);
3,775✔
572
                r = write_string_file(path, "deny", WRITE_STRING_FILE_DISABLE_BUFFER);
3,775✔
573
                if (r < 0)
3,775✔
574
                        return log_debug_errno(r, "Failed to write setgroups file: %m");
×
575
        }
576

577
        xsprintf(path, "/proc/" PID_FMT "/gid_map", pid.pid);
3,775✔
578
        r = write_string_file(path, gid_map, WRITE_STRING_FILE_DISABLE_BUFFER);
3,775✔
579
        if (r < 0)
3,775✔
580
                return log_debug_errno(r, "Failed to write GID map: %m");
×
581

582
        return pidref_namespace_open_by_type(&pid, NAMESPACE_USER);
3,775✔
583
}
584

585
int userns_acquire_self_root(void) {
3,593✔
586

587
        /* Returns a user namespace with only our own uid/gid mapped to root, and everything else unmapped.
588
         *
589
         * Note: this can be acquired unprivileged! */
590

591
        _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
3,593✔
592
        if (asprintf(&uid_map, "0 " UID_FMT " 1", getuid()) < 0)
3,593✔
593
                return -ENOMEM;
594
        if (asprintf(&gid_map, "0 " GID_FMT " 1", getgid()) < 0)
3,593✔
595
                return -ENOMEM;
596

597
        return userns_acquire(uid_map, gid_map, /* setgroups_deny= */ true);
3,593✔
598
}
599

600
int userns_enter_and_pin(int userns_fd, pid_t *ret_pid) {
44✔
601
        _cleanup_close_pair_ int pfd[2] = EBADF_PAIR;
44✔
602
        _cleanup_(sigkill_waitp) pid_t pid = 0;
44✔
603
        ssize_t n;
44✔
604
        char x;
44✔
605
        int r;
44✔
606

607
        assert(userns_fd >= 0);
44✔
608
        assert(ret_pid);
44✔
609

610
        if (pipe2(pfd, O_CLOEXEC) < 0)
44✔
611
                return -errno;
×
612

613
        r = safe_fork_full(
88✔
614
                        "(sd-pinuserns)",
615
                        /* stdio_fds= */ NULL,
616
                        (int[]) { pfd[1], userns_fd }, 2,
44✔
617
                        FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL,
618
                        &pid);
619
        if (r < 0)
44✔
620
                return r;
621
        if (r == 0) {
44✔
622
                /* Child. */
623

624
                if (setns(userns_fd, CLONE_NEWUSER) < 0) {
×
625
                        log_debug_errno(errno, "Failed to join userns: %m");
×
626
                        _exit(EXIT_FAILURE);
×
627
                }
628

629
                userns_fd = safe_close(userns_fd);
×
630

631
                n = write(pfd[1], &(const char) { 'x' }, 1);
×
632
                if (n < 0) {
×
633
                        log_debug_errno(errno, "Failed to write to pipe: %m");
×
634
                        _exit(EXIT_FAILURE);
×
635
                }
636
                assert(n == 1);
×
637

638
                freeze();
×
639
        }
640

641
        pfd[1] = safe_close(pfd[1]);
44✔
642

643
        n = read(pfd[0], &x, 1);
44✔
644
        if (n < 0)
44✔
645
                return -errno;
×
646
        if (n == 0)
44✔
647
                return -EPROTO;
648
        assert(n == 1);
44✔
649
        assert(x == 'x');
44✔
650

651
        *ret_pid = TAKE_PID(pid);
44✔
652
        return 0;
44✔
653
}
654

655
bool userns_supported(void) {
        /* Reports whether "/proc/self/uid_map" is accessible, which is used as the indicator that
         * user namespaces are available. */

        if (access("/proc/self/uid_map", F_OK) < 0)
                return false;

        return true;
}
658

659
int userns_get_base_uid(int userns_fd, uid_t *ret_uid, gid_t *ret_gid) {
20✔
660
        _cleanup_(sigkill_waitp) pid_t pid = 0;
20✔
661
        int r;
20✔
662

663
        assert(userns_fd >= 0);
20✔
664

665
        r = userns_enter_and_pin(userns_fd, &pid);
20✔
666
        if (r < 0)
20✔
667
                return r;
668

669
        uid_t uid;
20✔
670
        r = uid_map_search_root(pid, UID_RANGE_USERNS_OUTSIDE, &uid);
20✔
671
        if (r < 0)
20✔
672
                return r;
673

674
        gid_t gid;
19✔
675
        r = uid_map_search_root(pid, GID_RANGE_USERNS_OUTSIDE, &gid);
19✔
676
        if (r < 0)
19✔
677
                return r;
678

679
        if (!ret_gid && uid != gid)
19✔
680
                return -EUCLEAN;
681

682
        if (ret_uid)
18✔
683
                *ret_uid = uid;
18✔
684
        if (ret_gid)
18✔
685
                *ret_gid = gid;
1✔
686

687
        return 0;
688
}
689

690
int process_is_owned_by_uid(const PidRef *pidref, uid_t uid) {
8✔
691
        int r;
8✔
692

693
        /* Checks if the specified process either is owned directly by the specified user, or if it is inside
694
         * a user namespace owned by it. */
695

696
        assert(uid_is_valid(uid));
8✔
697

698
        uid_t process_uid;
8✔
699
        r = pidref_get_uid(pidref, &process_uid);
8✔
700
        if (r < 0)
8✔
701
                return r;
8✔
702
        if (process_uid == uid)
8✔
703
                return true;
704

705
        _cleanup_close_ int userns_fd = -EBADF;
8✔
706
        userns_fd = pidref_namespace_open_by_type(pidref, NAMESPACE_USER);
5✔
707
        if (userns_fd == -ENOPKG) /* If userns is not supported, then they don't matter for ownership */
5✔
708
                return false;
709
        if (userns_fd < 0)
5✔
710
                return userns_fd;
711

712
        for (unsigned iteration = 0;; iteration++) {
×
713
                uid_t ns_uid;
5✔
714

715
                /* This process is in our own userns? Then we are done, in our own userns only the UIDs
716
                 * themselves matter. */
717
                r = is_our_namespace(userns_fd, NAMESPACE_USER);
5✔
718
                if (r < 0)
5✔
719
                        return r;
5✔
720
                if (r > 0)
5✔
721
                        return false;
722

723
                if (ioctl(userns_fd, NS_GET_OWNER_UID, &ns_uid) < 0)
3✔
724
                        return -errno;
×
725
                if (ns_uid == uid)
3✔
726
                        return true;
727

728
                /* Paranoia check */
729
                if (iteration > 16)
×
730
                        return log_debug_errno(SYNTHETIC_ERRNO(ELOOP), "Giving up while tracing parents of user namespaces after %u steps.", iteration);
×
731

732
                /* Go up the tree */
733
                _cleanup_close_ int parent_fd = ioctl(userns_fd, NS_GET_USERNS);
5✔
734
                if (parent_fd < 0) {
×
735
                        if (errno == EPERM) /* EPERM means we left our own userns */
×
736
                                return false;
737

738
                        return -errno;
×
739
                }
740

741
                close_and_replace(userns_fd, parent_fd);
×
742
        }
743
}
744

745
int is_idmapping_supported(const char *path) {
3,592✔
746
        _cleanup_close_ int mount_fd = -EBADF, userns_fd = -EBADF, dir_fd = -EBADF;
7,184✔
747
        int r;
3,592✔
748

749
        assert(path);
3,592✔
750

751
        if (!mount_new_api_supported())
3,592✔
752
                return false;
753

754
        userns_fd = r = userns_acquire_self_root();
3,592✔
755
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL)
3,592✔
756
                return false;
757
        if (r == -ENOSPC) {
3,592✔
758
                log_debug_errno(r, "Failed to acquire new user namespace, user.max_user_namespaces seems to be exhausted or maybe even zero, assuming ID-mapping is not supported: %m");
×
759
                return false;
×
760
        }
761
        if (r < 0)
3,592✔
762
                return log_debug_errno(r, "Failed to acquire new user namespace for checking if '%s' supports ID-mapping: %m", path);
×
763

764
        dir_fd = r = RET_NERRNO(open(path, O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
3,592✔
765
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r))
7,184✔
766
                return false;
767
        if (r < 0)
3,592✔
768
                return log_debug_errno(r, "Failed to open '%s', cannot determine if ID-mapping is supported: %m", path);
×
769

770
        mount_fd = r = RET_NERRNO(open_tree(dir_fd, "", AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC));
3,592✔
771
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL)
3,592✔
772
                return false;
×
773
        if (r < 0)
3,592✔
774
                return log_debug_errno(r, "Failed to open mount tree '%s', cannot determine if ID-mapping is supported: %m", path);
×
775

776
        r = RET_NERRNO(mount_setattr(mount_fd, "", AT_EMPTY_PATH,
3,592✔
777
                       &(struct mount_attr) {
3,592✔
778
                                .attr_set = MOUNT_ATTR_IDMAP | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RDONLY | MOUNT_ATTR_NODEV,
779
                                .userns_fd = userns_fd,
780
                        }, sizeof(struct mount_attr)));
781
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL)
3,592✔
782
                return false;
×
783
        if (r < 0)
3,592✔
784
                return log_debug_errno(r, "Failed to set mount attribute to '%s', cannot determine if ID-mapping is supported: %m", path);
×
785

786
        return true;
787
}
788

789
int netns_acquire(void) {
7✔
790
        _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL;
7✔
791
        int r;
7✔
792

793
        /* Forks off a process in a new network namespace, acquires a network namespace fd, and then kills
794
         * the process again. This way we have a netns fd that is not bound to any process. */
795

796
        r = pidref_safe_fork("(sd-mknetns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_NETNS|FORK_FREEZE, &pid);
7✔
797
        if (r < 0)
7✔
798
                return log_debug_errno(r, "Failed to fork process into new netns: %m");
×
799
        assert(r > 0);
7✔
800

801
        return pidref_namespace_open_by_type(&pid, NAMESPACE_NET);
7✔
802
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc