• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 22007273413

13 Feb 2026 09:37PM UTC coverage: 72.743% (+0.3%) from 72.485%
22007273413

push

github

web-flow
test: do not fail when parsing PID that isn't thread-group leader (#40677)

```
TEST-02-UNITTESTS.sh[4382]: [  707.393188] test-cgroup-util[426]: Failed to open pidfd for pid 414: Invalid argument
TEST-02-UNITTESTS.sh[4382]: [  707.393193] test-cgroup-util[426]: src/test/test-cgroup-util.c:249: Assertion failed: Expected "r = proc_dir_read_pidref(d, &pid)" to succeed, but got error: -22/EINVAL
```

The kernel can return EINVAL from pidfd_open() when the selected PID is
not a thread group leader. Don't fail the test in that case: we are
iterating over every process, so this can occasionally happen.

3 of 4 new or added lines in 1 file covered. (75.0%)

6126 existing lines in 58 files now uncovered.

312809 of 430017 relevant lines covered (72.74%)

1147140.71 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

74.25
/src/basic/namespace-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <fcntl.h>
4
#include <linux/magic.h>
5
#include <linux/nsfs.h>
6
#include <sched.h>
7
#include <sys/ioctl.h>
8
#include <sys/mount.h>
9
#include <unistd.h>
10

11
#include "dlfcn-util.h"
12
#include "errno-util.h"
13
#include "fd-util.h"
14
#include "fileio.h"
15
#include "log.h"
16
#include "mountpoint-util.h"
17
#include "namespace-util.h"
18
#include "parse-util.h"
19
#include "pidfd-util.h"
20
#include "pidref.h"
21
#include "process-util.h"
22
#include "stat-util.h"
23
#include "stdio-util.h"
24
#include "uid-range.h"
25
#include "user-util.h"
26

27
const struct namespace_info namespace_info[_NAMESPACE_TYPE_MAX + 1] = {
28
        [NAMESPACE_CGROUP] =  { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, PIDFD_GET_CGROUP_NAMESPACE, PROC_CGROUP_INIT_INO },
29
        [NAMESPACE_IPC]    =  { "ipc",    "ns/ipc",    CLONE_NEWIPC,    PIDFD_GET_IPC_NAMESPACE,    PROC_IPC_INIT_INO    },
30
        [NAMESPACE_NET]    =  { "net",    "ns/net",    CLONE_NEWNET,    PIDFD_GET_NET_NAMESPACE,    0                    },
31
        /* So, the mount namespace flag is called CLONE_NEWNS for historical
32
         * reasons. Let's expose it here under a more explanatory name: "mnt".
33
         * This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */
34
        [NAMESPACE_MOUNT]  =  { "mnt",    "ns/mnt",    CLONE_NEWNS,     PIDFD_GET_MNT_NAMESPACE,    0                    },
35
        [NAMESPACE_PID]    =  { "pid",    "ns/pid",    CLONE_NEWPID,    PIDFD_GET_PID_NAMESPACE,    PROC_PID_INIT_INO    },
36
        [NAMESPACE_USER]   =  { "user",   "ns/user",   CLONE_NEWUSER,   PIDFD_GET_USER_NAMESPACE,   PROC_USER_INIT_INO   },
37
        [NAMESPACE_UTS]    =  { "uts",    "ns/uts",    CLONE_NEWUTS,    PIDFD_GET_UTS_NAMESPACE,    PROC_UTS_INIT_INO    },
38
        [NAMESPACE_TIME]   =  { "time",   "ns/time",   CLONE_NEWTIME,   PIDFD_GET_TIME_NAMESPACE,   PROC_TIME_INIT_INO   },
39
        {}, /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */
40
};
41

42
/* Expands to procfs_file_alloca() for process 'pid', passing the proc_path of the namespace_info[] entry
 * selected by 'type'. NOTE(review): going by the helper's name the string is presumably alloca()-allocated
 * and hence only valid inside the calling function — confirm against procfs_file_alloca(). */
#define pid_namespace_path(pid, type) procfs_file_alloca(pid, namespace_info[type].proc_path)
43

44
NamespaceType clone_flag_to_namespace_type(unsigned long clone_flag) {
822✔
45
        for (NamespaceType t = 0; t < _NAMESPACE_TYPE_MAX; t++)
4,674✔
46
                if (((namespace_info[t].clone_flag ^ clone_flag) & (CLONE_NEWCGROUP|CLONE_NEWIPC|CLONE_NEWNET|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUSER|CLONE_NEWUTS|CLONE_NEWTIME)) == 0)
4,674✔
47
                        return t;
48

49
        return _NAMESPACE_TYPE_INVALID;
50
}
51

52
/* Returns true if the namespace file of the given type can be accessed via the path generated for
 * pid 0, false otherwise (any access() failure counts as "not supported"). */
bool namespace_type_supported(NamespaceType type) {
        const char *ns_path;

        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        ns_path = pid_namespace_path(0, type);
        if (access(ns_path, F_OK) < 0)
                return false;

        return true;
}
58

59
/* Opens the namespace of the specified type of the process referenced by pidref and returns an fd for it,
 * or a negative errno-style error.
 *
 * If the pidref carries a pidfd, pidfd_get_namespace() is tried first; unless that fails with a
 * "not supported" kind of error its result is returned as-is. Otherwise the namespace file is opened by
 * path below /proc/.
 *
 * need_verify: if non-NULL it is set to true as soon as the /proc/ based path is taken, and the caller is
 * expected to call pidref_verify() itself. If NULL, pidref_verify() is called here after a successful
 * open.
 *
 * Errors generated here: -EREMOTE for remote pidrefs; -ENOSYS if the file is missing and proc_mounted()
 * reports 0; -ENOPKG if the file is missing and proc_mounted() reports a positive value. */
static int pidref_namespace_open_by_type_internal(const PidRef *pidref, NamespaceType type, bool *need_verify) {
        int r;

        assert(pidref_is_set(pidref));
        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        if (pidref_is_remote(pidref))
                return -EREMOTE;

        if (pidref->fd >= 0) {
                /* Preferred path: ask the pidfd for the namespace directly */
                r = pidfd_get_namespace(pidref->fd, namespace_info[type].pidfd_get_ns_ioctl_cmd);
                if (!ERRNO_IS_NEG_NOT_SUPPORTED(r))
                        return r;
        }

        /* From here on we go by PID, hence tell the caller (if interested) that verification is due */
        if (need_verify)
                *need_verify = true;

        _cleanup_close_ int fd = -EBADF;
        const char *path;

        path = pid_namespace_path(pidref->pid, type);
        fd = RET_NERRNO(open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC));
        if (fd < 0) {
                if (fd == -ENOENT) {
                        r = proc_mounted();
                        if (r == 0)
                                return -ENOSYS; /* /proc/ is not available or not set up properly, most
                                                 * likely we are in some chroot environment. */
                        if (r > 0)
                                return -ENOPKG; /* /proc/ is definitely around, hence the namespace type
                                                 * is not supported */

                        /* Couldn't determine? Then propagate the original error below. */
                }

                return fd;
        }

        if (need_verify) /* The caller verifies on its own */
                return TAKE_FD(fd);

        r = pidref_verify(pidref);
        if (r < 0)
                return r;

        return TAKE_FD(fd);
}
103

104
/* Public wrapper around pidref_namespace_open_by_type_internal() that passes no need_verify pointer, i.e.
 * any pidref verification needed is done by the internal helper. Returns its result unchanged. */
int pidref_namespace_open_by_type(const PidRef *pidref, NamespaceType type) {
        int fd;

        fd = pidref_namespace_open_by_type_internal(pidref, type, /* need_verify= */ NULL);
        return fd;
}
107

108
/* Opens the namespace of the given type of the calling process: acquires a pidref for ourselves via
 * pidref_set_self() and hands it to pidref_namespace_open_by_type(). Returns what that call returns, or
 * the negative error of pidref_set_self(). */
int namespace_open_by_type(NamespaceType type) {
        _cleanup_(pidref_done) PidRef own = PIDREF_NULL;
        int r;

        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        r = pidref_set_self(&own);
        if (r >= 0)
                r = pidref_namespace_open_by_type(&own, type);

        return r;
}
120

121
int pidref_namespace_open(
156✔
122
                const PidRef *pidref,
123
                int *ret_pidns_fd,
124
                int *ret_mntns_fd,
125
                int *ret_netns_fd,
126
                int *ret_userns_fd,
127
                int *ret_root_fd) {
128

129
        _cleanup_close_ int pidns_fd = -EBADF, mntns_fd = -EBADF, netns_fd = -EBADF,
312✔
130
                userns_fd = -EBADF, root_fd = -EBADF;
312✔
131
        bool need_verify = false;
156✔
132
        int r;
156✔
133

134
        assert(pidref_is_set(pidref));
156✔
135

136
        if (pidref_is_remote(pidref))
312✔
137
                return -EREMOTE;
138

139
        if (ret_pidns_fd) {
156✔
140
                pidns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_PID, &need_verify);
117✔
141
                if (pidns_fd < 0)
117✔
142
                        return pidns_fd;
143
        }
144

145
        if (ret_mntns_fd) {
156✔
146
                mntns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_MOUNT, &need_verify);
118✔
147
                if (mntns_fd < 0)
118✔
148
                        return mntns_fd;
149
        }
150

151
        if (ret_netns_fd) {
156✔
152
                netns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_NET, &need_verify);
41✔
153
                if (netns_fd < 0)
41✔
154
                        return netns_fd;
155
        }
156

157
        if (ret_userns_fd) {
156✔
158
                userns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_USER, &need_verify);
39✔
159
                if (userns_fd < 0 && userns_fd != -ENOPKG)
39✔
160
                        return userns_fd;
161
        }
162

163
        if (ret_root_fd) {
156✔
164
                const char *root;
118✔
165

166
                root = procfs_file_alloca(pidref->pid, "root");
118✔
167
                root_fd = RET_NERRNO(open(root, O_CLOEXEC|O_DIRECTORY));
118✔
168
                if (root_fd == -ENOENT && proc_mounted() == 0)
×
169
                        return -ENOSYS;
170
                if (root_fd < 0)
118✔
171
                        return root_fd;
172

173
                need_verify = true;
118✔
174
        }
175

176
        if (need_verify) {
156✔
177
                r = pidref_verify(pidref);
118✔
178
                if (r < 0)
118✔
179
                        return r;
180
        }
181

182
        if (ret_pidns_fd)
156✔
183
                *ret_pidns_fd = TAKE_FD(pidns_fd);
117✔
184

185
        if (ret_mntns_fd)
156✔
186
                *ret_mntns_fd = TAKE_FD(mntns_fd);
118✔
187

188
        if (ret_netns_fd)
156✔
189
                *ret_netns_fd = TAKE_FD(netns_fd);
41✔
190

191
        if (ret_userns_fd)
156✔
192
                *ret_userns_fd = TAKE_FD(userns_fd);
39✔
193

194
        if (ret_root_fd)
156✔
195
                *ret_root_fd = TAKE_FD(root_fd);
118✔
196

197
        return 0;
198
}
199

200
int namespace_open(
36✔
201
                pid_t pid,
202
                int *ret_pidns_fd,
203
                int *ret_mntns_fd,
204
                int *ret_netns_fd,
205
                int *ret_userns_fd,
206
                int *ret_root_fd) {
207

208
        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
36✔
209
        int r;
36✔
210

211
        r = pidref_set_pid(&pidref, pid);
36✔
212
        if (r < 0)
36✔
213
                return r;
214

215
        return pidref_namespace_open(&pidref, ret_pidns_fd, ret_mntns_fd, ret_netns_fd, ret_userns_fd, ret_root_fd);
36✔
216
}
217

218
int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
367✔
219
        int r;
367✔
220

221
        /* Block dlopen() now, to avoid us inadvertently loading shared library from another namespace */
222
        block_dlopen();
367✔
223

224
        if (userns_fd >= 0) {
367✔
225
                /* Can't setns to your own userns, since then you could escalate from non-root to root in
226
                 * your own namespace, so check if namespaces are equal before attempting to enter. */
227

228
                r = is_our_namespace(userns_fd, NAMESPACE_USER);
14✔
229
                if (r < 0)
14✔
230
                        return r;
231
                if (r > 0)
14✔
232
                        userns_fd = -EBADF;
2✔
233
        }
234

235
        if (pidns_fd >= 0)
367✔
236
                if (setns(pidns_fd, CLONE_NEWPID) < 0)
62✔
237
                        return -errno;
×
238

239
        if (mntns_fd >= 0)
367✔
240
                if (setns(mntns_fd, CLONE_NEWNS) < 0)
300✔
241
                        return -errno;
×
242

243
        if (netns_fd >= 0)
367✔
244
                if (setns(netns_fd, CLONE_NEWNET) < 0)
56✔
245
                        return -errno;
×
246

247
        if (userns_fd >= 0)
367✔
248
                if (setns(userns_fd, CLONE_NEWUSER) < 0)
12✔
249
                        return -errno;
×
250

251
        if (root_fd >= 0) {
367✔
252
                if (fchdir(root_fd) < 0)
63✔
253
                        return -errno;
×
254

255
                if (chroot(".") < 0)
63✔
256
                        return -errno;
×
257
        }
258

259
        if (userns_fd >= 0)
367✔
260
                return reset_uid_gid();
12✔
261

262
        return 0;
263
}
264

UNCOV
265
/* Joins the namespace referenced by nsfd, but only if we are not a member of it already. This matters if
 * we don't necessarily own the namespace in question, as the kernel would unconditionally return EPERM
 * otherwise.
 *
 * Returns 0 if we already were in the namespace, 1 if it was joined now, negative errno-style error on
 * failure. */
static int namespace_enter_one_idempotent(int nsfd, NamespaceType type) {
        int r;

        assert(nsfd >= 0);
        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        r = is_our_namespace(nsfd, type);
        if (r != 0)
                return r < 0 ? r : 0; /* Either an error, or we are in there already */

        if (setns(nsfd, namespace_info[type].clone_flag) < 0)
                return -errno;

        return 1;
}
285

UNCOV
286
int namespace_enter_delegated(int userns_fd, int pidns_fd, int mntns_fd, int netns_fd, int root_fd) {
×
UNCOV
287
        int r;
×
288

289
        /* Similar to namespace_enter(), but operates on a set of namespaces that are potentially owned
290
         * by the userns ("delegated"), in which case we'll need to gain CAP_SYS_ADMIN by joining
291
         * the userns first, and the rest later. */
292

UNCOV
293
        assert(userns_fd >= 0);
×
294

295
        /* Block dlopen() now, to avoid us inadvertently loading shared library from another namespace */
UNCOV
296
        block_dlopen();
×
297

UNCOV
298
        if (setns(userns_fd, CLONE_NEWUSER) < 0)
×
UNCOV
299
                return -errno;
×
300

UNCOV
301
        if (pidns_fd >= 0) {
×
UNCOV
302
                r = namespace_enter_one_idempotent(pidns_fd, NAMESPACE_PID);
×
UNCOV
303
                if (r < 0)
×
304
                        return r;
305
        }
306

UNCOV
307
        if (mntns_fd >= 0) {
×
UNCOV
308
                r = namespace_enter_one_idempotent(mntns_fd, NAMESPACE_MOUNT);
×
UNCOV
309
                if (r < 0)
×
310
                        return r;
311
        }
312

UNCOV
313
        if (netns_fd >= 0) {
×
UNCOV
314
                r = namespace_enter_one_idempotent(netns_fd, NAMESPACE_NET);
×
UNCOV
315
                if (r < 0)
×
316
                        return r;
317
        }
318

UNCOV
319
        if (root_fd >= 0) {
×
UNCOV
320
                if (fchdir(root_fd) < 0)
×
UNCOV
321
                        return -errno;
×
322

UNCOV
323
                if (chroot(".") < 0)
×
UNCOV
324
                        return -errno;
×
325
        }
326

UNCOV
327
        return maybe_setgroups(/* size = */ 0, NULL);
×
328
}
329

330
/* Checks whether the specified file descriptor refers to a namespace, and — if type is not negative
 * (i.e. not _NAMESPACE_TYPE_INVALID) — whether it is a namespace of that type.
 *
 * Returns > 0 if so, 0 if not, negative errno-style error on failure (-EBADF if the kernel reports a
 * namespace type that is not in our table). */
int fd_is_namespace(int fd, NamespaceType type) {
        int r;

        assert(fd >= 0);
        assert(type < _NAMESPACE_TYPE_MAX);

        r = fd_is_fs_type(fd, NSFS_MAGIC);
        if (r <= 0) /* Error, or not on nsfs at all */
                return r;

        if (type < 0) /* Caller doesn't care about the type */
                return true;

        int nstype = ioctl(fd, NS_GET_NSTYPE);
        if (nstype < 0)
                return -errno;

        NamespaceType actual = clone_flag_to_namespace_type(nstype);
        if (actual < 0)
                return -EBADF; /* Uh? Unknown namespace type? */

        return actual == type;
}
355

356
/* Checks whether fd refers to the namespace of the given type that the calling process is in, by comparing
 * it via fd_inode_same() with a freshly opened fd of our own namespace. Returns the result of that
 * comparison, -EUCLEAN if fd is not a namespace fd (or not of the right type), or another negative
 * errno-style error. */
int is_our_namespace(int fd, NamespaceType type) {
        int r;

        assert(fd >= 0);
        assert(type < _NAMESPACE_TYPE_MAX);

        r = fd_is_namespace(fd, type);
        if (r == 0) /* Not a namespace or not of the right type? */
                return -EUCLEAN;
        if (r < 0)
                return r;

        _cleanup_close_ int self_fd = namespace_open_by_type(type);
        if (self_fd < 0)
                return self_fd;

        return fd_inode_same(fd, self_fd);
}
374

375
/* Checks whether the calling process is in the initial namespace of the given type, by comparing the inode
 * number of our namespace file with the root_inode recorded in namespace_info[].
 *
 * Returns > 0 if so, 0 if not. Returns -EBADR if the table has no inode for this type (question cannot be
 * answered), -ENOSYS if the file is missing and proc_mounted() reports 0, -ENOENT if the file is missing
 * and proc_mounted() itself fails, and other stat() errors as negative errno. */
int namespace_is_init(NamespaceType type) {
        struct stat st;
        int r;

        assert(type >= 0);
        assert(type < _NAMESPACE_TYPE_MAX);

        if (namespace_info[type].root_inode == 0)
                return -EBADR; /* Cannot answer this question */

        const char *ns_path = pid_namespace_path(0, type);

        r = RET_NERRNO(stat(ns_path, &st));
        if (r == -ENOENT) {
                /* If the /proc/ns/<type> API is not around in /proc/ then ns is off in the kernel and we
                 * are in the init ns */
                r = proc_mounted();
                if (r < 0)
                        return -ENOENT; /* Can't determine if /proc/ is mounted: propagate original error */
                if (r == 0)
                        return -ENOSYS;

                return true;
        }
        if (r < 0)
                return r;

        return st.st_ino == namespace_info[type].root_inode;
}
401

402
/* Checks whether two processes are in the same namespace of the given type. Either pidref may be NULL, which
 * refers to the calling process. Returns true right away if pidref_equal() considers the two equal,
 * otherwise opens both namespaces and returns the result of fd_inode_same() on them; negative errno-style
 * error if opening fails. */
int pidref_in_same_namespace(PidRef *pid1, PidRef *pid2, NamespaceType type) {
        _cleanup_close_ int ns1 = -EBADF;
        _cleanup_close_ int ns2 = -EBADF;

        assert(!pid1 || pidref_is_set(pid1));
        assert(!pid2 || pidref_is_set(pid2));
        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        if (pidref_equal(pid1, pid2))
                return true;

        ns1 = pid1 ? pidref_namespace_open_by_type(pid1, type) : namespace_open_by_type(type);
        if (ns1 < 0)
                return ns1;

        ns2 = pid2 ? pidref_namespace_open_by_type(pid2, type) : namespace_open_by_type(type);
        if (ns2 < 0)
                return ns2;

        return fd_inode_same(ns1, ns2);
}
430

431
/* PID based variant of pidref_in_same_namespace(). A PID of 0 refers to the calling process and is passed
 * on as NULL; any other PID is wrapped via PIDREF_MAKE_FROM_PID(). */
int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type) {
        assert(pid1 >= 0);
        assert(pid2 >= 0);

        PidRef *a = pid1 == 0 ? NULL : &PIDREF_MAKE_FROM_PID(pid1);
        PidRef *b = pid2 == 0 ? NULL : &PIDREF_MAKE_FROM_PID(pid2);

        return pidref_in_same_namespace(a, b, type);
}
438

439
/* Starting at pidref, walks up the chain of parent processes until a process is found whose parent is in a
 * different namespace of the given type. That process is returned in *ret (as a new reference the caller
 * owns). Returns 0 on success, negative errno-style error otherwise.
 *
 * Note: we don't bother with pidref_is_set()/pidref_is_remote() here, as the first call we do,
 * pidref_get_ppid_as_pidref(), calls those anyway. */
int namespace_get_leader(PidRef *pidref, NamespaceType type, PidRef *ret) {
        int r;

        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);
        assert(ret);

        /* 'owned' holds the reference we acquired for the process currently looked at; it stays unset
         * while we are still looking at the caller's pidref. 'cursor' points to whichever applies. */
        _cleanup_(pidref_done) PidRef owned = PIDREF_NULL;
        PidRef *cursor = pidref;

        for (;;) {
                _cleanup_(pidref_done) PidRef up = PIDREF_NULL;

                r = pidref_get_ppid_as_pidref(cursor, &up);
                if (r < 0)
                        return r;

                r = pidref_in_same_namespace(cursor, &up, type);
                if (r < 0)
                        return r;
                if (r > 0) {
                        /* Parent shares the namespace, continue the search one level up */
                        pidref_done(&owned);
                        owned = TAKE_PIDREF(up);
                        cursor = &owned;
                        continue;
                }

                /* Parent and child are not in the same namespace, hence the child is the leader we are
                 * looking for. */
                if (pidref_is_set(&owned)) {
                        *ret = TAKE_PIDREF(owned);
                        return 0;
                }

                /* Still at the caller's pidref, which we don't own, hence hand out a copy */
                r = pidref_copy(cursor, ret);
                if (r < 0)
                        return r;

                return 0;
        }
}
481

482
int detach_mount_namespace(void) {
290✔
483
        /* Detaches the mount namespace, disabling propagation from our namespace to the host. Sets
484
         * propagation first to MS_SLAVE for all mounts (disabling propagation), and then back to MS_SHARED
485
         * (so that we create a new peer group).  */
486

487
        if (unshare(CLONE_NEWNS) < 0)
290✔
UNCOV
488
                return log_debug_errno(errno, "Failed to acquire mount namespace: %m");
×
489

490
        if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
290✔
UNCOV
491
                return log_debug_errno(errno, "Failed to set mount propagation to MS_SLAVE for all mounts: %m");
×
492

493
        if (mount(NULL, "/", NULL, MS_SHARED | MS_REC, NULL) < 0)
290✔
UNCOV
494
                return log_debug_errno(errno, "Failed to set mount propagation back to MS_SHARED for all mounts: %m");
×
495

496
        return 0;
497
}
498

499
int detach_mount_namespace_harder(uid_t target_uid, gid_t target_gid) {
85✔
500
        uid_t from_uid;
85✔
501
        gid_t from_gid;
85✔
502
        int r;
85✔
503

504
        /* Tried detach_mount_namespace() first. If that doesn't work due to permissions, opens up an
505
         * unprivileged user namespace with a mapping of the originating UID/GID to the specified target
506
         * UID/GID. Then, tries detach_mount_namespace() again.
507
         *
508
         * Or in other words: tries much harder to get a mount namespace, making use of unprivileged user
509
         * namespaces if need be.
510
         *
511
         * Note that after this function completed:
512
         *
513
         *    → if we had privs, afterwards uids/gids on files and processes are as before
514
         *
515
         *    → if we had no privs, our own id and all our files will show up owned by target_uid/target_gid,
516
         *    and everything else owned by nobody.
517
         *
518
         * Yes, that's quite a difference. */
519

520
        if (!uid_is_valid(target_uid))
85✔
521
                return -EINVAL;
522
        if (!gid_is_valid(target_gid))
85✔
523
                return -EINVAL;
524

525
        r = detach_mount_namespace();
85✔
526
        if (r != -EPERM)
85✔
527
                return r;
528

529
        from_uid = getuid();
×
UNCOV
530
        from_gid = getgid();
×
531

532
        if (unshare(CLONE_NEWUSER) < 0)
×
UNCOV
533
                return log_debug_errno(errno, "Failed to acquire user namespace: %m");
×
534

535
        r = write_string_filef("/proc/self/uid_map", 0,
×
536
                               UID_FMT " " UID_FMT " 1\n", target_uid, from_uid);
UNCOV
537
        if (r < 0)
×
UNCOV
538
                return log_debug_errno(r, "Failed to write uid map: %m");
×
539

UNCOV
540
        r = write_string_file("/proc/self/setgroups", "deny", 0);
×
UNCOV
541
        if (r < 0)
×
UNCOV
542
                return log_debug_errno(r, "Failed to write setgroups file: %m");
×
543

UNCOV
544
        r = write_string_filef("/proc/self/gid_map", 0,
×
545
                               GID_FMT " " GID_FMT " 1\n", target_gid, from_gid);
UNCOV
546
        if (r < 0)
×
UNCOV
547
                return log_debug_errno(r, "Failed to write gid map: %m");
×
548

UNCOV
549
        return detach_mount_namespace();
×
550
}
551

552
int detach_mount_namespace_userns(int userns_fd) {
65✔
553
        int r;
65✔
554

555
        assert(userns_fd >= 0);
65✔
556

557
        if (setns(userns_fd, CLONE_NEWUSER) < 0)
65✔
UNCOV
558
                return log_debug_errno(errno, "Failed to join user namespace: %m");
×
559

560
        r = reset_uid_gid();
65✔
561
        if (r < 0)
65✔
UNCOV
562
                return log_debug_errno(r, "Failed to become root in user namespace: %m");
×
563

564
        return detach_mount_namespace();
65✔
565
}
566

567
int parse_userns_uid_range(const char *s, uid_t *ret_uid_shift, uid_t *ret_uid_range) {
2✔
568
        _cleanup_free_ char *buffer = NULL;
2✔
569
        const char *range, *shift;
2✔
570
        int r;
2✔
571
        uid_t uid_shift, uid_range = 65536;
2✔
572

573
        assert(s);
2✔
574

575
        range = strchr(s, ':');
2✔
576
        if (range) {
2✔
UNCOV
577
                buffer = strndup(s, range - s);
×
578
                if (!buffer)
×
UNCOV
579
                        return log_oom();
×
UNCOV
580
                shift = buffer;
×
581

UNCOV
582
                range++;
×
UNCOV
583
                r = safe_atou32(range, &uid_range);
×
584
                if (r < 0)
×
UNCOV
585
                        return log_error_errno(r, "Failed to parse UID range \"%s\": %m", range);
×
586
        } else
587
                shift = s;
588

589
        r = parse_uid(shift, &uid_shift);
2✔
590
        if (r < 0)
2✔
591
                return log_error_errno(r, "Failed to parse UID \"%s\": %m", s);
2✔
592

UNCOV
593
        if (!userns_shift_range_valid(uid_shift, uid_range))
×
UNCOV
594
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "UID range cannot be empty or go beyond " UID_FMT ".", UID_INVALID);
×
595

UNCOV
596
        if (ret_uid_shift)
×
UNCOV
597
                *ret_uid_shift = uid_shift;
×
598

UNCOV
599
        if (ret_uid_range)
×
UNCOV
600
                *ret_uid_range = uid_range;
×
601

602
        return 0;
603
}
604

605
int userns_acquire_empty(void) {
99✔
606
        _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL;
99✔
607
        int r;
99✔
608

609
        r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS|FORK_FREEZE, &pid);
99✔
610
        if (r < 0)
99✔
611
                return r;
612
        assert(r > 0);
98✔
613

614
        return pidref_namespace_open_by_type(&pid, NAMESPACE_USER);
98✔
615
}
616

617
int userns_acquire(const char *uid_map, const char *gid_map, bool setgroups_deny) {
3,108✔
618
        char path[STRLEN("/proc//setgroups") + DECIMAL_STR_MAX(pid_t) + 1];
3,108✔
619
        _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL;
3,108✔
620
        int r;
3,108✔
621

622
        assert(uid_map);
3,108✔
623
        assert(gid_map);
3,108✔
624

625
        /* Forks off a process in a new userns, configures the specified uidmap/gidmap, acquires an fd to it,
626
         * and then kills the process again. This way we have a userns fd that is not bound to any
627
         * process. We can use that for file system mounts and similar. */
628

629
        r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS|FORK_FREEZE, &pid);
3,108✔
630
        if (r < 0)
3,108✔
631
                return r;
632
        assert(r > 0);
3,108✔
633

634
        xsprintf(path, "/proc/" PID_FMT "/uid_map", pid.pid);
3,108✔
635
        r = write_string_file(path, uid_map, WRITE_STRING_FILE_DISABLE_BUFFER);
3,108✔
636
        if (r < 0)
3,108✔
637
                return log_debug_errno(r, "Failed to write UID map: %m");
×
638

639
        if (setgroups_deny) {
3,108✔
640
                xsprintf(path, "/proc/" PID_FMT "/setgroups", pid.pid);
3,108✔
641
                r = write_string_file(path, "deny", WRITE_STRING_FILE_DISABLE_BUFFER);
3,108✔
642
                if (r < 0)
3,108✔
UNCOV
643
                        return log_debug_errno(r, "Failed to write setgroups file: %m");
×
644
        }
645

646
        xsprintf(path, "/proc/" PID_FMT "/gid_map", pid.pid);
3,108✔
647
        r = write_string_file(path, gid_map, WRITE_STRING_FILE_DISABLE_BUFFER);
3,108✔
648
        if (r < 0)
3,108✔
649
                return log_debug_errno(r, "Failed to write GID map: %m");
×
650

651
        return pidref_namespace_open_by_type(&pid, NAMESPACE_USER);
3,108✔
652
}
653

654
int userns_acquire_self_root(void) {
2,928✔
655

656
        /* Returns a user namespace with only our own uid/gid mapped to root, and everything else unmapped.
657
         *
658
         * Note: this can be acquired unprivileged! */
659

660
        _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
2,928✔
661
        if (asprintf(&uid_map, "0 " UID_FMT " 1", getuid()) < 0)
2,928✔
662
                return -ENOMEM;
663
        if (asprintf(&gid_map, "0 " GID_FMT " 1", getgid()) < 0)
2,928✔
664
                return -ENOMEM;
665

666
        return userns_acquire(uid_map, gid_map, /* setgroups_deny= */ true);
2,928✔
667
}
668

669
/* Forks off a child that joins the user namespace referenced by userns_fd, reports success by writing a
 * single byte to a pipe, and then calls freeze(). The parent waits for that byte and on success hands the
 * child's pidref to the caller in *ret.
 *
 * Returns 0 on success, -EPROTO if the pipe was closed without a byte having been written, or another
 * negative errno-style error. */
int userns_enter_and_pin(int userns_fd, PidRef *ret) {
        _cleanup_close_pair_ int pfd[2] = EBADF_PAIR;
        _cleanup_(pidref_done_sigkill_wait) PidRef pidref = PIDREF_NULL;
        ssize_t n;
        char x;
        int r;

        assert(userns_fd >= 0);
        assert(ret);

        if (pipe2(pfd, O_CLOEXEC) < 0)
                return -errno;

        /* The write end of the pipe and the userns fd are passed as the fds to keep open in the child */
        r = pidref_safe_fork_full(
                        "(sd-pinuserns)",
                        /* stdio_fds= */ NULL,
                        (int[]) { pfd[1], userns_fd }, 2,
                        FORK_CLOSE_ALL_FDS|FORK_REOPEN_LOG|FORK_DEATHSIG_SIGKILL,
                        &pidref);
        if (r < 0)
                return r;
        if (r == 0) {
                /* Child: join the namespace, tell the parent, then stick around. */

                r = setns(userns_fd, CLONE_NEWUSER);
                if (r < 0) {
                        log_debug_errno(errno, "Failed to join userns: %m");
                        _exit(EXIT_FAILURE);
                }

                userns_fd = safe_close(userns_fd);

                n = write(pfd[1], &(const char) { 'x' }, 1);
                if (n < 0) {
                        log_debug_errno(errno, "Failed to write to pipe: %m");
                        _exit(EXIT_FAILURE);
                }
                assert(n == 1);

                freeze();
        }

        /* Parent: close our copy of the write end, so that read() below returns 0 instead of blocking
         * if the child goes away without writing. */
        pfd[1] = safe_close(pfd[1]);

        n = read(pfd[0], &x, 1);
        if (n == 0)
                return -EPROTO;
        if (n < 0)
                return -errno;
        assert(n == 1);
        assert(x == 'x');

        *ret = TAKE_PIDREF(pidref);
        return 0;
}
723

724
/* Returns true if /proc/self/uid_map exists (according to access()), false otherwise. */
bool userns_supported(void) {
        if (access("/proc/self/uid_map", F_OK) < 0)
                return false;

        return true;
}
727

728
int userns_get_base_uid(int userns_fd, uid_t *ret_uid, gid_t *ret_gid) {
26✔
729
        _cleanup_(pidref_done_sigkill_wait) PidRef pidref = PIDREF_NULL;
26✔
730
        int r;
26✔
731

732
        assert(userns_fd >= 0);
26✔
733

734
        r = userns_enter_and_pin(userns_fd, &pidref);
26✔
735
        if (r < 0)
26✔
736
                return r;
737

738
        uid_t uid;
26✔
739
        r = uid_map_search_root(pidref.pid, UID_RANGE_USERNS_OUTSIDE, &uid);
26✔
740
        if (r < 0)
26✔
741
                return r;
742

743
        gid_t gid;
25✔
744
        r = uid_map_search_root(pidref.pid, GID_RANGE_USERNS_OUTSIDE, &gid);
25✔
745
        if (r < 0)
25✔
746
                return r;
747

748
        if (!ret_gid && uid != gid)
25✔
749
                return -EUCLEAN;
750

751
        if (ret_uid)
24✔
752
                *ret_uid = uid;
24✔
753
        if (ret_gid)
24✔
754
                *ret_gid = gid;
1✔
755

756
        return 0;
757
}
758

759
int process_is_owned_by_uid(const PidRef *pidref, uid_t uid) {
14✔
760
        int r;
14✔
761

762
        /* Checks if the specified process either is owned directly by the specified user, or if it is inside
763
         * a user namespace owned by it. */
764

765
        assert(uid_is_valid(uid));
14✔
766

767
        uid_t process_uid;
14✔
768
        r = pidref_get_uid(pidref, &process_uid);
14✔
769
        if (r < 0)
14✔
770
                return r;
14✔
771
        if (process_uid == uid)
14✔
772
                return true;
773

774
        _cleanup_close_ int userns_fd = -EBADF;
14✔
775
        userns_fd = pidref_namespace_open_by_type(pidref, NAMESPACE_USER);
11✔
776
        if (userns_fd == -ENOPKG) /* If userns is not supported, then they don't matter for ownership */
11✔
777
                return false;
778
        if (userns_fd < 0)
11✔
779
                return userns_fd;
780

UNCOV
781
        for (unsigned iteration = 0;; iteration++) {
×
782
                uid_t ns_uid;
11✔
783

784
                /* This process is in our own userns? Then we are done, in our own userns only the UIDs
785
                 * themselves matter. */
786
                r = is_our_namespace(userns_fd, NAMESPACE_USER);
11✔
787
                if (r < 0)
11✔
788
                        return r;
11✔
789
                if (r > 0)
11✔
790
                        return false;
791

792
                if (ioctl(userns_fd, NS_GET_OWNER_UID, &ns_uid) < 0)
9✔
UNCOV
793
                        return -errno;
×
794
                if (ns_uid == uid)
9✔
795
                        return true;
796

797
                /* Paranoia check */
UNCOV
798
                if (iteration > 16)
×
UNCOV
799
                        return log_debug_errno(SYNTHETIC_ERRNO(ELOOP), "Giving up while tracing parents of user namespaces after %u steps.", iteration);
×
800

801
                /* Go up the tree */
802
                _cleanup_close_ int parent_fd = ioctl(userns_fd, NS_GET_USERNS);
11✔
UNCOV
803
                if (parent_fd < 0) {
×
UNCOV
804
                        if (errno == EPERM) /* EPERM means we left our own userns */
×
805
                                return false;
806

UNCOV
807
                        return -errno;
×
808
                }
809

UNCOV
810
                close_and_replace(userns_fd, parent_fd);
×
811
        }
812
}
813

814
int is_idmapping_supported(const char *path) {
2,927✔
815
        _cleanup_close_ int mount_fd = -EBADF, userns_fd = -EBADF, dir_fd = -EBADF;
5,854✔
816
        int r;
2,927✔
817

818
        assert(path);
2,927✔
819

820
        if (!mount_new_api_supported())
2,927✔
821
                return false;
822

823
        userns_fd = r = userns_acquire_self_root();
2,927✔
824
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL)
2,927✔
825
                return false;
826
        if (r == -ENOSPC) {
2,927✔
UNCOV
827
                log_debug_errno(r, "Failed to acquire new user namespace, user.max_user_namespaces seems to be exhausted or maybe even zero, assuming ID-mapping is not supported: %m");
×
UNCOV
828
                return false;
×
829
        }
830
        if (r < 0)
2,927✔
UNCOV
831
                return log_debug_errno(r, "Failed to acquire new user namespace for checking if '%s' supports ID-mapping: %m", path);
×
832

833
        dir_fd = r = RET_NERRNO(open(path, O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2,927✔
834
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r))
5,854✔
835
                return false;
836
        if (r < 0)
2,927✔
UNCOV
837
                return log_debug_errno(r, "Failed to open '%s', cannot determine if ID-mapping is supported: %m", path);
×
838

839
        mount_fd = r = RET_NERRNO(open_tree(dir_fd, "", AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC));
2,927✔
840
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL)
2,927✔
UNCOV
841
                return false;
×
842
        if (r < 0)
2,927✔
UNCOV
843
                return log_debug_errno(r, "Failed to open mount tree '%s', cannot determine if ID-mapping is supported: %m", path);
×
844

845
        r = RET_NERRNO(mount_setattr(mount_fd, "", AT_EMPTY_PATH,
2,927✔
846
                       &(struct mount_attr) {
2,927✔
847
                                .attr_set = MOUNT_ATTR_IDMAP | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RDONLY | MOUNT_ATTR_NODEV,
848
                                .userns_fd = userns_fd,
849
                        }, sizeof(struct mount_attr)));
850
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL)
2,927✔
UNCOV
851
                return false;
×
852
        if (r < 0)
2,927✔
UNCOV
853
                return log_debug_errno(r, "Failed to set mount attribute to '%s', cannot determine if ID-mapping is supported: %m", path);
×
854

855
        return true;
856
}
857

858
int netns_acquire(void) {
7✔
859
        _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL;
7✔
860
        int r;
7✔
861

862
        /* Forks off a process in a new network namespace, acquires a network namespace fd, and then kills
863
         * the process again. This way we have a netns fd that is not bound to any process. */
864

865
        r = pidref_safe_fork("(sd-mknetns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_NETNS|FORK_FREEZE, &pid);
7✔
866
        if (r < 0)
7✔
UNCOV
867
                return log_debug_errno(r, "Failed to fork process into new netns: %m");
×
868
        assert(r > 0);
7✔
869

870
        return pidref_namespace_open_by_type(&pid, NAMESPACE_NET);
7✔
871
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc