• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 16062852561

03 Jul 2025 10:04PM UTC coverage: 72.193% (+0.1%) from 72.096%
16062852561

push

github

bluca
pcrlock: process components outside of location window properly

So far, when we tried to match a component to event log entries we
skipped those components if they were outside of our location window.
That however is too aggressive, since it means any components that are
already in the logs, but outside of the location window will be
considered unrecognized in the logs, and thus removed from the PCR
policy.

Change things around: always try to match up all components, regardless
of whether they are inside the location window or outside, but then make it
non-fatal if we can't find a component outside of the location window.

Fixes: #36079

7 of 9 new or added lines in 1 file covered. (77.78%)

4116 existing lines in 75 files now uncovered.

301219 of 417241 relevant lines covered (72.19%)

730820.5 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

78.31
/src/basic/namespace-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <fcntl.h>
4
#include <sys/ioctl.h>
5
#include <sys/mount.h>
6
#include <unistd.h>
7

8
#include "errno-util.h"
9
#include "fd-util.h"
10
#include "fileio.h"
11
#include "log.h"
12
#include "missing_magic.h"
13
#include "missing_namespace.h"
14
#include "mountpoint-util.h"
15
#include "namespace-util.h"
16
#include "parse-util.h"
17
#include "pidfd-util.h"
18
#include "pidref.h"
19
#include "process-util.h"
20
#include "stat-util.h"
21
#include "stdio-util.h"
22
#include "uid-range.h"
23
#include "user-util.h"
24

25
const struct namespace_info namespace_info[_NAMESPACE_TYPE_MAX + 1] = {
26
        [NAMESPACE_CGROUP] =  { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, PIDFD_GET_CGROUP_NAMESPACE, PROC_CGROUP_INIT_INO },
27
        [NAMESPACE_IPC]    =  { "ipc",    "ns/ipc",    CLONE_NEWIPC,    PIDFD_GET_IPC_NAMESPACE,    PROC_IPC_INIT_INO    },
28
        [NAMESPACE_NET]    =  { "net",    "ns/net",    CLONE_NEWNET,    PIDFD_GET_NET_NAMESPACE,    0                    },
29
        /* So, the mount namespace flag is called CLONE_NEWNS for historical
30
         * reasons. Let's expose it here under a more explanatory name: "mnt".
31
         * This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */
32
        [NAMESPACE_MOUNT]  =  { "mnt",    "ns/mnt",    CLONE_NEWNS,     PIDFD_GET_MNT_NAMESPACE,    0                    },
33
        [NAMESPACE_PID]    =  { "pid",    "ns/pid",    CLONE_NEWPID,    PIDFD_GET_PID_NAMESPACE,    PROC_PID_INIT_INO    },
34
        [NAMESPACE_USER]   =  { "user",   "ns/user",   CLONE_NEWUSER,   PIDFD_GET_USER_NAMESPACE,   PROC_USER_INIT_INO   },
35
        [NAMESPACE_UTS]    =  { "uts",    "ns/uts",    CLONE_NEWUTS,    PIDFD_GET_UTS_NAMESPACE,    PROC_UTS_INIT_INO    },
36
        [NAMESPACE_TIME]   =  { "time",   "ns/time",   CLONE_NEWTIME,   PIDFD_GET_TIME_NAMESPACE,   PROC_TIME_INIT_INO   },
37
        {}, /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */
38
};
39

40
/* Expands to procfs_file_alloca() for the namespace file of the given type (namespace_info[type].proc_path)
 * of process 'pid'. NOTE(review): judging by the helper's name the result is alloca()-backed and hence only
 * valid within the calling function — confirm against procfs_file_alloca()'s definition. */
#define pid_namespace_path(pid, type) procfs_file_alloca(pid, namespace_info[type].proc_path)
41

42
NamespaceType clone_flag_to_namespace_type(unsigned long clone_flag) {
43
        for (NamespaceType t = 0; t < _NAMESPACE_TYPE_MAX; t++)
582✔
44
                if (((namespace_info[t].clone_flag ^ clone_flag) & (CLONE_NEWCGROUP|CLONE_NEWIPC|CLONE_NEWNET|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUSER|CLONE_NEWUTS|CLONE_NEWTIME)) == 0)
582✔
45
                        return t;
46

47
        return _NAMESPACE_TYPE_INVALID;
48
}
49

50
/* Checks whether the namespace file of the given type exists for PID 0 (as passed to pid_namespace_path()).
 * Returns true if access() with F_OK succeeds on that path, false otherwise. */
bool namespace_type_supported(NamespaceType type) {
        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        const char *path = pid_namespace_path(0, type);

        if (access(path, F_OK) < 0)
                return false;

        return true;
}
56

57
/* Opens the namespace of the given type of the process referenced by 'pidref' and returns the fd, or a
 * negative errno-style error.
 *
 * If the PidRef carries a pidfd, the namespace is requested via pidfd_get_namespace() first; every result
 * other than a "not supported" error is returned as-is. Otherwise (or on "not supported") the namespace file
 * below /proc/ is opened.
 *
 * need_verify: if non-NULL it is set to true whenever the /proc/ path is taken, and the caller is expected to
 * call pidref_verify() afterwards. If NULL, pidref_verify() is called here.
 *
 * Errors generated here: -EREMOTE for remote PidRefs, -ENOSYS if the namespace file is missing and /proc/ is
 * not mounted, -ENOPKG if the file is missing although /proc/ is mounted. */
static int pidref_namespace_open_by_type_internal(const PidRef *pidref, NamespaceType type, bool *need_verify) {
        int q;

        assert(pidref_is_set(pidref));
        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        if (pidref_is_remote(pidref))
                return -EREMOTE;

        /* Preferred route: go through the pidfd, if we have one. */
        if (pidref->fd >= 0) {
                q = pidfd_get_namespace(pidref->fd, namespace_info[type].pidfd_get_ns_ioctl_cmd);
                if (!ERRNO_IS_NEG_NOT_SUPPORTED(q))
                        return q;
        }

        /* Fallback route via /proc/. Tell the caller that verification is now its job, if it asked to be
         * told. */
        if (need_verify)
                *need_verify = true;

        const char *path = pid_namespace_path(pidref->pid, type);
        _cleanup_close_ int fd = RET_NERRNO(open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC));
        if (fd == -ENOENT) {
                q = proc_mounted();
                if (q == 0) /* /proc/ is not available or not set up properly, most likely we are in some
                             * chroot environment. */
                        return -ENOSYS;
                if (q > 0)  /* /proc/ is definitely around, hence the namespace type is not supported. */
                        return -ENOPKG;

                /* Could not determine whether /proc/ is mounted: propagate the original error below. */
        }
        if (fd < 0)
                return fd;

        /* Nobody asked to be told about verification, hence do it ourselves. */
        if (!need_verify) {
                q = pidref_verify(pidref);
                if (q < 0)
                        return q;
        }

        return TAKE_FD(fd);
}
101

102
/* Public wrapper around pidref_namespace_open_by_type_internal() that passes no 'need_verify' pointer, so
 * that any needed pidref_verify() call is done by the internal helper itself. */
int pidref_namespace_open_by_type(const PidRef *pidref, NamespaceType type) {
        return pidref_namespace_open_by_type_internal(pidref, type, /* need_verify= */ NULL);
}
105

106
/* Opens the namespace of the given type of the calling process: acquires a PidRef for ourselves via
 * pidref_set_self() and hands it to pidref_namespace_open_by_type(). Returns that function's result, or the
 * negative error from pidref_set_self(). */
int namespace_open_by_type(NamespaceType type) {
        _cleanup_(pidref_done) PidRef self = PIDREF_NULL;

        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        int r = pidref_set_self(&self);
        if (r < 0)
                return r;

        return pidref_namespace_open_by_type(&self, type);
}
118

119
int pidref_namespace_open(
120
                const PidRef *pidref,
121
                int *ret_pidns_fd,
122
                int *ret_mntns_fd,
123
                int *ret_netns_fd,
124
                int *ret_userns_fd,
125
                int *ret_root_fd) {
126

127
        _cleanup_close_ int pidns_fd = -EBADF, mntns_fd = -EBADF, netns_fd = -EBADF,
370✔
128
                userns_fd = -EBADF, root_fd = -EBADF;
370✔
129
        bool need_verify = false;
185✔
130
        int r;
185✔
131

132
        assert(pidref_is_set(pidref));
185✔
133

134
        if (pidref_is_remote(pidref))
370✔
135
                return -EREMOTE;
136

137
        if (ret_pidns_fd) {
185✔
138
                pidns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_PID, &need_verify);
80✔
139
                if (pidns_fd < 0)
80✔
140
                        return pidns_fd;
141
        }
142

143
        if (ret_mntns_fd) {
185✔
144
                mntns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_MOUNT, &need_verify);
81✔
145
                if (mntns_fd < 0)
81✔
146
                        return mntns_fd;
147
        }
148

149
        if (ret_netns_fd) {
185✔
150
                netns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_NET, &need_verify);
106✔
151
                if (netns_fd < 0)
106✔
152
                        return netns_fd;
153
        }
154

155
        if (ret_userns_fd) {
185✔
156
                userns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_USER, &need_verify);
12✔
157
                if (userns_fd < 0 && userns_fd != -ENOPKG)
12✔
158
                        return userns_fd;
159
        }
160

161
        if (ret_root_fd) {
185✔
162
                const char *root;
81✔
163

164
                root = procfs_file_alloca(pidref->pid, "root");
81✔
165
                root_fd = RET_NERRNO(open(root, O_CLOEXEC|O_DIRECTORY));
81✔
UNCOV
166
                if (root_fd == -ENOENT && proc_mounted() == 0)
×
167
                        return -ENOSYS;
168
                if (root_fd < 0)
81✔
169
                        return root_fd;
170

171
                need_verify = true;
81✔
172
        }
173

174
        if (need_verify) {
185✔
175
                r = pidref_verify(pidref);
185✔
176
                if (r < 0)
185✔
177
                        return r;
178
        }
179

180
        if (ret_pidns_fd)
185✔
181
                *ret_pidns_fd = TAKE_FD(pidns_fd);
80✔
182

183
        if (ret_mntns_fd)
185✔
184
                *ret_mntns_fd = TAKE_FD(mntns_fd);
81✔
185

186
        if (ret_netns_fd)
185✔
187
                *ret_netns_fd = TAKE_FD(netns_fd);
106✔
188

189
        if (ret_userns_fd)
185✔
190
                *ret_userns_fd = TAKE_FD(userns_fd);
12✔
191

192
        if (ret_root_fd)
185✔
193
                *ret_root_fd = TAKE_FD(root_fd);
81✔
194

195
        return 0;
196
}
197

198
int namespace_open(
199
                pid_t pid,
200
                int *ret_pidns_fd,
201
                int *ret_mntns_fd,
202
                int *ret_netns_fd,
203
                int *ret_userns_fd,
204
                int *ret_root_fd) {
205

206
        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
10✔
207
        int r;
10✔
208

209
        r = pidref_set_pid(&pidref, pid);
10✔
210
        if (r < 0)
10✔
211
                return r;
212

213
        return pidref_namespace_open(&pidref, ret_pidns_fd, ret_mntns_fd, ret_netns_fd, ret_userns_fd, ret_root_fd);
10✔
214
}
215

216
int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
217
        int r;
407✔
218

219
        if (userns_fd >= 0) {
407✔
220
                /* Can't setns to your own userns, since then you could escalate from non-root to root in
221
                 * your own namespace, so check if namespaces are equal before attempting to enter. */
222

223
                r = is_our_namespace(userns_fd, NAMESPACE_USER);
1✔
224
                if (r < 0)
1✔
225
                        return r;
226
                if (r > 0)
1✔
227
                        userns_fd = -EBADF;
1✔
228
        }
229

230
        if (pidns_fd >= 0)
407✔
231
                if (setns(pidns_fd, CLONE_NEWPID) < 0)
58✔
UNCOV
232
                        return -errno;
×
233

234
        if (mntns_fd >= 0)
407✔
235
                if (setns(mntns_fd, CLONE_NEWNS) < 0)
286✔
UNCOV
236
                        return -errno;
×
237

238
        if (netns_fd >= 0)
407✔
239
                if (setns(netns_fd, CLONE_NEWNET) < 0)
121✔
UNCOV
240
                        return -errno;
×
241

242
        if (userns_fd >= 0)
407✔
UNCOV
243
                if (setns(userns_fd, CLONE_NEWUSER) < 0)
×
244
                        return -errno;
×
245

246
        if (root_fd >= 0) {
407✔
247
                if (fchdir(root_fd) < 0)
59✔
UNCOV
248
                        return -errno;
×
249

250
                if (chroot(".") < 0)
59✔
UNCOV
251
                        return -errno;
×
252
        }
253

254
        if (userns_fd >= 0)
407✔
UNCOV
255
                return reset_uid_gid();
×
256

257
        return 0;
258
}
259

260
/* Checks whether 'fd' refers to a namespace, and — if 'type' is not negative (i.e. not
 * _NAMESPACE_TYPE_INVALID) — whether it is a namespace of that type.
 *
 * Returns > 0 if so, 0 if not, negative errno-style error on failure; -EBADF specifically if the kernel
 * reports a namespace type we have no table entry for. */
int fd_is_namespace(int fd, NamespaceType type) {
        assert(fd >= 0);
        assert(type < _NAMESPACE_TYPE_MAX);

        /* Result of the file system type check against NSFS_MAGIC; zero and errors are passed on as-is. */
        int r = fd_is_fs_type(fd, NSFS_MAGIC);
        if (r <= 0)
                return r;

        /* No specific type requested? Then we are done. */
        if (type < 0)
                return true;

        int flag = ioctl(fd, NS_GET_NSTYPE);
        if (flag < 0)
                return -errno;

        NamespaceType actual = clone_flag_to_namespace_type(flag);

        /* Uh? Unknown namespace type? */
        return actual < 0 ? -EBADF : actual == type;
}
285

286
/* Checks whether the namespace referenced by 'fd' is the namespace of the given type that the calling
 * process is in, by comparing it against a freshly opened fd of our own namespace with fd_inode_same().
 *
 * Returns the result of fd_inode_same(), -EUCLEAN if 'fd' is not a namespace (or not one of the requested
 * type), or another negative errno-style error. */
int is_our_namespace(int fd, NamespaceType type) {
        assert(fd >= 0);
        assert(type < _NAMESPACE_TYPE_MAX);

        int r = fd_is_namespace(fd, type);
        if (r < 0)
                return r;
        if (r == 0) /* Not a namespace, or not of the right type. */
                return -EUCLEAN;

        _cleanup_close_ int mine = namespace_open_by_type(type);
        if (mine < 0)
                return mine;

        return fd_inode_same(fd, mine);
}
304

305
/* Checks whether the namespace file of the given type (looked up for PID 0) has the inode number recorded
 * in namespace_info[].root_inode.
 *
 * Returns > 0 if the inode numbers match, 0 if they don't, -EBADR if no inode number is recorded for this
 * type, -ENOSYS if the namespace file is missing and /proc/ is not mounted, -ENOENT if the file is missing
 * and we cannot tell whether /proc/ is mounted, or another negative errno from stat(). */
int namespace_is_init(NamespaceType type) {
        assert(type >= 0);
        assert(type < _NAMESPACE_TYPE_MAX);

        if (namespace_info[type].root_inode == 0)
                return -EBADR; /* Cannot answer this question */

        const char *path = pid_namespace_path(0, type);
        struct stat st;

        int r = RET_NERRNO(stat(path, &st));
        if (r >= 0)
                return st.st_ino == namespace_info[type].root_inode;
        if (r != -ENOENT)
                return r;

        /* The namespace file is not around. If /proc/ itself is mounted this means the namespace type is
         * turned off in the kernel, and hence we are in the initial namespace. */
        int mounted = proc_mounted();
        if (mounted < 0)
                return -ENOENT; /* Can't determine whether /proc/ is mounted: propagate original error */

        return mounted ? true : -ENOSYS;
}
331

332
/* Checks whether two processes are in the same namespace of the given type. Either PidRef may be NULL, which
 * stands for the calling process. Returns true right away if pidref_equal() reports both references as
 * equal; otherwise both namespaces are opened and compared with fd_inode_same(). Negative errno-style error
 * on failure. */
int pidref_in_same_namespace(PidRef *pid1, PidRef *pid2, NamespaceType type) {
        _cleanup_close_ int first = -EBADF, second = -EBADF;

        assert(!pid1 || pidref_is_set(pid1));
        assert(!pid2 || pidref_is_set(pid2));
        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        if (pidref_equal(pid1, pid2))
                return true;

        first = pid1 ? pidref_namespace_open_by_type(pid1, type)
                     : namespace_open_by_type(type);
        if (first < 0)
                return first;

        second = pid2 ? pidref_namespace_open_by_type(pid2, type)
                      : namespace_open_by_type(type);
        if (second < 0)
                return second;

        return fd_inode_same(first, second);
}
360

361
/* PID based variant of pidref_in_same_namespace(). A PID of 0 refers to the calling process and is passed
 * on as NULL; any other PID is wrapped via PIDREF_MAKE_FROM_PID(). */
int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type) {
        assert(pid1 >= 0);
        assert(pid2 >= 0);

        PidRef *a = pid1 == 0 ? NULL : &PIDREF_MAKE_FROM_PID(pid1);
        PidRef *b = pid2 == 0 ? NULL : &PIDREF_MAKE_FROM_PID(pid2);

        return pidref_in_same_namespace(a, b, type);
}
368

369
/* Walks up the parent chain starting at 'pidref' until it finds a process whose parent is in a different
 * namespace of the given type; that process is returned in *ret. Returns 0 on success, negative errno-style
 * error on failure. */
int namespace_get_leader(PidRef *pidref, NamespaceType type, PidRef *ret) {
        int r;

        /* Note: pidref_is_set()/pidref_is_remote() are not checked here, since the first call we make,
         * pidref_get_ppid_as_pidref(), does that anyway. */

        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);
        assert(ret);

        /* 'owned' holds the reference we acquired for the process currently looked at (unset while we
         * still look at the caller's PidRef); 'cursor' points to whichever of the two is current. */
        _cleanup_(pidref_done) PidRef owned = PIDREF_NULL;
        PidRef *cursor = pidref;

        for (;;) {
                _cleanup_(pidref_done) PidRef parent = PIDREF_NULL;

                r = pidref_get_ppid_as_pidref(cursor, &parent);
                if (r < 0)
                        return r;

                r = pidref_in_same_namespace(cursor, &parent, type);
                if (r < 0)
                        return r;
                if (r > 0) {
                        /* Same namespace as the parent: continue the search one level up. */
                        pidref_done(&owned);
                        owned = TAKE_PIDREF(parent);
                        cursor = &owned;
                        continue;
                }

                /* Parent and child are in different namespaces, hence the child is the leader we are
                 * looking for. Hand out our own reference if we have one, otherwise copy the caller's. */
                if (!pidref_is_set(&owned)) {
                        r = pidref_copy(cursor, ret);
                        if (r < 0)
                                return r;

                        return 0;
                }

                *ret = TAKE_PIDREF(owned);
                return 0;
        }
}
411

412
int detach_mount_namespace(void) {
413
        /* Detaches the mount namespace, disabling propagation from our namespace to the host. Sets
414
         * propagation first to MS_SLAVE for all mounts (disabling propagation), and then back to MS_SHARED
415
         * (so that we create a new peer group).  */
416

417
        if (unshare(CLONE_NEWNS) < 0)
201✔
UNCOV
418
                return log_debug_errno(errno, "Failed to acquire mount namespace: %m");
×
419

420
        if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
201✔
UNCOV
421
                return log_debug_errno(errno, "Failed to set mount propagation to MS_SLAVE for all mounts: %m");
×
422

423
        if (mount(NULL, "/", NULL, MS_SHARED | MS_REC, NULL) < 0)
201✔
UNCOV
424
                return log_debug_errno(errno, "Failed to set mount propagation back to MS_SHARED for all mounts: %m");
×
425

426
        return 0;
427
}
428

429
int detach_mount_namespace_harder(uid_t target_uid, gid_t target_gid) {
430
        uid_t from_uid;
53✔
431
        gid_t from_gid;
53✔
432
        int r;
53✔
433

434
        /* Tried detach_mount_namespace() first. If that doesn't work due to permissions, opens up an
435
         * unprivileged user namespace with a mapping of the originating UID/GID to the specified target
436
         * UID/GID. Then, tries detach_mount_namespace() again.
437
         *
438
         * Or in other words: tries much harder to get a mount namespace, making use of unprivileged user
439
         * namespaces if need be.
440
         *
441
         * Note that after this function completed:
442
         *
443
         *    → if we had privs, afterwards uids/gids on files and processes are as before
444
         *
445
         *    → if we had no privs, our own id and all our files will show up owned by target_uid/target_gid,
446
         *    and everything else owned by nobody.
447
         *
448
         * Yes, that's quite a difference. */
449

450
        if (!uid_is_valid(target_uid))
53✔
451
                return -EINVAL;
452
        if (!gid_is_valid(target_gid))
53✔
453
                return -EINVAL;
454

455
        r = detach_mount_namespace();
53✔
456
        if (r != -EPERM)
53✔
457
                return r;
458

UNCOV
459
        from_uid = getuid();
×
460
        from_gid = getgid();
×
461

UNCOV
462
        if (unshare(CLONE_NEWUSER) < 0)
×
463
                return log_debug_errno(errno, "Failed to acquire user namespace: %m");
×
464

UNCOV
465
        r = write_string_filef("/proc/self/uid_map", 0,
×
466
                               UID_FMT " " UID_FMT " 1\n", target_uid, from_uid);
UNCOV
467
        if (r < 0)
×
468
                return log_debug_errno(r, "Failed to write uid map: %m");
×
469

UNCOV
470
        r = write_string_file("/proc/self/setgroups", "deny", 0);
×
471
        if (r < 0)
×
472
                return log_debug_errno(r, "Failed to write setgroups file: %m");
×
473

UNCOV
474
        r = write_string_filef("/proc/self/gid_map", 0,
×
475
                               GID_FMT " " GID_FMT " 1\n", target_gid, from_gid);
UNCOV
476
        if (r < 0)
×
477
                return log_debug_errno(r, "Failed to write gid map: %m");
×
478

UNCOV
479
        return detach_mount_namespace();
×
480
}
481

482
int detach_mount_namespace_userns(int userns_fd) {
483
        int r;
2✔
484

485
        assert(userns_fd >= 0);
2✔
486

487
        if (setns(userns_fd, CLONE_NEWUSER) < 0)
2✔
UNCOV
488
                return log_debug_errno(errno, "Failed to join user namespace: %m");
×
489

490
        r = reset_uid_gid();
2✔
491
        if (r < 0)
2✔
UNCOV
492
                return log_debug_errno(r, "Failed to become root in user namespace: %m");
×
493

494
        return detach_mount_namespace();
2✔
495
}
496

497
int parse_userns_uid_range(const char *s, uid_t *ret_uid_shift, uid_t *ret_uid_range) {
498
        _cleanup_free_ char *buffer = NULL;
2✔
499
        const char *range, *shift;
2✔
500
        int r;
2✔
501
        uid_t uid_shift, uid_range = 65536;
2✔
502

503
        assert(s);
2✔
504

505
        range = strchr(s, ':');
2✔
506
        if (range) {
2✔
UNCOV
507
                buffer = strndup(s, range - s);
×
508
                if (!buffer)
×
509
                        return log_oom();
×
510
                shift = buffer;
×
511

UNCOV
512
                range++;
×
513
                r = safe_atou32(range, &uid_range);
×
514
                if (r < 0)
×
515
                        return log_error_errno(r, "Failed to parse UID range \"%s\": %m", range);
×
516
        } else
517
                shift = s;
518

519
        r = parse_uid(shift, &uid_shift);
2✔
520
        if (r < 0)
2✔
521
                return log_error_errno(r, "Failed to parse UID \"%s\": %m", s);
2✔
522

UNCOV
523
        if (!userns_shift_range_valid(uid_shift, uid_range))
×
524
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "UID range cannot be empty or go beyond " UID_FMT ".", UID_INVALID);
×
525

UNCOV
526
        if (ret_uid_shift)
×
527
                *ret_uid_shift = uid_shift;
×
528

UNCOV
529
        if (ret_uid_range)
×
530
                *ret_uid_range = uid_range;
×
531

532
        return 0;
533
}
534

535
int userns_acquire_empty(void) {
536
        _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL;
22✔
537
        int r;
22✔
538

539
        r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS|FORK_FREEZE, &pid);
22✔
540
        if (r < 0)
22✔
541
                return r;
542
        assert(r > 0);
22✔
543

544
        return pidref_namespace_open_by_type(&pid, NAMESPACE_USER);
22✔
545
}
546

547
int userns_acquire(const char *uid_map, const char *gid_map, bool setgroups_deny) {
548
        char path[STRLEN("/proc//setgroups") + DECIMAL_STR_MAX(pid_t) + 1];
3,969✔
549
        _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL;
3,969✔
550
        int r;
3,969✔
551

552
        assert(uid_map);
3,969✔
553
        assert(gid_map);
3,969✔
554

555
        /* Forks off a process in a new userns, configures the specified uidmap/gidmap, acquires an fd to it,
556
         * and then kills the process again. This way we have a userns fd that is not bound to any
557
         * process. We can use that for file system mounts and similar. */
558

559
        r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS|FORK_FREEZE, &pid);
3,969✔
560
        if (r < 0)
3,969✔
561
                return r;
562
        assert(r > 0);
3,969✔
563

564
        xsprintf(path, "/proc/" PID_FMT "/uid_map", pid.pid);
3,969✔
565
        r = write_string_file(path, uid_map, WRITE_STRING_FILE_DISABLE_BUFFER);
3,969✔
566
        if (r < 0)
3,969✔
UNCOV
567
                return log_debug_errno(r, "Failed to write UID map: %m");
×
568

569
        if (setgroups_deny) {
3,969✔
570
                xsprintf(path, "/proc/" PID_FMT "/setgroups", pid.pid);
3,969✔
571
                r = write_string_file(path, "deny", WRITE_STRING_FILE_DISABLE_BUFFER);
3,969✔
572
                if (r < 0)
3,969✔
UNCOV
573
                        return log_debug_errno(r, "Failed to write setgroups file: %m");
×
574
        }
575

576
        xsprintf(path, "/proc/" PID_FMT "/gid_map", pid.pid);
3,969✔
577
        r = write_string_file(path, gid_map, WRITE_STRING_FILE_DISABLE_BUFFER);
3,969✔
578
        if (r < 0)
3,969✔
UNCOV
579
                return log_debug_errno(r, "Failed to write GID map: %m");
×
580

581
        return pidref_namespace_open_by_type(&pid, NAMESPACE_USER);
3,969✔
582
}
583

584
int userns_acquire_self_root(void) {
585

586
        /* Returns a user namespace with only our own uid/gid mapped to root, and everything else unmapped.
587
         *
588
         * Note: this can be acquired unprivileged! */
589

590
        _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
3,825✔
591
        if (asprintf(&uid_map, "0 " UID_FMT " 1", getuid()) < 0)
3,825✔
592
                return -ENOMEM;
593
        if (asprintf(&gid_map, "0 " GID_FMT " 1", getgid()) < 0)
3,825✔
594
                return -ENOMEM;
595

596
        return userns_acquire(uid_map, gid_map, /* setgroups_deny= */ true);
3,825✔
597
}
598

599
int userns_enter_and_pin(int userns_fd, pid_t *ret_pid) {
600
        _cleanup_close_pair_ int pfd[2] = EBADF_PAIR;
38✔
601
        _cleanup_(sigkill_waitp) pid_t pid = 0;
38✔
602
        ssize_t n;
38✔
603
        char x;
38✔
604
        int r;
38✔
605

606
        assert(userns_fd >= 0);
38✔
607
        assert(ret_pid);
38✔
608

609
        if (pipe2(pfd, O_CLOEXEC) < 0)
38✔
UNCOV
610
                return -errno;
×
611

612
        r = safe_fork_full(
76✔
613
                        "(sd-pinuserns)",
614
                        /* stdio_fds= */ NULL,
615
                        (int[]) { pfd[1], userns_fd }, 2,
38✔
616
                        FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL,
617
                        &pid);
618
        if (r < 0)
38✔
619
                return r;
620
        if (r == 0) {
38✔
621
                /* Child. */
622

UNCOV
623
                if (setns(userns_fd, CLONE_NEWUSER) < 0) {
×
624
                        log_debug_errno(errno, "Failed to join userns: %m");
×
625
                        _exit(EXIT_FAILURE);
×
626
                }
627

UNCOV
628
                userns_fd = safe_close(userns_fd);
×
629

UNCOV
630
                n = write(pfd[1], &(const char) { 'x' }, 1);
×
631
                if (n < 0) {
×
632
                        log_debug_errno(errno, "Failed to write to pipe: %m");
×
633
                        _exit(EXIT_FAILURE);
×
634
                }
UNCOV
635
                assert(n == 1);
×
636

UNCOV
637
                freeze();
×
638
        }
639

640
        pfd[1] = safe_close(pfd[1]);
38✔
641

642
        n = read(pfd[0], &x, 1);
38✔
643
        if (n < 0)
38✔
UNCOV
644
                return -errno;
×
645
        if (n == 0)
38✔
646
                return -EPROTO;
647
        assert(n == 1);
38✔
648
        assert(x == 'x');
38✔
649

650
        *ret_pid = TAKE_PID(pid);
38✔
651
        return 0;
38✔
652
}
653

654
/* Returns true if /proc/self/uid_map exists (as per access() with F_OK), which is taken as the indication
 * that user namespaces are supported. */
bool userns_supported(void) {
        if (access("/proc/self/uid_map", F_OK) < 0)
                return false;

        return true;
}
657

658
int userns_get_base_uid(int userns_fd, uid_t *ret_uid, gid_t *ret_gid) {
659
        _cleanup_(sigkill_waitp) pid_t pid = 0;
24✔
660
        int r;
24✔
661

662
        assert(userns_fd >= 0);
24✔
663

664
        r = userns_enter_and_pin(userns_fd, &pid);
24✔
665
        if (r < 0)
24✔
666
                return r;
667

668
        uid_t uid;
24✔
669
        r = uid_map_search_root(pid, UID_RANGE_USERNS_OUTSIDE, &uid);
24✔
670
        if (r < 0)
24✔
671
                return r;
672

673
        gid_t gid;
21✔
674
        r = uid_map_search_root(pid, GID_RANGE_USERNS_OUTSIDE, &gid);
21✔
675
        if (r < 0)
21✔
676
                return r;
677

678
        if (!ret_gid && uid != gid)
21✔
679
                return -EUCLEAN;
680

681
        if (ret_uid)
18✔
682
                *ret_uid = uid;
18✔
683
        if (ret_gid)
18✔
684
                *ret_gid = gid;
3✔
685

686
        return 0;
687
}
688

689
int process_is_owned_by_uid(const PidRef *pidref, uid_t uid) {
690
        int r;
9✔
691

692
        /* Checks if the specified process either is owned directly by the specified user, or if it is inside
693
         * a user namespace owned by it. */
694

695
        assert(uid_is_valid(uid));
9✔
696

697
        uid_t process_uid;
9✔
698
        r = pidref_get_uid(pidref, &process_uid);
9✔
699
        if (r < 0)
9✔
700
                return r;
9✔
701
        if (process_uid == uid)
9✔
702
                return true;
703

704
        _cleanup_close_ int userns_fd = -EBADF;
9✔
705
        userns_fd = pidref_namespace_open_by_type(pidref, NAMESPACE_USER);
6✔
706
        if (userns_fd == -ENOPKG) /* If userns is not supported, then they don't matter for ownership */
6✔
707
                return false;
708
        if (userns_fd < 0)
6✔
709
                return userns_fd;
710

UNCOV
711
        for (unsigned iteration = 0;; iteration++) {
×
712
                uid_t ns_uid;
6✔
713

714
                /* This process is in our own userns? Then we are done, in our own userns only the UIDs
715
                 * themselves matter. */
716
                r = is_our_namespace(userns_fd, NAMESPACE_USER);
6✔
717
                if (r < 0)
6✔
718
                        return r;
6✔
719
                if (r > 0)
6✔
720
                        return false;
721

722
                if (ioctl(userns_fd, NS_GET_OWNER_UID, &ns_uid) < 0)
3✔
UNCOV
723
                        return -errno;
×
724
                if (ns_uid == uid)
3✔
725
                        return true;
726

727
                /* Paranoia check */
UNCOV
728
                if (iteration > 16)
×
729
                        return log_debug_errno(SYNTHETIC_ERRNO(ELOOP), "Giving up while tracing parents of user namespaces after %u steps.", iteration);
×
730

731
                /* Go up the tree */
732
                _cleanup_close_ int parent_fd = ioctl(userns_fd, NS_GET_USERNS);
6✔
UNCOV
733
                if (parent_fd < 0) {
×
734
                        if (errno == EPERM) /* EPERM means we left our own userns */
×
735
                                return false;
736

UNCOV
737
                        return -errno;
×
738
                }
739

UNCOV
740
                close_and_replace(userns_fd, parent_fd);
×
741
        }
742
}
743

744
int is_idmapping_supported(const char *path) {
745
        _cleanup_close_ int mount_fd = -EBADF, userns_fd = -EBADF, dir_fd = -EBADF;
7,648✔
746
        int r;
3,824✔
747

748
        assert(path);
3,824✔
749

750
        if (!mount_new_api_supported())
3,824✔
751
                return false;
752

753
        userns_fd = r = userns_acquire_self_root();
3,824✔
754
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL)
3,824✔
755
                return false;
756
        if (r == -ENOSPC) {
3,824✔
UNCOV
757
                log_debug_errno(r, "Failed to acquire new user namespace, user.max_user_namespaces seems to be exhausted or maybe even zero, assuming ID-mapping is not supported: %m");
×
758
                return false;
×
759
        }
760
        if (r < 0)
3,824✔
UNCOV
761
                return log_debug_errno(r, "Failed to acquire new user namespace for checking if '%s' supports ID-mapping: %m", path);
×
762

763
        dir_fd = r = RET_NERRNO(open(path, O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
3,824✔
764
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r))
7,648✔
765
                return false;
766
        if (r < 0)
3,824✔
UNCOV
767
                return log_debug_errno(r, "Failed to open '%s', cannot determine if ID-mapping is supported: %m", path);
×
768

769
        mount_fd = r = RET_NERRNO(open_tree(dir_fd, "", AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC));
3,824✔
770
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL)
3,824✔
UNCOV
771
                return false;
×
772
        if (r < 0)
3,824✔
UNCOV
773
                return log_debug_errno(r, "Failed to open mount tree '%s', cannot determine if ID-mapping is supported: %m", path);
×
774

775
        r = RET_NERRNO(mount_setattr(mount_fd, "", AT_EMPTY_PATH,
3,824✔
776
                       &(struct mount_attr) {
3,824✔
777
                                .attr_set = MOUNT_ATTR_IDMAP | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RDONLY | MOUNT_ATTR_NODEV,
778
                                .userns_fd = userns_fd,
779
                        }, sizeof(struct mount_attr)));
780
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL)
3,824✔
781
                return false;
6✔
782
        if (r < 0)
3,818✔
UNCOV
783
                return log_debug_errno(r, "Failed to set mount attribute to '%s', cannot determine if ID-mapping is supported: %m", path);
×
784

785
        return true;
786
}
787

788
int netns_acquire(void) {
789
        _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL;
7✔
790
        int r;
7✔
791

792
        /* Forks off a process in a new network namespace, acquires a network namespace fd, and then kills
793
         * the process again. This way we have a netns fd that is not bound to any process. */
794

795
        r = pidref_safe_fork("(sd-mknetns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_NETNS|FORK_FREEZE, &pid);
7✔
796
        if (r < 0)
7✔
UNCOV
797
                return log_debug_errno(r, "Failed to fork process into new netns: %m");
×
798
        assert(r > 0);
7✔
799

800
        return pidref_namespace_open_by_type(&pid, NAMESPACE_NET);
7✔
801
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc