• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 14345988482

08 Apr 2025 09:45PM UTC coverage: 71.913% (+0.006%) from 71.907%
14345988482

push

github

yuwata
test: Improve coverage in test-memfd-util and use ASSERT_OK() macro and friends

14 of 16 new or added lines in 1 file covered. (87.5%)

4696 existing lines in 76 files now uncovered.

296867 of 412812 relevant lines covered (71.91%)

666289.39 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

79.39
/src/basic/namespace-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <fcntl.h>
4
#include <sys/ioctl.h>
5
#include <sys/mount.h>
6

7
#include "errno-util.h"
8
#include "fd-util.h"
9
#include "fileio.h"
10
#include "missing_fs.h"
11
#include "missing_magic.h"
12
#include "missing_namespace.h"
13
#include "missing_sched.h"
14
#include "missing_syscall.h"
15
#include "mountpoint-util.h"
16
#include "namespace-util.h"
17
#include "parse-util.h"
18
#include "pidfd-util.h"
19
#include "process-util.h"
20
#include "stat-util.h"
21
#include "stdio-util.h"
22
#include "uid-range.h"
23
#include "user-util.h"
24

25
/* Per-namespace-type lookup table, indexed by NamespaceType. The functions in this file read the
 * .proc_path, .clone_flag, .pidfd_get_ns_ioctl_cmd and .root_inode members of these entries. The
 * initializers are positional; presumably they are (name, path below /proc/<pid>/, CLONE_NEW* flag,
 * pidfd ioctl command, inode number of the initial namespace) — confirm against the struct
 * declaration in namespace-util.h. A root inode of 0 makes namespace_is_init() return -EBADR for
 * that type. */
const struct namespace_info namespace_info[_NAMESPACE_TYPE_MAX + 1] = {
26
        [NAMESPACE_CGROUP] =  { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, PIDFD_GET_CGROUP_NAMESPACE, PROC_CGROUP_INIT_INO },
27
        [NAMESPACE_IPC]    =  { "ipc",    "ns/ipc",    CLONE_NEWIPC,    PIDFD_GET_IPC_NAMESPACE,    PROC_IPC_INIT_INO    },
28
        [NAMESPACE_NET]    =  { "net",    "ns/net",    CLONE_NEWNET,    PIDFD_GET_NET_NAMESPACE,    0                    },
29
        /* So, the mount namespace flag is called CLONE_NEWNS for historical
30
         * reasons. Let's expose it here under a more explanatory name: "mnt".
31
         * This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */
32
        [NAMESPACE_MOUNT]  =  { "mnt",    "ns/mnt",    CLONE_NEWNS,     PIDFD_GET_MNT_NAMESPACE,    0                    },
33
        [NAMESPACE_PID]    =  { "pid",    "ns/pid",    CLONE_NEWPID,    PIDFD_GET_PID_NAMESPACE,    PROC_PID_INIT_INO    },
34
        [NAMESPACE_USER]   =  { "user",   "ns/user",   CLONE_NEWUSER,   PIDFD_GET_USER_NAMESPACE,   PROC_USER_INIT_INO   },
35
        [NAMESPACE_UTS]    =  { "uts",    "ns/uts",    CLONE_NEWUTS,    PIDFD_GET_UTS_NAMESPACE,    PROC_UTS_INIT_INO    },
36
        [NAMESPACE_TIME]   =  { "time",   "ns/time",   CLONE_NEWTIME,   PIDFD_GET_TIME_NAMESPACE,   PROC_TIME_INIT_INO   },
37
        {}, /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */
38
};
39

40
/* Expands to procfs_file_alloca() applied to the given PID and the proc_path entry that
 * namespace_info[] holds for the given namespace type. NOTE(review): going by its name the helper
 * allocates the resulting path with alloca(), i.e. the string would only be valid within the calling
 * function — confirm against the helper's definition. */
#define pid_namespace_path(pid, type) procfs_file_alloca(pid, namespace_info[type].proc_path)
41

42
NamespaceType clone_flag_to_namespace_type(unsigned long clone_flag) {
85✔
43
        for (NamespaceType t = 0; t < _NAMESPACE_TYPE_MAX; t++)
354✔
44
                if (((namespace_info[t].clone_flag ^ clone_flag) & (CLONE_NEWCGROUP|CLONE_NEWIPC|CLONE_NEWNET|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUSER|CLONE_NEWUTS|CLONE_NEWTIME)) == 0)
354✔
45
                        return t;
46

47
        return _NAMESPACE_TYPE_INVALID;
48
}
49

50
/* Opens the namespace of the given type of the process referenced by pidref and returns an fd to it.
 *
 * If the PidRef carries a pidfd, the namespace is first requested via pidfd_get_namespace(); only if
 * that reports "not supported" do we fall back to opening the namespace file below /proc/.
 *
 * need_verify controls who calls pidref_verify() after the /proc/ fallback was used: if non-NULL it is
 * set to true and the caller is expected to verify later, if NULL we verify here.
 *
 * Returns the fd on success, -EREMOTE for remote PidRefs, -ENOSYS if /proc/ is not mounted, -ENOPKG
 * if /proc/ is mounted but the namespace file does not exist, or another negative errno. */
static int pidref_namespace_open_by_type_internal(const PidRef *pidref, NamespaceType type, bool *need_verify) {
        assert(pidref_is_set(pidref));
        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        if (pidref_is_remote(pidref))
                return -EREMOTE;

        if (pidref->fd >= 0) {
                int q = pidfd_get_namespace(pidref->fd, namespace_info[type].pidfd_get_ns_ioctl_cmd);
                if (!ERRNO_IS_NEG_NOT_SUPPORTED(q))
                        return q;
        }

        /* From here on we go via /proc/, hence the PidRef needs to be verified afterwards. */
        if (need_verify) /* The caller shall call pidref_verify() later */
                *need_verify = true;

        const char *path = pid_namespace_path(pidref->pid, type);

        _cleanup_close_ int fd = RET_NERRNO(open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC));
        if (fd < 0) {
                if (fd == -ENOENT) {
                        int mounted = proc_mounted();

                        if (mounted == 0)
                                return -ENOSYS;  /* /proc/ is not available or not set up properly, we're most
                                                    likely in some chroot environment. */
                        if (mounted > 0)
                                return -ENOPKG;  /* If /proc/ is definitely around then this means the
                                                    namespace type is not supported */

                        /* can't determine? then propagate original error */
                }

                return fd;
        }

        if (!need_verify) { /* Otherwise we verify on our own */
                int v = pidref_verify(pidref);
                if (v < 0)
                        return v;
        }

        return TAKE_FD(fd);
}
94

95
/* Public variant of pidref_namespace_open_by_type_internal(): no need_verify flag is passed, hence
 * the internal function verifies the PidRef itself if it had to go via /proc/. */
int pidref_namespace_open_by_type(const PidRef *pidref, NamespaceType type) {
        bool *no_deferred_verify = NULL;

        return pidref_namespace_open_by_type_internal(pidref, type, no_deferred_verify);
}
98

99
/* Opens the namespace of the given type of the calling process. Returns an fd on success, a negative
 * errno otherwise. */
int namespace_open_by_type(NamespaceType type) {
        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        _cleanup_(pidref_done) PidRef me = PIDREF_NULL;

        int k = pidref_set_self(&me);
        if (k < 0)
                return k;

        return pidref_namespace_open_by_type(&me, type);
}
111

112
int pidref_namespace_open(
155✔
113
                const PidRef *pidref,
114
                int *ret_pidns_fd,
115
                int *ret_mntns_fd,
116
                int *ret_netns_fd,
117
                int *ret_userns_fd,
118
                int *ret_root_fd) {
119

120
        _cleanup_close_ int pidns_fd = -EBADF, mntns_fd = -EBADF, netns_fd = -EBADF,
310✔
121
                userns_fd = -EBADF, root_fd = -EBADF;
310✔
122
        bool need_verify = false;
155✔
123
        int r;
155✔
124

125
        assert(pidref_is_set(pidref));
155✔
126

127
        if (pidref_is_remote(pidref))
310✔
128
                return -EREMOTE;
129

130
        if (ret_pidns_fd) {
155✔
131
                pidns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_PID, &need_verify);
50✔
132
                if (pidns_fd < 0)
50✔
133
                        return pidns_fd;
134
        }
135

136
        if (ret_mntns_fd) {
155✔
137
                mntns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_MOUNT, &need_verify);
51✔
138
                if (mntns_fd < 0)
51✔
139
                        return mntns_fd;
140
        }
141

142
        if (ret_netns_fd) {
155✔
143
                netns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_NET, &need_verify);
106✔
144
                if (netns_fd < 0)
106✔
145
                        return netns_fd;
146
        }
147

148
        if (ret_userns_fd) {
155✔
149
                userns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_USER, &need_verify);
12✔
150
                if (userns_fd < 0 && userns_fd != -ENOPKG)
12✔
151
                        return userns_fd;
152
        }
153

154
        if (ret_root_fd) {
155✔
155
                const char *root;
51✔
156

157
                root = procfs_file_alloca(pidref->pid, "root");
51✔
158
                root_fd = RET_NERRNO(open(root, O_CLOEXEC|O_DIRECTORY));
51✔
159
                if (root_fd == -ENOENT && proc_mounted() == 0)
×
160
                        return -ENOSYS;
161
                if (root_fd < 0)
51✔
162
                        return root_fd;
163

164
                need_verify = true;
51✔
165
        }
166

167
        if (need_verify) {
155✔
168
                r = pidref_verify(pidref);
155✔
169
                if (r < 0)
155✔
170
                        return r;
171
        }
172

173
        if (ret_pidns_fd)
155✔
174
                *ret_pidns_fd = TAKE_FD(pidns_fd);
50✔
175

176
        if (ret_mntns_fd)
155✔
177
                *ret_mntns_fd = TAKE_FD(mntns_fd);
51✔
178

179
        if (ret_netns_fd)
155✔
180
                *ret_netns_fd = TAKE_FD(netns_fd);
106✔
181

182
        if (ret_userns_fd)
155✔
183
                *ret_userns_fd = TAKE_FD(userns_fd);
12✔
184

185
        if (ret_root_fd)
155✔
186
                *ret_root_fd = TAKE_FD(root_fd);
51✔
187

188
        return 0;
189
}
190

191
int namespace_open(
10✔
192
                pid_t pid,
193
                int *ret_pidns_fd,
194
                int *ret_mntns_fd,
195
                int *ret_netns_fd,
196
                int *ret_userns_fd,
197
                int *ret_root_fd) {
198

199
        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
10✔
200
        int r;
10✔
201

202
        r = pidref_set_pid(&pidref, pid);
10✔
203
        if (r < 0)
10✔
204
                return r;
205

206
        return pidref_namespace_open(&pidref, ret_pidns_fd, ret_mntns_fd, ret_netns_fd, ret_userns_fd, ret_root_fd);
10✔
207
}
208

209
int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
353✔
210
        int r;
353✔
211

212
        if (userns_fd >= 0) {
353✔
213
                /* Can't setns to your own userns, since then you could escalate from non-root to root in
214
                 * your own namespace, so check if namespaces are equal before attempting to enter. */
215

216
                r = is_our_namespace(userns_fd, NAMESPACE_USER);
1✔
217
                if (r < 0)
1✔
218
                        return r;
219
                if (r > 0)
1✔
220
                        userns_fd = -EBADF;
1✔
221
        }
222

223
        if (pidns_fd >= 0)
353✔
224
                if (setns(pidns_fd, CLONE_NEWPID) < 0)
39✔
225
                        return -errno;
×
226

227
        if (mntns_fd >= 0)
353✔
228
                if (setns(mntns_fd, CLONE_NEWNS) < 0)
232✔
229
                        return -errno;
×
230

231
        if (netns_fd >= 0)
353✔
232
                if (setns(netns_fd, CLONE_NEWNET) < 0)
121✔
233
                        return -errno;
×
234

235
        if (userns_fd >= 0)
353✔
236
                if (setns(userns_fd, CLONE_NEWUSER) < 0)
×
237
                        return -errno;
×
238

239
        if (root_fd >= 0) {
353✔
240
                if (fchdir(root_fd) < 0)
40✔
241
                        return -errno;
×
242

243
                if (chroot(".") < 0)
40✔
244
                        return -errno;
×
245
        }
246

247
        if (userns_fd >= 0)
353✔
248
                return reset_uid_gid();
×
249

250
        return 0;
251
}
252

253
/* Checks whether the specified fd refers to a namespace, i.e. lives on nsfs. If type is negative any
 * namespace type matches, otherwise the fd's type (as reported by the NS_GET_NSTYPE ioctl) has to be
 * the specified one.
 *
 * Returns > 0 on match, 0 if the fd is not a namespace or of a different type, -EBADF if the kernel
 * reports a namespace type we don't know, or another negative errno. */
int fd_is_namespace(int fd, NamespaceType type) {
        assert(fd >= 0);
        assert(type < _NAMESPACE_TYPE_MAX);

        int on_nsfs = fd_is_fs_type(fd, NSFS_MAGIC);
        if (on_nsfs <= 0)
                return on_nsfs;

        if (type < 0) /* Caller doesn't care about the type */
                return true;

        int flag = ioctl(fd, NS_GET_NSTYPE);
        if (flag < 0)
                return -errno;

        NamespaceType actual = clone_flag_to_namespace_type(flag);

        /* Uh? Unknown namespace type? */
        return actual < 0 ? -EBADF : actual == type;
}
278

279
/* Checks whether the namespace the fd refers to is the one of the specified type the calling process
 * is in, by comparing it with a freshly opened fd of our own namespace via fd_inode_same().
 *
 * Returns the result of fd_inode_same(), -EUCLEAN if the fd is not a namespace fd of the specified
 * type, or another negative errno. */
int is_our_namespace(int fd, NamespaceType type) {
        assert(fd >= 0);
        assert(type < _NAMESPACE_TYPE_MAX);

        int k = fd_is_namespace(fd, type);
        if (k <= 0)
                return k < 0 ? k : -EUCLEAN; /* Not a namespace or not of the right type? */

        _cleanup_close_ int ours = namespace_open_by_type(type);
        if (ours < 0)
                return ours;

        return fd_inode_same(fd, ours);
}
297

298
/* Checks whether we are in the initial namespace of the specified type, by comparing the inode
 * number of our namespace file in /proc/ with the root inode recorded in namespace_info[].
 *
 * Returns > 0 if so, 0 if not, -EBADR if no root inode is known for this type, -ENOSYS if /proc/ is
 * not mounted, or another negative errno. */
int namespace_is_init(NamespaceType type) {
        assert(type >= 0);
        assert(type < _NAMESPACE_TYPE_MAX);

        const struct namespace_info *info = &namespace_info[type];

        if (info->root_inode == 0)
                return -EBADR; /* Cannot answer this question */

        const char *path = pid_namespace_path(0, type);
        struct stat st;

        int r = RET_NERRNO(stat(path, &st));
        if (r >= 0)
                return st.st_ino == info->root_inode;
        if (r != -ENOENT)
                return r;

        /* If the /proc/ns/<type> API is not around in /proc/ then ns is off in the kernel and we are in
         * the init ns */
        int mounted = proc_mounted();
        if (mounted < 0)
                return -ENOENT; /* If we can't determine if /proc/ is mounted propagate original error */

        return mounted ? true : -ENOSYS;
}
324

325
/* Checks whether the two referenced processes are in the same namespace of the specified type.
 * Either PidRef may be NULL, which refers to the calling process.
 *
 * Returns true right away if pidref_equal() considers the two references equal, otherwise opens both
 * namespaces and returns the result of fd_inode_same() on them, or a negative errno. */
int pidref_in_same_namespace(PidRef *pid1, PidRef *pid2, NamespaceType type) {
        _cleanup_close_ int first = -EBADF, second = -EBADF;

        /* Accepts NULL to indicate our own process */

        assert(!pid1 || pidref_is_set(pid1));
        assert(!pid2 || pidref_is_set(pid2));
        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        if (pidref_equal(pid1, pid2))
                return true;

        first = pid1 ? pidref_namespace_open_by_type(pid1, type) : namespace_open_by_type(type);
        if (first < 0)
                return first;

        second = pid2 ? pidref_namespace_open_by_type(pid2, type) : namespace_open_by_type(type);
        if (second < 0)
                return second;

        return fd_inode_same(first, second);
}
353

354
/* Walks up the process tree starting at pidref until a process is found whose parent lives in a
 * different namespace of the specified type. A reference to that process is returned in *ret.
 *
 * Returns 0 on success, a negative errno if looking up a parent or comparing namespaces fails. */
int namespace_get_leader(PidRef *pidref, NamespaceType type, PidRef *ret) {

        /* Note: we don't bother with pidref_is_set()/pidref_is_remote() here, as the first call we do,
         * pidref_get_ppid_as_pidref() calls those anyway */

        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);
        assert(ret);

        /* 'held' owns the process we are currently looking at once we moved past the caller's PidRef;
         * 'cursor' points either to the caller's PidRef or to 'held'. */
        _cleanup_(pidref_done) PidRef held = PIDREF_NULL;
        PidRef *cursor = pidref;

        for (;;) {
                _cleanup_(pidref_done) PidRef parent = PIDREF_NULL;
                int r;

                r = pidref_get_ppid_as_pidref(cursor, &parent);
                if (r < 0)
                        return r;

                r = pidref_in_same_namespace(cursor, &parent, type);
                if (r < 0)
                        return r;
                if (r > 0) {
                        /* Same namespace as the parent: continue the search one level up. */
                        pidref_done(&held);
                        held = TAKE_PIDREF(parent);
                        cursor = &held;
                        continue;
                }

                /* If the parent and the child are not in the same namespace, then the child is the
                 * leader we are looking for. */
                if (pidref_is_set(&held)) {
                        *ret = TAKE_PIDREF(held);
                        return 0;
                }

                /* We never moved up, i.e. the caller's own PidRef is the leader. Hand out a copy. */
                r = pidref_copy(cursor, ret);
                if (r < 0)
                        return r;

                return 0;
        }
}
396

397
int detach_mount_namespace(void) {
196✔
398
        /* Detaches the mount namespace, disabling propagation from our namespace to the host. Sets
399
         * propagation first to MS_SLAVE for all mounts (disabling propagation), and then back to MS_SHARED
400
         * (so that we create a new peer group).  */
401

402
        if (unshare(CLONE_NEWNS) < 0)
196✔
403
                return log_debug_errno(errno, "Failed to acquire mount namespace: %m");
×
404

405
        if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
196✔
406
                return log_debug_errno(errno, "Failed to set mount propagation to MS_SLAVE for all mounts: %m");
×
407

408
        if (mount(NULL, "/", NULL, MS_SHARED | MS_REC, NULL) < 0)
196✔
409
                return log_debug_errno(errno, "Failed to set mount propagation back to MS_SHARED for all mounts: %m");
×
410

411
        return 0;
412
}
413

414
int detach_mount_namespace_harder(uid_t target_uid, gid_t target_gid) {
53✔
415
        uid_t from_uid;
53✔
416
        gid_t from_gid;
53✔
417
        int r;
53✔
418

419
        /* Tried detach_mount_namespace() first. If that doesn't work due to permissions, opens up an
420
         * unprivileged user namespace with a mapping of the originating UID/GID to the specified target
421
         * UID/GID. Then, tries detach_mount_namespace() again.
422
         *
423
         * Or in other words: tries much harder to get a mount namespace, making use of unprivileged user
424
         * namespaces if need be.
425
         *
426
         * Note that after this function completed:
427
         *
428
         *    → if we had privs, afterwards uids/gids on files and processes are as before
429
         *
430
         *    → if we had no privs, our own id and all our files will show up owned by target_uid/target_gid,
431
         *    and everything else owned by nobody.
432
         *
433
         * Yes, that's quite a difference. */
434

435
        if (!uid_is_valid(target_uid))
53✔
436
                return -EINVAL;
437
        if (!gid_is_valid(target_gid))
53✔
438
                return -EINVAL;
439

440
        r = detach_mount_namespace();
53✔
441
        if (r != -EPERM)
53✔
442
                return r;
443

UNCOV
444
        from_uid = getuid();
×
445
        from_gid = getgid();
×
446

447
        if (unshare(CLONE_NEWUSER) < 0)
×
448
                return log_debug_errno(errno, "Failed to acquire user namespace: %m");
×
449

450
        r = write_string_filef("/proc/self/uid_map", 0,
×
451
                               UID_FMT " " UID_FMT " 1\n", target_uid, from_uid);
452
        if (r < 0)
×
UNCOV
453
                return log_debug_errno(r, "Failed to write uid map: %m");
×
454

UNCOV
455
        r = write_string_file("/proc/self/setgroups", "deny", 0);
×
456
        if (r < 0)
×
457
                return log_debug_errno(r, "Failed to write setgroups file: %m");
×
458

459
        r = write_string_filef("/proc/self/gid_map", 0,
×
460
                               GID_FMT " " GID_FMT " 1\n", target_gid, from_gid);
UNCOV
461
        if (r < 0)
×
UNCOV
462
                return log_debug_errno(r, "Failed to write gid map: %m");
×
463

UNCOV
464
        return detach_mount_namespace();
×
465
}
466

467
int detach_mount_namespace_userns(int userns_fd) {
2✔
468
        int r;
2✔
469

470
        assert(userns_fd >= 0);
2✔
471

472
        if (setns(userns_fd, CLONE_NEWUSER) < 0)
2✔
UNCOV
473
                return log_debug_errno(errno, "Failed to join user namespace: %m");
×
474

475
        r = reset_uid_gid();
2✔
476
        if (r < 0)
2✔
UNCOV
477
                return log_debug_errno(r, "Failed to become root in user namespace: %m");
×
478

479
        return detach_mount_namespace();
2✔
480
}
481

482
int parse_userns_uid_range(const char *s, uid_t *ret_uid_shift, uid_t *ret_uid_range) {
2✔
483
        _cleanup_free_ char *buffer = NULL;
2✔
484
        const char *range, *shift;
2✔
485
        int r;
2✔
486
        uid_t uid_shift, uid_range = 65536;
2✔
487

488
        assert(s);
2✔
489

490
        range = strchr(s, ':');
2✔
491
        if (range) {
2✔
492
                buffer = strndup(s, range - s);
×
493
                if (!buffer)
×
494
                        return log_oom();
×
495
                shift = buffer;
×
496

UNCOV
497
                range++;
×
UNCOV
498
                r = safe_atou32(range, &uid_range);
×
UNCOV
499
                if (r < 0)
×
UNCOV
500
                        return log_error_errno(r, "Failed to parse UID range \"%s\": %m", range);
×
501
        } else
502
                shift = s;
503

504
        r = parse_uid(shift, &uid_shift);
2✔
505
        if (r < 0)
2✔
506
                return log_error_errno(r, "Failed to parse UID \"%s\": %m", s);
2✔
507

UNCOV
508
        if (!userns_shift_range_valid(uid_shift, uid_range))
×
509
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "UID range cannot be empty or go beyond " UID_FMT ".", UID_INVALID);
×
510

UNCOV
511
        if (ret_uid_shift)
×
UNCOV
512
                *ret_uid_shift = uid_shift;
×
513

UNCOV
514
        if (ret_uid_range)
×
UNCOV
515
                *ret_uid_range = uid_range;
×
516

517
        return 0;
518
}
519

520
int userns_acquire_empty(void) {
22✔
521
        _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL;
22✔
522
        int r;
22✔
523

524
        r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS|FORK_FREEZE, &pid);
22✔
525
        if (r < 0)
22✔
526
                return r;
527
        assert(r > 0);
22✔
528

529
        return pidref_namespace_open_by_type(&pid, NAMESPACE_USER);
22✔
530
}
531

532
int userns_acquire(const char *uid_map, const char *gid_map, bool setgroups_deny) {
3,794✔
533
        char path[STRLEN("/proc//setgroups") + DECIMAL_STR_MAX(pid_t) + 1];
3,794✔
534
        _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL;
3,794✔
535
        int r;
3,794✔
536

537
        assert(uid_map);
3,794✔
538
        assert(gid_map);
3,794✔
539

540
        /* Forks off a process in a new userns, configures the specified uidmap/gidmap, acquires an fd to it,
541
         * and then kills the process again. This way we have a userns fd that is not bound to any
542
         * process. We can use that for file system mounts and similar. */
543

544
        r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS|FORK_FREEZE, &pid);
3,794✔
545
        if (r < 0)
3,794✔
546
                return r;
547
        assert(r > 0);
3,794✔
548

549
        xsprintf(path, "/proc/" PID_FMT "/uid_map", pid.pid);
3,794✔
550
        r = write_string_file(path, uid_map, WRITE_STRING_FILE_DISABLE_BUFFER);
3,794✔
551
        if (r < 0)
3,794✔
UNCOV
552
                return log_debug_errno(r, "Failed to write UID map: %m");
×
553

554
        if (setgroups_deny) {
3,794✔
555
                xsprintf(path, "/proc/" PID_FMT "/setgroups", pid.pid);
3,794✔
556
                r = write_string_file(path, "deny", WRITE_STRING_FILE_DISABLE_BUFFER);
3,794✔
557
                if (r < 0)
3,794✔
UNCOV
558
                        return log_debug_errno(r, "Failed to write setgroups file: %m");
×
559
        }
560

561
        xsprintf(path, "/proc/" PID_FMT "/gid_map", pid.pid);
3,794✔
562
        r = write_string_file(path, gid_map, WRITE_STRING_FILE_DISABLE_BUFFER);
3,794✔
563
        if (r < 0)
3,794✔
UNCOV
564
                return log_debug_errno(r, "Failed to write GID map: %m");
×
565

566
        return pidref_namespace_open_by_type(&pid, NAMESPACE_USER);
3,794✔
567
}
568

569
int userns_acquire_self_root(void) {
3,665✔
570

571
        /* Returns a user namespace with only our own uid/gid mapped to root, and everything else unmapped.
572
         *
573
         * Note: this can be acquired unprivileged! */
574

575
        _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
3,665✔
576
        if (asprintf(&uid_map, "0 " UID_FMT " 1", getuid()) < 0)
3,665✔
577
                return -ENOMEM;
578
        if (asprintf(&gid_map, "0 " GID_FMT " 1", getgid()) < 0)
3,665✔
579
                return -ENOMEM;
580

581
        return userns_acquire(uid_map, gid_map, /* setgroups_deny= */ true);
3,665✔
582
}
583

584
int userns_enter_and_pin(int userns_fd, pid_t *ret_pid) {
40✔
585
        _cleanup_close_pair_ int pfd[2] = EBADF_PAIR;
40✔
586
        _cleanup_(sigkill_waitp) pid_t pid = 0;
40✔
587
        ssize_t n;
40✔
588
        char x;
40✔
589
        int r;
40✔
590

591
        assert(userns_fd >= 0);
40✔
592
        assert(ret_pid);
40✔
593

594
        if (pipe2(pfd, O_CLOEXEC) < 0)
40✔
UNCOV
595
                return -errno;
×
596

597
        r = safe_fork_full(
80✔
598
                        "(sd-pinuserns)",
599
                        /* stdio_fds= */ NULL,
600
                        (int[]) { pfd[1], userns_fd }, 2,
40✔
601
                        FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL,
602
                        &pid);
603
        if (r < 0)
40✔
604
                return r;
605
        if (r == 0) {
40✔
606
                /* Child. */
607

608
                if (setns(userns_fd, CLONE_NEWUSER) < 0) {
×
UNCOV
609
                        log_debug_errno(errno, "Failed to join userns: %m");
×
610
                        _exit(EXIT_FAILURE);
×
611
                }
612

613
                userns_fd = safe_close(userns_fd);
×
614

615
                n = write(pfd[1], &(const char) { 'x' }, 1);
×
UNCOV
616
                if (n < 0) {
×
617
                        log_debug_errno(errno, "Failed to write to pipe: %m");
×
UNCOV
618
                        _exit(EXIT_FAILURE);
×
619
                }
UNCOV
620
                assert(n == 1);
×
621

UNCOV
622
                freeze();
×
623
        }
624

625
        pfd[1] = safe_close(pfd[1]);
40✔
626

627
        n = read(pfd[0], &x, 1);
40✔
628
        if (n < 0)
40✔
UNCOV
629
                return -errno;
×
630
        if (n == 0)
40✔
631
                return -EPROTO;
632
        assert(n == 1);
40✔
633
        assert(x == 'x');
40✔
634

635
        *ret_pid = TAKE_PID(pid);
40✔
636
        return 0;
40✔
637
}
638

639
int userns_get_base_uid(int userns_fd, uid_t *ret_uid, gid_t *ret_gid) {
24✔
640
        _cleanup_(sigkill_waitp) pid_t pid = 0;
24✔
641
        int r;
24✔
642

643
        assert(userns_fd >= 0);
24✔
644

645
        r = userns_enter_and_pin(userns_fd, &pid);
24✔
646
        if (r < 0)
24✔
647
                return r;
648

649
        uid_t uid;
24✔
650
        r = uid_map_search_root(pid, UID_RANGE_USERNS_OUTSIDE, &uid);
24✔
651
        if (r < 0)
24✔
652
                return r;
653

654
        gid_t gid;
21✔
655
        r = uid_map_search_root(pid, GID_RANGE_USERNS_OUTSIDE, &gid);
21✔
656
        if (r < 0)
21✔
657
                return r;
658

659
        if (!ret_gid && uid != gid)
21✔
660
                return -EUCLEAN;
661

662
        if (ret_uid)
18✔
663
                *ret_uid = uid;
18✔
664
        if (ret_gid)
18✔
665
                *ret_gid = gid;
3✔
666

667
        return 0;
668
}
669

670
int process_is_owned_by_uid(const PidRef *pidref, uid_t uid) {
9✔
671
        int r;
9✔
672

673
        /* Checks if the specified process either is owned directly by the specified user, or if it is inside
674
         * a user namespace owned by it. */
675

676
        assert(uid_is_valid(uid));
9✔
677

678
        uid_t process_uid;
9✔
679
        r = pidref_get_uid(pidref, &process_uid);
9✔
680
        if (r < 0)
9✔
681
                return r;
9✔
682
        if (process_uid == uid)
9✔
683
                return true;
684

685
        _cleanup_close_ int userns_fd = -EBADF;
9✔
686
        userns_fd = pidref_namespace_open_by_type(pidref, NAMESPACE_USER);
6✔
687
        if (userns_fd == -ENOPKG) /* If userns is not supported, then they don't matter for ownership */
6✔
688
                return false;
689
        if (userns_fd < 0)
6✔
690
                return userns_fd;
691

UNCOV
692
        for (unsigned iteration = 0;; iteration++) {
×
693
                uid_t ns_uid;
6✔
694

695
                /* This process is in our own userns? Then we are done, in our own userns only the UIDs
696
                 * themselves matter. */
697
                r = is_our_namespace(userns_fd, NAMESPACE_USER);
6✔
698
                if (r < 0)
6✔
699
                        return r;
6✔
700
                if (r > 0)
6✔
701
                        return false;
702

703
                if (ioctl(userns_fd, NS_GET_OWNER_UID, &ns_uid) < 0)
3✔
704
                        return -errno;
×
705
                if (ns_uid == uid)
3✔
706
                        return true;
707

708
                /* Paranoia check */
709
                if (iteration > 16)
×
710
                        return log_debug_errno(SYNTHETIC_ERRNO(ELOOP), "Giving up while tracing parents of user namespaces after %u steps.", iteration);
×
711

712
                /* Go up the tree */
713
                _cleanup_close_ int parent_fd = ioctl(userns_fd, NS_GET_USERNS);
6✔
UNCOV
714
                if (parent_fd < 0) {
×
UNCOV
715
                        if (errno == EPERM) /* EPERM means we left our own userns */
×
716
                                return false;
717

UNCOV
718
                        return -errno;
×
719
                }
720

UNCOV
721
                close_and_replace(userns_fd, parent_fd);
×
722
        }
723
}
724

725
int is_idmapping_supported(const char *path) {
3,664✔
726
        _cleanup_close_ int mount_fd = -EBADF, userns_fd = -EBADF, dir_fd = -EBADF;
7,328✔
727
        int r;
3,664✔
728

729
        assert(path);
3,664✔
730

731
        if (!mount_new_api_supported())
3,664✔
732
                return false;
733

734
        userns_fd = r = userns_acquire_self_root();
3,664✔
735
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL)
3,664✔
736
                return false;
737
        if (r == -ENOSPC) {
3,664✔
UNCOV
738
                log_debug_errno(r, "Failed to acquire new user namespace, user.max_user_namespaces seems to be exhausted or maybe even zero, assuming ID-mapping is not supported: %m");
×
UNCOV
739
                return false;
×
740
        }
741
        if (r < 0)
3,664✔
UNCOV
742
                return log_debug_errno(r, "Failed to acquire new user namespace for checking if '%s' supports ID-mapping: %m", path);
×
743

744
        dir_fd = r = RET_NERRNO(open(path, O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
3,664✔
745
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r))
7,328✔
746
                return false;
747
        if (r < 0)
3,664✔
UNCOV
748
                return log_debug_errno(r, "Failed to open '%s', cannot determine if ID-mapping is supported: %m", path);
×
749

750
        mount_fd = r = RET_NERRNO(open_tree(dir_fd, "", AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC));
3,664✔
751
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL)
3,664✔
UNCOV
752
                return false;
×
753
        if (r < 0)
3,664✔
UNCOV
754
                return log_debug_errno(r, "Failed to open mount tree '%s', cannot determine if ID-mapping is supported: %m", path);
×
755

756
        r = RET_NERRNO(mount_setattr(mount_fd, "", AT_EMPTY_PATH,
3,664✔
757
                       &(struct mount_attr) {
3,664✔
758
                                .attr_set = MOUNT_ATTR_IDMAP | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RDONLY | MOUNT_ATTR_NODEV,
759
                                .userns_fd = userns_fd,
760
                        }, sizeof(struct mount_attr)));
761
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL)
3,664✔
762
                return false;
6✔
763
        if (r < 0)
3,658✔
UNCOV
764
                return log_debug_errno(r, "Failed to set mount attribute to '%s', cannot determine if ID-mapping is supported: %m", path);
×
765

766
        return true;
767
}
768

769
int netns_acquire(void) {
7✔
770
        _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL;
7✔
771
        int r;
7✔
772

773
        /* Forks off a process in a new network namespace, acquires a network namespace fd, and then kills
774
         * the process again. This way we have a netns fd that is not bound to any process. */
775

776
        r = pidref_safe_fork("(sd-mknetns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_NETNS|FORK_FREEZE, &pid);
7✔
777
        if (r < 0)
7✔
UNCOV
778
                return log_debug_errno(r, "Failed to fork process into new netns: %m");
×
779
        assert(r > 0);
7✔
780

781
        return pidref_namespace_open_by_type(&pid, NAMESPACE_NET);
7✔
782
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc