• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 22121027152

17 Feb 2026 11:25PM UTC coverage: 72.452% (-0.2%) from 72.633%
22121027152

push

github

yuwata
pe-binary: fix missing le16toh() on NumberOfSections in pe_hash/uki_hash

pe_hash() and uki_hash() pass pe_header->pe.NumberOfSections directly
to typesafe_qsort() and FOREACH_ARRAY() without le16toh(). On
big-endian (s390x), NumberOfSections=3 gets read as 0x0300 (768),
while pe_load_sections() correctly converts it and only allocates 3
sections. This makes qsort process 768 elements on a 3-element
buffer, causing a heap-buffer-overflow (confirmed with ASAN on
native s390x).

Wrap all three raw usages with le16toh() to match pe_load_sections().

3 of 4 new or added lines in 1 file covered. (75.0%)

5909 existing lines in 86 files now uncovered.

312380 of 431157 relevant lines covered (72.45%)

1145272.29 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

79.87
/src/basic/namespace-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <fcntl.h>
4
#include <linux/magic.h>
5
#include <linux/nsfs.h>
6
#include <sched.h>
7
#include <sys/ioctl.h>
8
#include <sys/mount.h>
9
#include <unistd.h>
10

11
#include "capability-util.h"
12
#include "dlfcn-util.h"
13
#include "errno-util.h"
14
#include "fd-util.h"
15
#include "fileio.h"
16
#include "log.h"
17
#include "mountpoint-util.h"
18
#include "namespace-util.h"
19
#include "parse-util.h"
20
#include "pidfd-util.h"
21
#include "pidref.h"
22
#include "process-util.h"
23
#include "stat-util.h"
24
#include "stdio-util.h"
25
#include "uid-range.h"
26
#include "user-util.h"
27

28
const struct namespace_info namespace_info[_NAMESPACE_TYPE_MAX + 1] = {
29
        [NAMESPACE_CGROUP] =  { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, PIDFD_GET_CGROUP_NAMESPACE, PROC_CGROUP_INIT_INO },
30
        [NAMESPACE_IPC]    =  { "ipc",    "ns/ipc",    CLONE_NEWIPC,    PIDFD_GET_IPC_NAMESPACE,    PROC_IPC_INIT_INO    },
31
        [NAMESPACE_NET]    =  { "net",    "ns/net",    CLONE_NEWNET,    PIDFD_GET_NET_NAMESPACE,    0                    },
32
        /* So, the mount namespace flag is called CLONE_NEWNS for historical
33
         * reasons. Let's expose it here under a more explanatory name: "mnt".
34
         * This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */
35
        [NAMESPACE_MOUNT]  =  { "mnt",    "ns/mnt",    CLONE_NEWNS,     PIDFD_GET_MNT_NAMESPACE,    0                    },
36
        [NAMESPACE_PID]    =  { "pid",    "ns/pid",    CLONE_NEWPID,    PIDFD_GET_PID_NAMESPACE,    PROC_PID_INIT_INO    },
37
        [NAMESPACE_USER]   =  { "user",   "ns/user",   CLONE_NEWUSER,   PIDFD_GET_USER_NAMESPACE,   PROC_USER_INIT_INO   },
38
        [NAMESPACE_UTS]    =  { "uts",    "ns/uts",    CLONE_NEWUTS,    PIDFD_GET_UTS_NAMESPACE,    PROC_UTS_INIT_INO    },
39
        [NAMESPACE_TIME]   =  { "time",   "ns/time",   CLONE_NEWTIME,   PIDFD_GET_TIME_NAMESPACE,   PROC_TIME_INIT_INO   },
40
        {}, /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */
41
};
42

43
/* Builds the procfs path of the namespace of the given type for the given PID, by passing the table's
 * proc_path entry to procfs_file_alloca(). NOTE(review): judging by the helper's name the resulting string
 * presumably lives on the caller's stack (alloca) — confirm before using this in a loop or returning it. */
#define pid_namespace_path(pid, type) procfs_file_alloca(pid, namespace_info[type].proc_path)
44

45
NamespaceType clone_flag_to_namespace_type(unsigned long clone_flag) {
1,162✔
46
        for (NamespaceType t = 0; t < _NAMESPACE_TYPE_MAX; t++)
6,132✔
47
                if (((namespace_info[t].clone_flag ^ clone_flag) & (CLONE_NEWCGROUP|CLONE_NEWIPC|CLONE_NEWNET|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUSER|CLONE_NEWUTS|CLONE_NEWTIME)) == 0)
6,132✔
48
                        return t;
49

50
        return _NAMESPACE_TYPE_INVALID;
51
}
52

53
bool namespace_type_supported(NamespaceType type) {
        const char *ns_path;

        /* Reports whether the namespace link of the given type can be accessed in procfs, using PID 0 for
         * the path lookup. Any access() failure is reported as "not supported". */

        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        ns_path = pid_namespace_path(0, type);
        if (access(ns_path, F_OK) < 0)
                return false;

        return true;
}
59

60
static int pidref_namespace_open_by_type_internal(const PidRef *pidref, NamespaceType type, bool *need_verify) {
        int r;

        /* Opens the namespace of the given type of the referenced process and returns an fd to it, or a
         * negative errno-style error:
         *
         *    -EREMOTE → the pidref is remote
         *    -ENOPKG  → the namespace type is not supported by the kernel
         *    -ENOSYS  → /proc is not mounted
         *
         * If the pidref carries a pidfd we ask it first, and only fall back to /proc/ if that is reported
         * as not supported. When the /proc/ route is taken and need_verify is non-NULL, it is set to true
         * and the caller has to call pidref_verify() afterwards. If need_verify is NULL the verification
         * is done right here. */

        assert(pidref_is_set(pidref));
        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        if (pidref_is_remote(pidref))
                return -EREMOTE;

        if (pidref->fd >= 0) {
                r = pidfd_get_namespace(pidref->fd, namespace_info[type].pidfd_get_ns_ioctl_cmd);
                if (r == -ENOPKG)
                        return log_debug_errno(
                                        r,
                                        "Cannot open %s namespace for PID "PID_FMT" as the namespace type is not supported by the kernel",
                                        namespace_info[type].proc_name, pidref->pid);
                if (!ERRNO_IS_NEG_NOT_SUPPORTED(r))
                        return r; /* Either success, or an error other than "not supported" */
        }

        /* From here on we go via /proc/, hence somebody has to verify the pidref afterwards. */
        if (need_verify)
                *need_verify = true;

        const char *path = pid_namespace_path(pidref->pid, type);
        _cleanup_close_ int fd = RET_NERRNO(open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC));
        if (fd < 0) {
                if (fd == -ENOENT) {
                        r = proc_mounted();
                        if (r == 0)
                                /* /proc/ is missing or not set up properly, most likely we are running
                                 * in some chroot environment. */
                                return log_debug_errno(
                                                SYNTHETIC_ERRNO(ENOSYS),
                                                "Cannot open %s namespace for PID "PID_FMT" as /proc is not mounted",
                                                namespace_info[type].proc_name, pidref->pid);
                        if (r > 0)
                                /* /proc/ is definitely there, hence the namespace type itself must be
                                 * unsupported. */
                                return log_debug_errno(
                                                SYNTHETIC_ERRNO(ENOPKG),
                                                "Cannot open %s namespace for PID "PID_FMT" via /proc as the namespace type is not supported by the kernel",
                                                namespace_info[type].proc_name, pidref->pid);

                        /* Could not determine the state of /proc/: report the original error. */
                }

                return fd;
        }

        if (need_verify) /* The caller takes care of verification */
                return TAKE_FD(fd);

        r = pidref_verify(pidref);
        if (r < 0)
                return r;

        return TAKE_FD(fd);
}
116

117
int pidref_namespace_open_by_type(const PidRef *pidref, NamespaceType type) {
        /* Public entry point: opens one namespace of the referenced process. No need_verify pointer is
         * passed, so the internal helper verifies the pidref itself when it had to go via /proc/. */

        int nsfd = pidref_namespace_open_by_type_internal(pidref, type, /* need_verify= */ NULL);

        return nsfd;
}
120

121
int namespace_open_by_type(NamespaceType type) {
        _cleanup_(pidref_done) PidRef own = PIDREF_NULL;

        /* Opens the namespace of the given type of the calling process. Returns the namespace fd, or a
         * negative errno-style error if we cannot reference ourselves or the namespace cannot be opened. */

        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        int r = pidref_set_self(&own);

        return r < 0 ? r : pidref_namespace_open_by_type(&own, type);
}
133

134
int pidref_namespace_open(
157✔
135
                const PidRef *pidref,
136
                int *ret_pidns_fd,
137
                int *ret_mntns_fd,
138
                int *ret_netns_fd,
139
                int *ret_userns_fd,
140
                int *ret_root_fd) {
141

142
        _cleanup_close_ int pidns_fd = -EBADF, mntns_fd = -EBADF, netns_fd = -EBADF,
314✔
143
                userns_fd = -EBADF, root_fd = -EBADF;
314✔
144
        bool need_verify = false;
157✔
145
        int r;
157✔
146

147
        assert(pidref_is_set(pidref));
157✔
148

149
        if (pidref_is_remote(pidref))
314✔
150
                return -EREMOTE;
151

152
        if (ret_pidns_fd) {
157✔
153
                pidns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_PID, &need_verify);
117✔
154
                if (pidns_fd < 0)
117✔
155
                        return pidns_fd;
156
        }
157

158
        if (ret_mntns_fd) {
157✔
159
                mntns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_MOUNT, &need_verify);
119✔
160
                if (mntns_fd < 0)
119✔
161
                        return mntns_fd;
162
        }
163

164
        if (ret_netns_fd) {
157✔
165
                netns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_NET, &need_verify);
41✔
166
                if (netns_fd < 0)
41✔
167
                        return netns_fd;
168
        }
169

170
        if (ret_userns_fd) {
157✔
171
                userns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_USER, &need_verify);
40✔
172
                if (userns_fd < 0 && userns_fd != -ENOPKG)
40✔
173
                        return userns_fd;
174
        }
175

176
        if (ret_root_fd) {
157✔
177
                const char *root;
119✔
178

179
                root = procfs_file_alloca(pidref->pid, "root");
119✔
180
                root_fd = RET_NERRNO(open(root, O_CLOEXEC|O_DIRECTORY));
119✔
UNCOV
181
                if (root_fd == -ENOENT && proc_mounted() == 0)
×
182
                        return -ENOSYS;
183
                if (root_fd < 0)
119✔
184
                        return root_fd;
185

186
                need_verify = true;
119✔
187
        }
188

189
        if (need_verify) {
157✔
190
                r = pidref_verify(pidref);
119✔
191
                if (r < 0)
119✔
192
                        return r;
193
        }
194

195
        if (ret_pidns_fd)
157✔
196
                *ret_pidns_fd = TAKE_FD(pidns_fd);
117✔
197

198
        if (ret_mntns_fd)
157✔
199
                *ret_mntns_fd = TAKE_FD(mntns_fd);
119✔
200

201
        if (ret_netns_fd)
157✔
202
                *ret_netns_fd = TAKE_FD(netns_fd);
41✔
203

204
        if (ret_userns_fd)
157✔
205
                *ret_userns_fd = TAKE_FD(userns_fd);
40✔
206

207
        if (ret_root_fd)
157✔
208
                *ret_root_fd = TAKE_FD(root_fd);
119✔
209

210
        return 0;
211
}
212

213
int namespace_open(
36✔
214
                pid_t pid,
215
                int *ret_pidns_fd,
216
                int *ret_mntns_fd,
217
                int *ret_netns_fd,
218
                int *ret_userns_fd,
219
                int *ret_root_fd) {
220

221
        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
36✔
222
        int r;
36✔
223

224
        r = pidref_set_pid(&pidref, pid);
36✔
225
        if (r < 0)
36✔
226
                return r;
227

228
        return pidref_namespace_open(&pidref, ret_pidns_fd, ret_mntns_fd, ret_netns_fd, ret_userns_fd, ret_root_fd);
36✔
229
}
230

231
int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
231✔
232
        int r;
231✔
233

234
        /* Block dlopen() now, to avoid us inadvertently loading shared library from another namespace */
235
        block_dlopen();
231✔
236

237
         /* Join namespaces, but only if we're not part of them already. This is important if we don't
238
          * necessarily own the namespace in question, as kernel would unconditionally return EPERM otherwise. */
239

240
        if (pidns_fd >= 0) {
231✔
241
                r = is_our_namespace(pidns_fd, NAMESPACE_PID);
61✔
242
                if (r < 0)
61✔
243
                        return r;
244
                if (r > 0)
61✔
245
                        pidns_fd = -EBADF;
27✔
246
        }
247

248
        if (mntns_fd >= 0) {
231✔
249
                r = is_our_namespace(mntns_fd, NAMESPACE_MOUNT);
176✔
250
                if (r < 0)
176✔
251
                        return r;
252
                if (r > 0)
176✔
UNCOV
253
                        mntns_fd = -EBADF;
×
254
        }
255

256
        if (netns_fd >= 0) {
231✔
257
                r = is_our_namespace(netns_fd, NAMESPACE_NET);
44✔
258
                if (r < 0)
44✔
259
                        return r;
260
                if (r > 0)
44✔
261
                        netns_fd = -EBADF;
1✔
262
        }
263

264
        if (userns_fd >= 0) {
231✔
265
                /* Can't setns to your own userns, since then you could escalate from non-root to root in
266
                 * your own namespace, so check if namespaces are equal before attempting to enter. */
267

268
                r = is_our_namespace(userns_fd, NAMESPACE_USER);
15✔
269
                if (r < 0)
15✔
270
                        return r;
271
                if (r > 0)
15✔
272
                        userns_fd = -EBADF;
1✔
273
        }
274

275
        r = have_effective_cap(CAP_SYS_ADMIN);
231✔
276
        if (r < 0)
231✔
277
                return r;
278

279
        bool have_cap_sys_admin = r > 0;
231✔
280

281
        if (!have_cap_sys_admin) {
231✔
282
                /* If we don't have CAP_SYS_ADMIN in our own user namespace, our best bet is to enter the
283
                 * user namespace first (if we got one) to get CAP_SYS_ADMIN within the child user namespace,
284
                 * and then hope the other namespaces are owned by the child user namespace. If they aren't,
285
                 * we'll just get an EPERM later on when trying to setns() to them. */
286

287
                if (userns_fd < 0)
13✔
UNCOV
288
                        return log_debug_errno(
×
289
                                        SYNTHETIC_ERRNO(EPERM),
290
                                        "Need CAP_SYS_ADMIN or a child user namespace to enter namespaces.");
291

292
                if (setns(userns_fd, CLONE_NEWUSER) < 0)
13✔
UNCOV
293
                        return -errno;
×
294
        }
295

296
        if (pidns_fd >= 0)
231✔
297
                if (setns(pidns_fd, CLONE_NEWPID) < 0)
34✔
298
                        return -errno;
×
299

300
        if (mntns_fd >= 0)
231✔
301
                if (setns(mntns_fd, CLONE_NEWNS) < 0)
176✔
302
                        return -errno;
×
303

304
        if (netns_fd >= 0)
231✔
305
                if (setns(netns_fd, CLONE_NEWNET) < 0)
43✔
UNCOV
306
                        return -errno;
×
307

308
        if (userns_fd >= 0 && have_cap_sys_admin)
231✔
309
                if (setns(userns_fd, CLONE_NEWUSER) < 0)
1✔
UNCOV
310
                        return -errno;
×
311

312
        if (root_fd >= 0) {
231✔
313
                if (fchdir(root_fd) < 0)
64✔
UNCOV
314
                        return -errno;
×
315

316
                if (chroot(".") < 0)
64✔
UNCOV
317
                        return -errno;
×
318
        }
319

320
        if (userns_fd >= 0) {
231✔
321
                /* Try to become root in the user namespace but don't error out if we can't, since it's not
322
                 * uncommon to have user namespaces without a root user in them. */
323
                r = reset_uid_gid();
14✔
324
                if (r < 0)
14✔
325
                        log_debug_errno(r, "Unable to drop auxiliary groups or reset UID/GID, ignoring: %m");
2✔
326
        }
327

328
        return 0;
329
}
330

331
int fd_is_namespace(int fd, NamespaceType type) {
        int r, nstype;
        NamespaceType actual;

        /* Checks whether the fd refers to a namespace. If type is negative (i.e. _NAMESPACE_TYPE_INVALID)
         * any namespace type is accepted, otherwise the namespace must be of exactly that type. Returns
         * > 0 if so, 0 if not, and a negative errno-style error on failure. */

        assert(fd >= 0);
        assert(type < _NAMESPACE_TYPE_MAX);

        r = fd_is_fs_type(fd, NSFS_MAGIC);
        if (r <= 0) /* Error, or not on nsfs at all */
                return r;

        if (type < 0)
                return true;

        nstype = ioctl(fd, NS_GET_NSTYPE);
        if (nstype < 0)
                return -errno;

        actual = clone_flag_to_namespace_type(nstype);
        if (actual < 0)
                return -EBADF; /* A namespace type we do not know? */

        return actual == type;
}
356

357
int is_our_namespace(int fd, NamespaceType type) {
        _cleanup_close_ int own_fd = -EBADF;
        int r;

        /* Checks whether the namespace fd refers to the namespace of the given type the calling process
         * is in. Returns the result of fd_inode_same() for the two fds, -EUCLEAN if the fd is not a
         * namespace fd or of the wrong type, or another negative errno-style error. */

        assert(fd >= 0);
        assert(type < _NAMESPACE_TYPE_MAX);

        r = fd_is_namespace(fd, type);
        if (r < 0)
                return r;
        if (r == 0) /* Not a namespace fd, or one of a different type */
                return -EUCLEAN;

        own_fd = namespace_open_by_type(type);
        if (own_fd < 0)
                return own_fd;

        return fd_inode_same(fd, own_fd);
}
375

376
int are_our_namespaces(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
35✔
377
        int r;
35✔
378

379
        if (pidns_fd >= 0) {
35✔
380
                r = is_our_namespace(pidns_fd, NAMESPACE_PID);
35✔
381
                if (r <= 0)
35✔
382
                        return r;
383
        }
384

385
        if (mntns_fd >= 0) {
1✔
386
                r = is_our_namespace(mntns_fd, NAMESPACE_MOUNT);
1✔
387
                if (r <= 0)
1✔
388
                        return r;
389
        }
390

391
        if (netns_fd >= 0) {
1✔
UNCOV
392
                r = is_our_namespace(netns_fd, NAMESPACE_NET);
×
UNCOV
393
                if (r <= 0)
×
394
                        return r;
395
        }
396

397
        if (userns_fd >= 0) {
1✔
398
                r = is_our_namespace(userns_fd, NAMESPACE_USER);
1✔
399
                if (r <= 0)
1✔
400
                        return r;
401
        }
402

403
        if (root_fd >= 0) {
1✔
404
                r = dir_fd_is_root(root_fd);
1✔
405
                if (r <= 0)
1✔
UNCOV
406
                        return r;
×
407
        }
408

409
        return true;
410
}
411

412
int namespace_is_init(NamespaceType type) {
        struct stat st;
        int r;

        /* Determines whether we are in the initial namespace of the given type, by comparing the inode
         * number of our namespace link with the one recorded in the table. Returns > 0 if so, 0 if not,
         * -EBADR if no inode number is recorded for the type, -ENOSYS if /proc is not mounted, or another
         * negative errno-style error. */

        assert(type >= 0);
        assert(type < _NAMESPACE_TYPE_MAX);

        if (namespace_info[type].root_inode == 0)
                return -EBADR; /* This question cannot be answered for this type */

        const char *path = pid_namespace_path(0, type);

        r = RET_NERRNO(stat(path, &st));
        if (r == -ENOENT) {
                /* If the namespace link is missing while /proc/ itself is there, the namespace type is
                 * turned off in the kernel, and hence we are in the init namespace. */
                int mounted = proc_mounted();

                if (mounted < 0)
                        return -ENOENT; /* Cannot tell whether /proc/ is mounted, report original error */
                if (mounted == 0)
                        return -ENOSYS;

                return true;
        }
        if (r < 0)
                return r;

        return st.st_ino == namespace_info[type].root_inode;
}
438

439
int pidref_in_same_namespace(PidRef *pid1, PidRef *pid2, NamespaceType type) {
        _cleanup_close_ int ns1 = -EBADF, ns2 = -EBADF;

        /* Checks whether the two processes live in the same namespace of the given type. A NULL pidref
         * stands for the calling process. Returns the result of fd_inode_same() on the two namespace fds,
         * true right away if both pidrefs are equal, or a negative errno-style error. */

        assert(!pid1 || pidref_is_set(pid1));
        assert(!pid2 || pidref_is_set(pid2));
        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        if (pidref_equal(pid1, pid2))
                return true;

        ns1 = pid1 ? pidref_namespace_open_by_type(pid1, type) : namespace_open_by_type(type);
        if (ns1 < 0)
                return ns1;

        ns2 = pid2 ? pidref_namespace_open_by_type(pid2, type) : namespace_open_by_type(type);
        if (ns2 < 0)
                return ns2;

        return fd_inode_same(ns1, ns2);
}
467

468
int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type) {
        /* Numeric PID flavour of pidref_in_same_namespace(). A PID of 0 stands for the calling process and
         * is passed on as NULL. */

        assert(pid1 >= 0);
        assert(pid2 >= 0);

        /* Both temporaries are created directly in the function body's scope, so that they remain valid
         * for the duration of the call below. */
        PidRef *first = pid1 == 0 ? NULL : &PIDREF_MAKE_FROM_PID(pid1);
        PidRef *second = pid2 == 0 ? NULL : &PIDREF_MAKE_FROM_PID(pid2);

        return pidref_in_same_namespace(first, second, type);
}
475

476
int namespace_get_leader(PidRef *pidref, NamespaceType type, PidRef *ret) {
        int r;

        /* Walks up the process tree, starting at pidref, until a process is found whose parent lives in a
         * different namespace of the given type. That process is returned in *ret. Returns 0 on success,
         * negative errno-style error otherwise.
         *
         * Note: pidref_is_set()/pidref_is_remote() are not checked here, since the first thing we call,
         * pidref_get_ppid_as_pidref(), does that anyway. */

        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);
        assert(ret);

        _cleanup_(pidref_done) PidRef current = PIDREF_NULL;
        PidRef *c = pidref;

        while (true) {
                _cleanup_(pidref_done) PidRef parent = PIDREF_NULL;

                r = pidref_get_ppid_as_pidref(c, &parent);
                if (r < 0)
                        return r;

                r = pidref_in_same_namespace(c, &parent, type);
                if (r < 0)
                        return r;
                if (r > 0) {
                        /* Same namespace, hence continue one level further up. */
                        pidref_done(&current);
                        current = TAKE_PIDREF(parent);
                        c = &current;
                        continue;
                }

                /* Parent and child are in different namespaces, hence the child is the leader we are
                 * looking for. */
                if (pidref_is_set(&current)) {
                        *ret = TAKE_PIDREF(current);
                        return 0;
                }

                /* We never moved up, i.e. the leader is the pidref passed in, which we do not own. */
                r = pidref_copy(c, ret);
                if (r < 0)
                        return r;

                return 0;
        }
}
518

519
int detach_mount_namespace(void) {
291✔
520
        /* Detaches the mount namespace, disabling propagation from our namespace to the host. Sets
521
         * propagation first to MS_SLAVE for all mounts (disabling propagation), and then back to MS_SHARED
522
         * (so that we create a new peer group).  */
523

524
        if (unshare(CLONE_NEWNS) < 0)
291✔
UNCOV
525
                return log_debug_errno(errno, "Failed to acquire mount namespace: %m");
×
526

527
        if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
291✔
UNCOV
528
                return log_debug_errno(errno, "Failed to set mount propagation to MS_SLAVE for all mounts: %m");
×
529

530
        if (mount(NULL, "/", NULL, MS_SHARED | MS_REC, NULL) < 0)
291✔
531
                return log_debug_errno(errno, "Failed to set mount propagation back to MS_SHARED for all mounts: %m");
×
532

533
        return 0;
534
}
535

536
int detach_mount_namespace_harder(uid_t target_uid, gid_t target_gid) {
87✔
537
        uid_t from_uid;
87✔
538
        gid_t from_gid;
87✔
539
        int r;
87✔
540

541
        /* Tried detach_mount_namespace() first. If that doesn't work due to permissions, opens up an
542
         * unprivileged user namespace with a mapping of the originating UID/GID to the specified target
543
         * UID/GID. Then, tries detach_mount_namespace() again.
544
         *
545
         * Or in other words: tries much harder to get a mount namespace, making use of unprivileged user
546
         * namespaces if need be.
547
         *
548
         * Note that after this function completed:
549
         *
550
         *    → if we had privs, afterwards uids/gids on files and processes are as before
551
         *
552
         *    → if we had no privs, our own id and all our files will show up owned by target_uid/target_gid,
553
         *    and everything else owned by nobody.
554
         *
555
         * Yes, that's quite a difference. */
556

557
        if (!uid_is_valid(target_uid))
87✔
558
                return -EINVAL;
559
        if (!gid_is_valid(target_gid))
87✔
560
                return -EINVAL;
561

562
        r = detach_mount_namespace();
87✔
563
        if (r != -EPERM)
87✔
564
                return r;
565

566
        from_uid = getuid();
×
567
        from_gid = getgid();
×
568

569
        if (unshare(CLONE_NEWUSER) < 0)
×
UNCOV
570
                return log_debug_errno(errno, "Failed to acquire user namespace: %m");
×
571

572
        r = write_string_filef("/proc/self/uid_map", 0,
×
573
                               UID_FMT " " UID_FMT " 1\n", target_uid, from_uid);
574
        if (r < 0)
×
UNCOV
575
                return log_debug_errno(r, "Failed to write uid map: %m");
×
576

UNCOV
577
        r = write_string_file("/proc/self/setgroups", "deny", 0);
×
UNCOV
578
        if (r < 0)
×
UNCOV
579
                return log_debug_errno(r, "Failed to write setgroups file: %m");
×
580

UNCOV
581
        r = write_string_filef("/proc/self/gid_map", 0,
×
582
                               GID_FMT " " GID_FMT " 1\n", target_gid, from_gid);
583
        if (r < 0)
×
UNCOV
584
                return log_debug_errno(r, "Failed to write gid map: %m");
×
585

586
        return detach_mount_namespace();
×
587
}
588

589
int detach_mount_namespace_userns(int userns_fd) {
65✔
590
        int r;
65✔
591

592
        assert(userns_fd >= 0);
65✔
593

594
        if (setns(userns_fd, CLONE_NEWUSER) < 0)
65✔
UNCOV
595
                return log_debug_errno(errno, "Failed to join user namespace: %m");
×
596

597
        r = reset_uid_gid();
65✔
598
        if (r < 0)
65✔
UNCOV
599
                return log_debug_errno(r, "Failed to become root in user namespace: %m");
×
600

601
        return detach_mount_namespace();
65✔
602
}
603

604
int parse_userns_uid_range(const char *s, uid_t *ret_uid_shift, uid_t *ret_uid_range) {
2✔
605
        _cleanup_free_ char *buffer = NULL;
2✔
606
        const char *range, *shift;
2✔
607
        int r;
2✔
608
        uid_t uid_shift, uid_range = 65536;
2✔
609

610
        assert(s);
2✔
611

612
        range = strchr(s, ':');
2✔
613
        if (range) {
2✔
UNCOV
614
                buffer = strndup(s, range - s);
×
UNCOV
615
                if (!buffer)
×
UNCOV
616
                        return log_oom();
×
UNCOV
617
                shift = buffer;
×
618

UNCOV
619
                range++;
×
UNCOV
620
                r = safe_atou32(range, &uid_range);
×
UNCOV
621
                if (r < 0)
×
UNCOV
622
                        return log_error_errno(r, "Failed to parse UID range \"%s\": %m", range);
×
623
        } else
624
                shift = s;
625

626
        r = parse_uid(shift, &uid_shift);
2✔
627
        if (r < 0)
2✔
628
                return log_error_errno(r, "Failed to parse UID \"%s\": %m", s);
2✔
629

UNCOV
630
        if (!userns_shift_range_valid(uid_shift, uid_range))
×
UNCOV
631
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "UID range cannot be empty or go beyond " UID_FMT ".", UID_INVALID);
×
632

UNCOV
633
        if (ret_uid_shift)
×
UNCOV
634
                *ret_uid_shift = uid_shift;
×
635

UNCOV
636
        if (ret_uid_range)
×
UNCOV
637
                *ret_uid_range = uid_range;
×
638

639
        return 0;
640
}
641

642
int userns_acquire_empty(void) {
99✔
643
        _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL;
99✔
644
        int r;
99✔
645

646
        r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS|FORK_FREEZE, &pid);
99✔
647
        if (r < 0)
99✔
648
                return r;
649
        assert(r > 0);
98✔
650

651
        return pidref_namespace_open_by_type(&pid, NAMESPACE_USER);
98✔
652
}
653

654
int userns_acquire(const char *uid_map, const char *gid_map, bool setgroups_deny) {
3,108✔
655
        char path[STRLEN("/proc//setgroups") + DECIMAL_STR_MAX(pid_t) + 1];
3,108✔
656
        _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL;
3,108✔
657
        int r;
3,108✔
658

659
        assert(uid_map);
3,108✔
660
        assert(gid_map);
3,108✔
661

662
        /* Forks off a process in a new userns, configures the specified uidmap/gidmap, acquires an fd to it,
663
         * and then kills the process again. This way we have a userns fd that is not bound to any
664
         * process. We can use that for file system mounts and similar. */
665

666
        r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS|FORK_FREEZE, &pid);
3,108✔
667
        if (r < 0)
3,108✔
668
                return r;
669
        assert(r > 0);
3,108✔
670

671
        xsprintf(path, "/proc/" PID_FMT "/uid_map", pid.pid);
3,108✔
672
        r = write_string_file(path, uid_map, WRITE_STRING_FILE_DISABLE_BUFFER);
3,108✔
673
        if (r < 0)
3,108✔
UNCOV
674
                return log_debug_errno(r, "Failed to write UID map: %m");
×
675

676
        if (setgroups_deny) {
3,108✔
677
                xsprintf(path, "/proc/" PID_FMT "/setgroups", pid.pid);
3,108✔
678
                r = write_string_file(path, "deny", WRITE_STRING_FILE_DISABLE_BUFFER);
3,108✔
679
                if (r < 0)
3,108✔
UNCOV
680
                        return log_debug_errno(r, "Failed to write setgroups file: %m");
×
681
        }
682

683
        xsprintf(path, "/proc/" PID_FMT "/gid_map", pid.pid);
3,108✔
684
        r = write_string_file(path, gid_map, WRITE_STRING_FILE_DISABLE_BUFFER);
3,108✔
685
        if (r < 0)
3,108✔
UNCOV
686
                return log_debug_errno(r, "Failed to write GID map: %m");
×
687

688
        return pidref_namespace_open_by_type(&pid, NAMESPACE_USER);
3,108✔
689
}
690

691
int userns_acquire_self_root(void) {
2,928✔
692

693
        /* Returns a user namespace with only our own uid/gid mapped to root, and everything else unmapped.
694
         *
695
         * Note: this can be acquired unprivileged! */
696

697
        _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
2,928✔
698
        if (asprintf(&uid_map, "0 " UID_FMT " 1", getuid()) < 0)
2,928✔
699
                return -ENOMEM;
700
        if (asprintf(&gid_map, "0 " GID_FMT " 1", getgid()) < 0)
2,928✔
701
                return -ENOMEM;
702

703
        return userns_acquire(uid_map, gid_map, /* setgroups_deny= */ true);
2,928✔
704
}
705

706
int userns_enter_and_pin(int userns_fd, PidRef *ret) {
        _cleanup_close_pair_ int sync_pipe[2] = EBADF_PAIR;
        _cleanup_(pidref_done_sigkill_wait) PidRef child = PIDREF_NULL;
        ssize_t n;
        char x;
        int r;

        /* Forks off a child that joins the specified user namespace and then freezes. The child reports
         * over a pipe once it has joined, and only then its PidRef is handed to the caller in *ret.
         * Returns 0 on success, -EPROTO if the child closed the pipe without reporting, or another negative
         * errno-style error. */

        assert(userns_fd >= 0);
        assert(ret);

        if (pipe2(sync_pipe, O_CLOEXEC) < 0)
                return -errno;

        r = pidref_safe_fork_full(
                        "(sd-pinuserns)",
                        /* stdio_fds= */ NULL,
                        (int[]) { sync_pipe[1], userns_fd }, 2,
                        FORK_CLOSE_ALL_FDS|FORK_REOPEN_LOG|FORK_DEATHSIG_SIGKILL,
                        &child);
        if (r < 0)
                return r;
        if (r == 0) {
                /* We are the child: join the namespace, tell the parent, then stay around. */
                const char ready = 'x';

                if (setns(userns_fd, CLONE_NEWUSER) < 0) {
                        log_debug_errno(errno, "Failed to join userns: %m");
                        _exit(EXIT_FAILURE);
                }

                userns_fd = safe_close(userns_fd);

                n = write(sync_pipe[1], &ready, 1);
                if (n < 0) {
                        log_debug_errno(errno, "Failed to write to pipe: %m");
                        _exit(EXIT_FAILURE);
                }
                assert(n == 1);

                freeze();
        }

        /* We are the parent: drop the write end, so that we see EOF if the child goes away early. */
        sync_pipe[1] = safe_close(sync_pipe[1]);

        n = read(sync_pipe[0], &x, 1);
        if (n < 0)
                return -errno;
        if (n == 0)
                return -EPROTO;
        assert(n == 1);
        assert(x == 'x');

        *ret = TAKE_PIDREF(child);
        return 0;
}
760

761
/* Reports whether /proc/self/uid_map exists, which is used here as the indicator for user namespace
 * support. */
bool userns_supported(void) {
        if (access("/proc/self/uid_map", F_OK) < 0)
                return false;

        return true;
}
764

765
int userns_get_base_uid(int userns_fd, uid_t *ret_uid, gid_t *ret_gid) {
26✔
766
        _cleanup_(pidref_done_sigkill_wait) PidRef pidref = PIDREF_NULL;
26✔
767
        int r;
26✔
768

769
        assert(userns_fd >= 0);
26✔
770

771
        r = userns_enter_and_pin(userns_fd, &pidref);
26✔
772
        if (r < 0)
26✔
773
                return r;
774

775
        uid_t uid;
26✔
776
        r = uid_map_search_root(pidref.pid, UID_RANGE_USERNS_OUTSIDE, &uid);
26✔
777
        if (r < 0)
26✔
778
                return r;
779

780
        gid_t gid;
25✔
781
        r = uid_map_search_root(pidref.pid, GID_RANGE_USERNS_OUTSIDE, &gid);
25✔
782
        if (r < 0)
25✔
783
                return r;
784

785
        if (!ret_gid && uid != gid)
25✔
786
                return -EUCLEAN;
787

788
        if (ret_uid)
24✔
789
                *ret_uid = uid;
24✔
790
        if (ret_gid)
24✔
791
                *ret_gid = gid;
1✔
792

793
        return 0;
794
}
795

796
int process_is_owned_by_uid(const PidRef *pidref, uid_t uid) {
14✔
797
        int r;
14✔
798

799
        /* Checks if the specified process either is owned directly by the specified user, or if it is inside
800
         * a user namespace owned by it. */
801

802
        assert(uid_is_valid(uid));
14✔
803

804
        uid_t process_uid;
14✔
805
        r = pidref_get_uid(pidref, &process_uid);
14✔
806
        if (r < 0)
14✔
807
                return r;
14✔
808
        if (process_uid == uid)
14✔
809
                return true;
810

811
        _cleanup_close_ int userns_fd = -EBADF;
14✔
812
        userns_fd = pidref_namespace_open_by_type(pidref, NAMESPACE_USER);
11✔
813
        if (userns_fd == -ENOPKG) /* If userns is not supported, then they don't matter for ownership */
11✔
814
                return false;
815
        if (userns_fd < 0)
11✔
816
                return userns_fd;
817

UNCOV
818
        for (unsigned iteration = 0;; iteration++) {
×
819
                uid_t ns_uid;
11✔
820

821
                /* This process is in our own userns? Then we are done, in our own userns only the UIDs
822
                 * themselves matter. */
823
                r = is_our_namespace(userns_fd, NAMESPACE_USER);
11✔
824
                if (r < 0)
11✔
825
                        return r;
11✔
826
                if (r > 0)
11✔
827
                        return false;
828

829
                if (ioctl(userns_fd, NS_GET_OWNER_UID, &ns_uid) < 0)
9✔
830
                        return -errno;
×
831
                if (ns_uid == uid)
9✔
832
                        return true;
833

834
                /* Paranoia check */
UNCOV
835
                if (iteration > 16)
×
UNCOV
836
                        return log_debug_errno(SYNTHETIC_ERRNO(ELOOP), "Giving up while tracing parents of user namespaces after %u steps.", iteration);
×
837

838
                /* Go up the tree */
839
                _cleanup_close_ int parent_fd = ioctl(userns_fd, NS_GET_USERNS);
11✔
840
                if (parent_fd < 0) {
×
UNCOV
841
                        if (errno == EPERM) /* EPERM means we left our own userns */
×
842
                                return false;
843

UNCOV
844
                        return -errno;
×
845
                }
846

UNCOV
847
                close_and_replace(userns_fd, parent_fd);
×
848
        }
849
}
850

851
int is_idmapping_supported(const char *path) {
2,927✔
852
        _cleanup_close_ int mount_fd = -EBADF, userns_fd = -EBADF, dir_fd = -EBADF;
5,854✔
853
        int r;
2,927✔
854

855
        assert(path);
2,927✔
856

857
        if (!mount_new_api_supported())
2,927✔
858
                return false;
859

860
        userns_fd = r = userns_acquire_self_root();
2,927✔
861
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL)
2,927✔
862
                return false;
863
        if (r == -ENOSPC) {
2,927✔
UNCOV
864
                log_debug_errno(r, "Failed to acquire new user namespace, user.max_user_namespaces seems to be exhausted or maybe even zero, assuming ID-mapping is not supported: %m");
×
UNCOV
865
                return false;
×
866
        }
867
        if (r < 0)
2,927✔
UNCOV
868
                return log_debug_errno(r, "Failed to acquire new user namespace for checking if '%s' supports ID-mapping: %m", path);
×
869

870
        dir_fd = r = RET_NERRNO(open(path, O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2,927✔
871
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r))
5,854✔
872
                return false;
873
        if (r < 0)
2,927✔
UNCOV
874
                return log_debug_errno(r, "Failed to open '%s', cannot determine if ID-mapping is supported: %m", path);
×
875

876
        mount_fd = r = RET_NERRNO(open_tree(dir_fd, "", AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC));
2,927✔
877
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL)
2,927✔
UNCOV
878
                return false;
×
879
        if (r < 0)
2,927✔
UNCOV
880
                return log_debug_errno(r, "Failed to open mount tree '%s', cannot determine if ID-mapping is supported: %m", path);
×
881

882
        r = RET_NERRNO(mount_setattr(mount_fd, "", AT_EMPTY_PATH,
2,927✔
883
                       &(struct mount_attr) {
2,927✔
884
                                .attr_set = MOUNT_ATTR_IDMAP | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RDONLY | MOUNT_ATTR_NODEV,
885
                                .userns_fd = userns_fd,
886
                        }, sizeof(struct mount_attr)));
887
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL)
2,927✔
UNCOV
888
                return false;
×
889
        if (r < 0)
2,927✔
UNCOV
890
                return log_debug_errno(r, "Failed to set mount attribute to '%s', cannot determine if ID-mapping is supported: %m", path);
×
891

892
        return true;
893
}
894

895
int netns_acquire(void) {
7✔
896
        _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL;
7✔
897
        int r;
7✔
898

899
        /* Forks off a process in a new network namespace, acquires a network namespace fd, and then kills
900
         * the process again. This way we have a netns fd that is not bound to any process. */
901

902
        r = pidref_safe_fork("(sd-mknetns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_NETNS|FORK_FREEZE, &pid);
7✔
903
        if (r < 0)
7✔
UNCOV
904
                return log_debug_errno(r, "Failed to fork process into new netns: %m");
×
905
        assert(r > 0);
7✔
906

907
        return pidref_namespace_open_by_type(&pid, NAMESPACE_NET);
7✔
908
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc