• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 20321298054

17 Dec 2025 11:19PM UTC coverage: 72.709% (-0.03%) from 72.736%
20321298054

push

github

YHNdnzj
core: set Result=start-limit-hit when a unit is rate limited

There is currently no way to figure out that a rate limit was hit on a
unit, because the last result is stripped in order to keep reporting the
first result. Keeping the first result is useful in case of a watchdog
failure, which is the reason why it was changed as such.

But rate limiting is also important information to provide to
users, so allow the Result property to reflect it when it
happens.

6 of 7 new or added lines in 7 files covered. (85.71%)

1358 existing lines in 49 files now uncovered.

309654 of 425883 relevant lines covered (72.71%)

1138050.05 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

79.75
/src/basic/namespace-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <fcntl.h>
4
#include <linux/magic.h>
5
#include <linux/nsfs.h>
6
#include <sched.h>
7
#include <sys/ioctl.h>
8
#include <sys/mount.h>
9
#include <unistd.h>
10

11
#include "dlfcn-util.h"
12
#include "errno-util.h"
13
#include "fd-util.h"
14
#include "fileio.h"
15
#include "log.h"
16
#include "mountpoint-util.h"
17
#include "namespace-util.h"
18
#include "parse-util.h"
19
#include "pidfd-util.h"
20
#include "pidref.h"
21
#include "process-util.h"
22
#include "stat-util.h"
23
#include "stdio-util.h"
24
#include "uid-range.h"
25
#include "user-util.h"
26

27
const struct namespace_info namespace_info[_NAMESPACE_TYPE_MAX + 1] = {
28
        [NAMESPACE_CGROUP] =  { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, PIDFD_GET_CGROUP_NAMESPACE, PROC_CGROUP_INIT_INO },
29
        [NAMESPACE_IPC]    =  { "ipc",    "ns/ipc",    CLONE_NEWIPC,    PIDFD_GET_IPC_NAMESPACE,    PROC_IPC_INIT_INO    },
30
        [NAMESPACE_NET]    =  { "net",    "ns/net",    CLONE_NEWNET,    PIDFD_GET_NET_NAMESPACE,    0                    },
31
        /* So, the mount namespace flag is called CLONE_NEWNS for historical
32
         * reasons. Let's expose it here under a more explanatory name: "mnt".
33
         * This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */
34
        [NAMESPACE_MOUNT]  =  { "mnt",    "ns/mnt",    CLONE_NEWNS,     PIDFD_GET_MNT_NAMESPACE,    0                    },
35
        [NAMESPACE_PID]    =  { "pid",    "ns/pid",    CLONE_NEWPID,    PIDFD_GET_PID_NAMESPACE,    PROC_PID_INIT_INO    },
36
        [NAMESPACE_USER]   =  { "user",   "ns/user",   CLONE_NEWUSER,   PIDFD_GET_USER_NAMESPACE,   PROC_USER_INIT_INO   },
37
        [NAMESPACE_UTS]    =  { "uts",    "ns/uts",    CLONE_NEWUTS,    PIDFD_GET_UTS_NAMESPACE,    PROC_UTS_INIT_INO    },
38
        [NAMESPACE_TIME]   =  { "time",   "ns/time",   CLONE_NEWTIME,   PIDFD_GET_TIME_NAMESPACE,   PROC_TIME_INIT_INO   },
39
        {}, /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */
40
};
41

42
/* Builds the procfs path of the namespace file of the given type for the given PID, using the
 * proc_path column of namespace_info[]. NOTE(review): procfs_file_alloca() presumably places the string
 * on the caller's stack, so the result should not outlive the calling function — confirm. */
#define pid_namespace_path(pid, type) procfs_file_alloca(pid, namespace_info[type].proc_path)
43

44
NamespaceType clone_flag_to_namespace_type(unsigned long clone_flag) {
338✔
45
        for (NamespaceType t = 0; t < _NAMESPACE_TYPE_MAX; t++)
1,780✔
46
                if (((namespace_info[t].clone_flag ^ clone_flag) & (CLONE_NEWCGROUP|CLONE_NEWIPC|CLONE_NEWNET|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUSER|CLONE_NEWUTS|CLONE_NEWTIME)) == 0)
1,780✔
47
                        return t;
48

49
        return _NAMESPACE_TYPE_INVALID;
50
}
51

52
bool namespace_type_supported(NamespaceType type) {
        const char *ns_path;

        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        /* The namespace type counts as supported if its procfs namespace file exists. PID 0 is what we
         * hand to the path helper here (presumably meaning "ourselves" — see procfs_file_alloca()). */
        ns_path = pid_namespace_path(0, type);
        if (access(ns_path, F_OK) < 0)
                return false;

        return true;
}
58

59
static int pidref_namespace_open_by_type_internal(const PidRef *pidref, NamespaceType type, bool *need_verify) {
        int r;

        /* Opens the namespace of the given type of the process referenced by pidref and returns an fd
         * to it. The pidfd ioctl is preferred; if that is not supported (or there is no pidfd) we fall
         * back to opening the namespace file in /proc/.
         *
         * If need_verify is non-NULL it is set to true whenever the /proc/ fallback was used, and the
         * caller is expected to call pidref_verify() itself afterwards. If it is NULL we verify here.
         *
         * Returns -EREMOTE for remote pidrefs, -ENOSYS if /proc/ is not mounted, and -ENOPKG if /proc/
         * is mounted but the namespace file is missing. */

        assert(pidref_is_set(pidref));
        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        if (pidref_is_remote(pidref))
                return -EREMOTE;

        if (pidref->fd >= 0) {
                r = pidfd_get_namespace(pidref->fd, namespace_info[type].pidfd_get_ns_ioctl_cmd);
                if (!ERRNO_IS_NEG_NOT_SUPPORTED(r))
                        return r; /* Either success or a real error: done in both cases */
        }

        /* From here on we go by PID, hence somebody has to verify the pidref afterwards. */
        if (need_verify)
                *need_verify = true;

        _cleanup_close_ int fd = -EBADF;
        const char *path;

        path = pid_namespace_path(pidref->pid, type);
        fd = RET_NERRNO(open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC));
        if (fd < 0) {
                if (fd == -ENOENT) {
                        r = proc_mounted();
                        if (r == 0)
                                return -ENOSYS;  /* /proc/ is not available or not set up properly, most
                                                  * likely we are running in some chroot environment. */
                        if (r > 0)
                                return -ENOPKG;  /* /proc/ is definitely there, hence the namespace type
                                                  * is not supported */

                        /* Couldn't figure it out? Then fall through and return the original error. */
                }

                return fd;
        }

        if (!need_verify) { /* Nobody else is going to verify, hence do so ourselves */
                r = pidref_verify(pidref);
                if (r < 0)
                        return r;
        }

        return TAKE_FD(fd);
}
103

104
int pidref_namespace_open_by_type(const PidRef *pidref, NamespaceType type) {
        /* Public variant of the internal helper: no need_verify out-parameter is passed, hence the
         * helper verifies the pidref on its own whenever it had to fall back to /proc/. */
        int nsfd = pidref_namespace_open_by_type_internal(pidref, type, /* need_verify= */ NULL);

        return nsfd;
}
107

108
int namespace_open_by_type(NamespaceType type) {
        _cleanup_(pidref_done) PidRef own = PIDREF_NULL;
        int q;

        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        /* Acquire a pidref via pidref_set_self() first, then reuse the generic pidref-based code path
         * to open the namespace of the requested type. */
        q = pidref_set_self(&own);
        if (q < 0)
                return q;

        return pidref_namespace_open_by_type(&own, type);
}
120

121
int pidref_namespace_open(
151✔
122
                const PidRef *pidref,
123
                int *ret_pidns_fd,
124
                int *ret_mntns_fd,
125
                int *ret_netns_fd,
126
                int *ret_userns_fd,
127
                int *ret_root_fd) {
128

129
        _cleanup_close_ int pidns_fd = -EBADF, mntns_fd = -EBADF, netns_fd = -EBADF,
302✔
130
                userns_fd = -EBADF, root_fd = -EBADF;
302✔
131
        bool need_verify = false;
151✔
132
        int r;
151✔
133

134
        assert(pidref_is_set(pidref));
151✔
135

136
        if (pidref_is_remote(pidref))
302✔
137
                return -EREMOTE;
138

139
        if (ret_pidns_fd) {
151✔
140
                pidns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_PID, &need_verify);
112✔
141
                if (pidns_fd < 0)
112✔
142
                        return pidns_fd;
143
        }
144

145
        if (ret_mntns_fd) {
151✔
146
                mntns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_MOUNT, &need_verify);
113✔
147
                if (mntns_fd < 0)
113✔
148
                        return mntns_fd;
149
        }
150

151
        if (ret_netns_fd) {
151✔
152
                netns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_NET, &need_verify);
41✔
153
                if (netns_fd < 0)
41✔
154
                        return netns_fd;
155
        }
156

157
        if (ret_userns_fd) {
151✔
158
                userns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_USER, &need_verify);
38✔
159
                if (userns_fd < 0 && userns_fd != -ENOPKG)
38✔
160
                        return userns_fd;
161
        }
162

163
        if (ret_root_fd) {
151✔
164
                const char *root;
113✔
165

166
                root = procfs_file_alloca(pidref->pid, "root");
113✔
167
                root_fd = RET_NERRNO(open(root, O_CLOEXEC|O_DIRECTORY));
113✔
UNCOV
168
                if (root_fd == -ENOENT && proc_mounted() == 0)
×
169
                        return -ENOSYS;
170
                if (root_fd < 0)
113✔
171
                        return root_fd;
172

173
                need_verify = true;
113✔
174
        }
175

176
        if (need_verify) {
151✔
177
                r = pidref_verify(pidref);
113✔
178
                if (r < 0)
113✔
179
                        return r;
180
        }
181

182
        if (ret_pidns_fd)
151✔
183
                *ret_pidns_fd = TAKE_FD(pidns_fd);
112✔
184

185
        if (ret_mntns_fd)
151✔
186
                *ret_mntns_fd = TAKE_FD(mntns_fd);
113✔
187

188
        if (ret_netns_fd)
151✔
189
                *ret_netns_fd = TAKE_FD(netns_fd);
41✔
190

191
        if (ret_userns_fd)
151✔
192
                *ret_userns_fd = TAKE_FD(userns_fd);
38✔
193

194
        if (ret_root_fd)
151✔
195
                *ret_root_fd = TAKE_FD(root_fd);
113✔
196

197
        return 0;
198
}
199

200
int namespace_open(
36✔
201
                pid_t pid,
202
                int *ret_pidns_fd,
203
                int *ret_mntns_fd,
204
                int *ret_netns_fd,
205
                int *ret_userns_fd,
206
                int *ret_root_fd) {
207

208
        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
36✔
209
        int r;
36✔
210

211
        r = pidref_set_pid(&pidref, pid);
36✔
212
        if (r < 0)
36✔
213
                return r;
214

215
        return pidref_namespace_open(&pidref, ret_pidns_fd, ret_mntns_fd, ret_netns_fd, ret_userns_fd, ret_root_fd);
36✔
216
}
217

218
int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
363✔
219
        int r;
363✔
220

221
        /* Block dlopen() now, to avoid us inadvertently loading shared library from another namespace */
222
        block_dlopen();
363✔
223

224
        if (userns_fd >= 0) {
363✔
225
                /* Can't setns to your own userns, since then you could escalate from non-root to root in
226
                 * your own namespace, so check if namespaces are equal before attempting to enter. */
227

228
                r = is_our_namespace(userns_fd, NAMESPACE_USER);
14✔
229
                if (r < 0)
14✔
230
                        return r;
231
                if (r > 0)
14✔
232
                        userns_fd = -EBADF;
2✔
233
        }
234

235
        if (pidns_fd >= 0)
363✔
236
                if (setns(pidns_fd, CLONE_NEWPID) < 0)
59✔
237
                        return -errno;
×
238

239
        if (mntns_fd >= 0)
363✔
240
                if (setns(mntns_fd, CLONE_NEWNS) < 0)
296✔
241
                        return -errno;
×
242

243
        if (netns_fd >= 0)
363✔
244
                if (setns(netns_fd, CLONE_NEWNET) < 0)
56✔
245
                        return -errno;
×
246

247
        if (userns_fd >= 0)
363✔
248
                if (setns(userns_fd, CLONE_NEWUSER) < 0)
12✔
249
                        return -errno;
×
250

251
        if (root_fd >= 0) {
363✔
252
                if (fchdir(root_fd) < 0)
60✔
UNCOV
253
                        return -errno;
×
254

255
                if (chroot(".") < 0)
60✔
UNCOV
256
                        return -errno;
×
257
        }
258

259
        if (userns_fd >= 0)
363✔
260
                return reset_uid_gid();
12✔
261

262
        return 0;
263
}
264

265
int fd_is_namespace(int fd, NamespaceType type) {
        NamespaceType actual;
        int nstype, r;

        /* Checks whether the file descriptor refers to a namespace. If type is not negative (i.e. not
         * _NAMESPACE_TYPE_INVALID) the namespace must additionally be of that type. Returns > 0 if
         * so, 0 if not, negative errno on failure. */

        assert(fd >= 0);
        assert(type < _NAMESPACE_TYPE_MAX);

        r = fd_is_fs_type(fd, NSFS_MAGIC);
        if (r <= 0)
                return r;

        /* Any namespace type will do? Then we are done already. */
        if (type < 0)
                return true;

        nstype = ioctl(fd, NS_GET_NSTYPE);
        if (nstype < 0)
                return -errno;

        actual = clone_flag_to_namespace_type(nstype);
        if (actual < 0)
                return -EBADF; /* A namespace type we have never heard of? */

        return actual == type;
}
290

291
int is_our_namespace(int fd, NamespaceType type) {
        int q;

        /* Checks whether fd refers to the very namespace of the given type we are running in
         * ourselves. Returns -EUCLEAN if fd is not a namespace fd, or one of a different type. */

        assert(fd >= 0);
        assert(type < _NAMESPACE_TYPE_MAX);

        q = fd_is_namespace(fd, type);
        if (q < 0)
                return q;
        if (q == 0) /* Either no namespace at all, or one of the wrong type */
                return -EUCLEAN;

        _cleanup_close_ int self_ns = namespace_open_by_type(type);
        if (self_ns < 0)
                return self_ns;

        /* Same inode means same namespace */
        return fd_inode_same(fd, self_ns);
}
309

310
int namespace_is_init(NamespaceType type) {
        struct stat st;
        const char *ns_path;
        int r;

        /* Checks whether our namespace of the given type is the initial one, by comparing the inode
         * number of our namespace file with the root_inode recorded in namespace_info[]. Returns
         * -EBADR for types that have no root_inode recorded. */

        assert(type >= 0);
        assert(type < _NAMESPACE_TYPE_MAX);

        if (namespace_info[type].root_inode == 0)
                return -EBADR; /* No well-known inode, hence we cannot tell */

        ns_path = pid_namespace_path(0, type);

        r = RET_NERRNO(stat(ns_path, &st));
        if (r == -ENOENT) {
                /* The namespace file is missing. If /proc/ is mounted this means the kernel has the
                 * namespace type turned off, and thus we must be in the initial namespace. */
                int mounted = proc_mounted();

                if (mounted < 0)
                        return -ENOENT; /* Can't tell whether /proc/ is mounted: report the original error */
                if (mounted == 0)
                        return -ENOSYS;

                return true;
        }
        if (r < 0)
                return r;

        return st.st_ino == namespace_info[type].root_inode;
}
336

337
int pidref_in_same_namespace(PidRef *pid1, PidRef *pid2, NamespaceType type) {
        _cleanup_close_ int first_ns = -EBADF, second_ns = -EBADF;

        /* Checks whether the two processes share the namespace of the given type. Either pidref may
         * be NULL, in which case our own process is meant. */

        assert(!pid1 || pidref_is_set(pid1));
        assert(!pid2 || pidref_is_set(pid2));
        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);

        /* Shortcut: the same process is trivially in the same namespace as itself. */
        if (pidref_equal(pid1, pid2))
                return true;

        first_ns = pid1 ? pidref_namespace_open_by_type(pid1, type)
                        : namespace_open_by_type(type);
        if (first_ns < 0)
                return first_ns;

        second_ns = pid2 ? pidref_namespace_open_by_type(pid2, type)
                         : namespace_open_by_type(type);
        if (second_ns < 0)
                return second_ns;

        return fd_inode_same(first_ns, second_ns);
}
365

366
int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type) {
        /* PID-based wrapper around pidref_in_same_namespace(). A PID of 0 is translated into a NULL
         * pidref, which that function takes as "our own process". */

        assert(pid1 >= 0);
        assert(pid2 >= 0);

        /* Note: the temporary pidrefs are created at function scope on purpose, so that they remain
         * valid for the duration of the call below. */
        PidRef *first = pid1 == 0 ? NULL : &PIDREF_MAKE_FROM_PID(pid1);
        PidRef *second = pid2 == 0 ? NULL : &PIDREF_MAKE_FROM_PID(pid2);

        return pidref_in_same_namespace(first, second, type);
}
373

374
int namespace_get_leader(PidRef *pidref, NamespaceType type, PidRef *ret) {
        int r;

        /* Walks up the chain of parent processes, starting at pidref, until a process is found whose
         * parent lives in a different namespace of the given type. That process is returned in *ret.
         *
         * Note: pidref_is_set()/pidref_is_remote() are not checked here, since the first thing we
         * call, pidref_get_ppid_as_pidref(), does that anyway. */

        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);
        assert(ret);

        _cleanup_(pidref_done) PidRef owned = PIDREF_NULL;
        PidRef *cursor = pidref;

        for (;;) {
                _cleanup_(pidref_done) PidRef up = PIDREF_NULL;

                r = pidref_get_ppid_as_pidref(cursor, &up);
                if (r < 0)
                        return r;

                r = pidref_in_same_namespace(cursor, &up, type);
                if (r < 0)
                        return r;
                if (r > 0) {
                        /* Parent shares the namespace: continue the search one level further up. */
                        pidref_done(&owned);
                        owned = TAKE_PIDREF(up);
                        cursor = &owned;
                        continue;
                }

                /* Parent and child are in different namespaces, hence the child is the leader we
                 * are looking for. Pass over our own reference if we have one, otherwise (i.e. we
                 * are still at the caller's pidref) return a copy. */
                if (pidref_is_set(&owned)) {
                        *ret = TAKE_PIDREF(owned);
                        return 0;
                }

                r = pidref_copy(cursor, ret);
                if (r < 0)
                        return r;

                return 0;
        }
}
416

417
int detach_mount_namespace(void) {
209✔
418
        /* Detaches the mount namespace, disabling propagation from our namespace to the host. Sets
419
         * propagation first to MS_SLAVE for all mounts (disabling propagation), and then back to MS_SHARED
420
         * (so that we create a new peer group).  */
421

422
        if (unshare(CLONE_NEWNS) < 0)
209✔
UNCOV
423
                return log_debug_errno(errno, "Failed to acquire mount namespace: %m");
×
424

425
        if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
209✔
UNCOV
426
                return log_debug_errno(errno, "Failed to set mount propagation to MS_SLAVE for all mounts: %m");
×
427

428
        if (mount(NULL, "/", NULL, MS_SHARED | MS_REC, NULL) < 0)
209✔
UNCOV
429
                return log_debug_errno(errno, "Failed to set mount propagation back to MS_SHARED for all mounts: %m");
×
430

431
        return 0;
432
}
433

434
int detach_mount_namespace_harder(uid_t target_uid, gid_t target_gid) {
        int r;

        /* Tries detach_mount_namespace() first. If that fails with -EPERM, opens up an unprivileged
         * user namespace in which the originating UID/GID is mapped to the specified target UID/GID,
         * and then tries detach_mount_namespace() a second time.
         *
         * In other words: goes to much greater lengths to get a mount namespace, making use of
         * unprivileged user namespaces if necessary.
         *
         * Mind the difference in outcome once this function returned:
         *
         *    - with privileges: UIDs/GIDs of files and processes are the same as before
         *
         *    - without privileges: our own ID and all our files show up as owned by
         *      target_uid/target_gid, everything else as owned by nobody.
         *
         * Yes, these are two quite different results. */

        if (!uid_is_valid(target_uid) || !gid_is_valid(target_gid))
                return -EINVAL;

        r = detach_mount_namespace();
        if (r != -EPERM)
                return r;

        /* Query our IDs before we enter the new user namespace */
        uid_t from_uid = getuid();
        gid_t from_gid = getgid();

        if (unshare(CLONE_NEWUSER) < 0)
                return log_debug_errno(errno, "Failed to acquire user namespace: %m");

        r = write_string_filef("/proc/self/uid_map", 0,
                               UID_FMT " " UID_FMT " 1\n", target_uid, from_uid);
        if (r < 0)
                return log_debug_errno(r, "Failed to write uid map: %m");

        r = write_string_file("/proc/self/setgroups", "deny", 0);
        if (r < 0)
                return log_debug_errno(r, "Failed to write setgroups file: %m");

        r = write_string_filef("/proc/self/gid_map", 0,
                               GID_FMT " " GID_FMT " 1\n", target_gid, from_gid);
        if (r < 0)
                return log_debug_errno(r, "Failed to write gid map: %m");

        return detach_mount_namespace();
}
486

487
int detach_mount_namespace_userns(int userns_fd) {
8✔
488
        int r;
8✔
489

490
        assert(userns_fd >= 0);
8✔
491

492
        if (setns(userns_fd, CLONE_NEWUSER) < 0)
8✔
493
                return log_debug_errno(errno, "Failed to join user namespace: %m");
×
494

495
        r = reset_uid_gid();
8✔
496
        if (r < 0)
8✔
UNCOV
497
                return log_debug_errno(r, "Failed to become root in user namespace: %m");
×
498

499
        return detach_mount_namespace();
8✔
500
}
501

502
int parse_userns_uid_range(const char *s, uid_t *ret_uid_shift, uid_t *ret_uid_range) {
2✔
503
        _cleanup_free_ char *buffer = NULL;
2✔
504
        const char *range, *shift;
2✔
505
        int r;
2✔
506
        uid_t uid_shift, uid_range = 65536;
2✔
507

508
        assert(s);
2✔
509

510
        range = strchr(s, ':');
2✔
511
        if (range) {
2✔
UNCOV
512
                buffer = strndup(s, range - s);
×
513
                if (!buffer)
×
514
                        return log_oom();
×
515
                shift = buffer;
×
516

UNCOV
517
                range++;
×
UNCOV
518
                r = safe_atou32(range, &uid_range);
×
UNCOV
519
                if (r < 0)
×
UNCOV
520
                        return log_error_errno(r, "Failed to parse UID range \"%s\": %m", range);
×
521
        } else
522
                shift = s;
523

524
        r = parse_uid(shift, &uid_shift);
2✔
525
        if (r < 0)
2✔
526
                return log_error_errno(r, "Failed to parse UID \"%s\": %m", s);
2✔
527

528
        if (!userns_shift_range_valid(uid_shift, uid_range))
×
UNCOV
529
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "UID range cannot be empty or go beyond " UID_FMT ".", UID_INVALID);
×
530

531
        if (ret_uid_shift)
×
UNCOV
532
                *ret_uid_shift = uid_shift;
×
533

UNCOV
534
        if (ret_uid_range)
×
UNCOV
535
                *ret_uid_range = uid_range;
×
536

537
        return 0;
538
}
539

540
int userns_acquire_empty(void) {
41✔
541
        _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL;
41✔
542
        int r;
41✔
543

544
        r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS|FORK_FREEZE, &pid);
41✔
545
        if (r < 0)
41✔
546
                return r;
547
        assert(r > 0);
41✔
548

549
        return pidref_namespace_open_by_type(&pid, NAMESPACE_USER);
41✔
550
}
551

552
int userns_acquire(const char *uid_map, const char *gid_map, bool setgroups_deny) {
3,094✔
553
        char path[STRLEN("/proc//setgroups") + DECIMAL_STR_MAX(pid_t) + 1];
3,094✔
554
        _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL;
3,094✔
555
        int r;
3,094✔
556

557
        assert(uid_map);
3,094✔
558
        assert(gid_map);
3,094✔
559

560
        /* Forks off a process in a new userns, configures the specified uidmap/gidmap, acquires an fd to it,
561
         * and then kills the process again. This way we have a userns fd that is not bound to any
562
         * process. We can use that for file system mounts and similar. */
563

564
        r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS|FORK_FREEZE, &pid);
3,094✔
565
        if (r < 0)
3,094✔
566
                return r;
567
        assert(r > 0);
3,094✔
568

569
        xsprintf(path, "/proc/" PID_FMT "/uid_map", pid.pid);
3,094✔
570
        r = write_string_file(path, uid_map, WRITE_STRING_FILE_DISABLE_BUFFER);
3,094✔
571
        if (r < 0)
3,094✔
UNCOV
572
                return log_debug_errno(r, "Failed to write UID map: %m");
×
573

574
        if (setgroups_deny) {
3,094✔
575
                xsprintf(path, "/proc/" PID_FMT "/setgroups", pid.pid);
3,094✔
576
                r = write_string_file(path, "deny", WRITE_STRING_FILE_DISABLE_BUFFER);
3,094✔
577
                if (r < 0)
3,094✔
UNCOV
578
                        return log_debug_errno(r, "Failed to write setgroups file: %m");
×
579
        }
580

581
        xsprintf(path, "/proc/" PID_FMT "/gid_map", pid.pid);
3,094✔
582
        r = write_string_file(path, gid_map, WRITE_STRING_FILE_DISABLE_BUFFER);
3,094✔
583
        if (r < 0)
3,094✔
UNCOV
584
                return log_debug_errno(r, "Failed to write GID map: %m");
×
585

586
        return pidref_namespace_open_by_type(&pid, NAMESPACE_USER);
3,094✔
587
}
588

589
int userns_acquire_self_root(void) {
2,914✔
590

591
        /* Returns a user namespace with only our own uid/gid mapped to root, and everything else unmapped.
592
         *
593
         * Note: this can be acquired unprivileged! */
594

595
        _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
2,914✔
596
        if (asprintf(&uid_map, "0 " UID_FMT " 1", getuid()) < 0)
2,914✔
597
                return -ENOMEM;
598
        if (asprintf(&gid_map, "0 " GID_FMT " 1", getgid()) < 0)
2,914✔
599
                return -ENOMEM;
600

601
        return userns_acquire(uid_map, gid_map, /* setgroups_deny= */ true);
2,914✔
602
}
603

604
int userns_enter_and_pin(int userns_fd, pid_t *ret_pid) {
104✔
605
        _cleanup_close_pair_ int pfd[2] = EBADF_PAIR;
104✔
606
        _cleanup_(sigkill_waitp) pid_t pid = 0;
104✔
607
        ssize_t n;
104✔
608
        char x;
104✔
609
        int r;
104✔
610

611
        assert(userns_fd >= 0);
104✔
612
        assert(ret_pid);
104✔
613

614
        if (pipe2(pfd, O_CLOEXEC) < 0)
104✔
UNCOV
615
                return -errno;
×
616

617
        r = safe_fork_full(
208✔
618
                        "(sd-pinuserns)",
619
                        /* stdio_fds= */ NULL,
620
                        (int[]) { pfd[1], userns_fd }, 2,
104✔
621
                        FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL,
622
                        &pid);
623
        if (r < 0)
104✔
624
                return r;
625
        if (r == 0) {
104✔
626
                /* Child. */
627

UNCOV
628
                if (setns(userns_fd, CLONE_NEWUSER) < 0) {
×
629
                        log_debug_errno(errno, "Failed to join userns: %m");
×
UNCOV
630
                        _exit(EXIT_FAILURE);
×
631
                }
632

633
                userns_fd = safe_close(userns_fd);
×
634

UNCOV
635
                n = write(pfd[1], &(const char) { 'x' }, 1);
×
636
                if (n < 0) {
×
UNCOV
637
                        log_debug_errno(errno, "Failed to write to pipe: %m");
×
638
                        _exit(EXIT_FAILURE);
×
639
                }
UNCOV
640
                assert(n == 1);
×
641

UNCOV
642
                freeze();
×
643
        }
644

645
        pfd[1] = safe_close(pfd[1]);
104✔
646

647
        n = read(pfd[0], &x, 1);
104✔
648
        if (n < 0)
104✔
UNCOV
649
                return -errno;
×
650
        if (n == 0)
104✔
651
                return -EPROTO;
652
        assert(n == 1);
104✔
653
        assert(x == 'x');
104✔
654

655
        *ret_pid = TAKE_PID(pid);
104✔
656
        return 0;
104✔
657
}
658

659
bool userns_supported(void) {
        /* User namespaces are considered available if our own uid_map file exists in /proc/. */
        if (access("/proc/self/uid_map", F_OK) < 0)
                return false;

        return true;
}
662

663
int userns_get_base_uid(int userns_fd, uid_t *ret_uid, gid_t *ret_gid) {
26✔
664
        _cleanup_(sigkill_waitp) pid_t pid = 0;
26✔
665
        int r;
26✔
666

667
        assert(userns_fd >= 0);
26✔
668

669
        r = userns_enter_and_pin(userns_fd, &pid);
26✔
670
        if (r < 0)
26✔
671
                return r;
672

673
        uid_t uid;
26✔
674
        r = uid_map_search_root(pid, UID_RANGE_USERNS_OUTSIDE, &uid);
26✔
675
        if (r < 0)
26✔
676
                return r;
677

678
        gid_t gid;
25✔
679
        r = uid_map_search_root(pid, GID_RANGE_USERNS_OUTSIDE, &gid);
25✔
680
        if (r < 0)
25✔
681
                return r;
682

683
        if (!ret_gid && uid != gid)
25✔
684
                return -EUCLEAN;
685

686
        if (ret_uid)
24✔
687
                *ret_uid = uid;
24✔
688
        if (ret_gid)
24✔
689
                *ret_gid = gid;
1✔
690

691
        return 0;
692
}
693

694
int process_is_owned_by_uid(const PidRef *pidref, uid_t uid) {
14✔
695
        int r;
14✔
696

697
        /* Checks if the specified process either is owned directly by the specified user, or if it is inside
698
         * a user namespace owned by it. */
699

700
        assert(uid_is_valid(uid));
14✔
701

702
        uid_t process_uid;
14✔
703
        r = pidref_get_uid(pidref, &process_uid);
14✔
704
        if (r < 0)
14✔
705
                return r;
14✔
706
        if (process_uid == uid)
14✔
707
                return true;
708

709
        _cleanup_close_ int userns_fd = -EBADF;
14✔
710
        userns_fd = pidref_namespace_open_by_type(pidref, NAMESPACE_USER);
11✔
711
        if (userns_fd == -ENOPKG) /* If userns is not supported, then they don't matter for ownership */
11✔
712
                return false;
713
        if (userns_fd < 0)
11✔
714
                return userns_fd;
715

UNCOV
716
        for (unsigned iteration = 0;; iteration++) {
×
717
                uid_t ns_uid;
11✔
718

719
                /* This process is in our own userns? Then we are done, in our own userns only the UIDs
720
                 * themselves matter. */
721
                r = is_our_namespace(userns_fd, NAMESPACE_USER);
11✔
722
                if (r < 0)
11✔
723
                        return r;
11✔
724
                if (r > 0)
11✔
725
                        return false;
726

727
                if (ioctl(userns_fd, NS_GET_OWNER_UID, &ns_uid) < 0)
9✔
UNCOV
728
                        return -errno;
×
729
                if (ns_uid == uid)
9✔
730
                        return true;
731

732
                /* Paranoia check */
UNCOV
733
                if (iteration > 16)
×
734
                        return log_debug_errno(SYNTHETIC_ERRNO(ELOOP), "Giving up while tracing parents of user namespaces after %u steps.", iteration);
×
735

736
                /* Go up the tree */
737
                _cleanup_close_ int parent_fd = ioctl(userns_fd, NS_GET_USERNS);
11✔
738
                if (parent_fd < 0) {
×
UNCOV
739
                        if (errno == EPERM) /* EPERM means we left our own userns */
×
740
                                return false;
741

UNCOV
742
                        return -errno;
×
743
                }
744

UNCOV
745
                close_and_replace(userns_fd, parent_fd);
×
746
        }
747
}
748

749
int is_idmapping_supported(const char *path) {
2,913✔
750
        _cleanup_close_ int mount_fd = -EBADF, userns_fd = -EBADF, dir_fd = -EBADF;
5,826✔
751
        int r;
2,913✔
752

753
        assert(path);
2,913✔
754

755
        if (!mount_new_api_supported())
2,913✔
756
                return false;
757

758
        userns_fd = r = userns_acquire_self_root();
2,913✔
759
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL)
2,913✔
760
                return false;
761
        if (r == -ENOSPC) {
2,913✔
762
                log_debug_errno(r, "Failed to acquire new user namespace, user.max_user_namespaces seems to be exhausted or maybe even zero, assuming ID-mapping is not supported: %m");
×
UNCOV
763
                return false;
×
764
        }
765
        if (r < 0)
2,913✔
UNCOV
766
                return log_debug_errno(r, "Failed to acquire new user namespace for checking if '%s' supports ID-mapping: %m", path);
×
767

768
        dir_fd = r = RET_NERRNO(open(path, O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2,913✔
769
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r))
5,826✔
770
                return false;
771
        if (r < 0)
2,913✔
772
                return log_debug_errno(r, "Failed to open '%s', cannot determine if ID-mapping is supported: %m", path);
×
773

774
        mount_fd = r = RET_NERRNO(open_tree(dir_fd, "", AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC));
2,913✔
775
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL)
2,913✔
UNCOV
776
                return false;
×
777
        if (r < 0)
2,913✔
UNCOV
778
                return log_debug_errno(r, "Failed to open mount tree '%s', cannot determine if ID-mapping is supported: %m", path);
×
779

780
        r = RET_NERRNO(mount_setattr(mount_fd, "", AT_EMPTY_PATH,
2,913✔
781
                       &(struct mount_attr) {
2,913✔
782
                                .attr_set = MOUNT_ATTR_IDMAP | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RDONLY | MOUNT_ATTR_NODEV,
783
                                .userns_fd = userns_fd,
784
                        }, sizeof(struct mount_attr)));
785
        if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL)
2,913✔
UNCOV
786
                return false;
×
787
        if (r < 0)
2,913✔
UNCOV
788
                return log_debug_errno(r, "Failed to set mount attribute to '%s', cannot determine if ID-mapping is supported: %m", path);
×
789

790
        return true;
791
}
792

793
int netns_acquire(void) {
7✔
794
        _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL;
7✔
795
        int r;
7✔
796

797
        /* Forks off a process in a new network namespace, acquires a network namespace fd, and then kills
798
         * the process again. This way we have a netns fd that is not bound to any process. */
799

800
        r = pidref_safe_fork("(sd-mknetns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_NETNS|FORK_FREEZE, &pid);
7✔
801
        if (r < 0)
7✔
UNCOV
802
                return log_debug_errno(r, "Failed to fork process into new netns: %m");
×
803
        assert(r > 0);
7✔
804

805
        return pidref_namespace_open_by_type(&pid, NAMESPACE_NET);
7✔
806
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc