• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 19520565317

19 Nov 2025 11:19PM UTC coverage: 72.548% (+0.1%) from 72.449%
19520565317

push

github

web-flow
core: Verify inherited FDs are writable for stdout/stderr (#39674)

When inheriting file descriptors for stdout/stderr (either from stdin or
when making stderr inherit from stdout), we previously just assumed they
would be writable and dup'd them. This could lead to broken setups if
the inherited FD was actually opened read-only.

Before dup'ing any inherited FDs to stdout/stderr, verify they are
actually writable using the new fd_is_writable() helper. If not, fall
back to /dev/null (or reopen the terminal in the TTY case) with a
warning, rather than silently creating a broken setup where output
operations would fail.

31 of 44 new or added lines in 3 files covered. (70.45%)

813 existing lines in 43 files now uncovered.

308541 of 425291 relevant lines covered (72.55%)

1188151.68 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.97
/src/basic/process-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <linux/oom.h>
4
#include <pthread.h>
5
#include <spawn.h>
6
#include <stdio.h>
7
#include <sys/mount.h>
8
#include <sys/personality.h>
9
#include <sys/prctl.h>
10
#include <sys/wait.h>
11
#include <syslog.h>
12
#include <threads.h>
13
#include <unistd.h>
14
#if HAVE_VALGRIND_VALGRIND_H
15
#include <valgrind/valgrind.h>
16
#endif
17

18
#include "sd-messages.h"
19

20
#include "alloc-util.h"
21
#include "architecture.h"
22
#include "argv-util.h"
23
#include "cgroup-util.h"
24
#include "dirent-util.h"
25
#include "env-file.h"
26
#include "errno-util.h"
27
#include "escape.h"
28
#include "fd-util.h"
29
#include "fileio.h"
30
#include "fs-util.h"
31
#include "hostname-util.h"
32
#include "io-util.h"
33
#include "iovec-util.h"
34
#include "locale-util.h"
35
#include "log.h"
36
#include "memory-util.h"
37
#include "mountpoint-util.h"
38
#include "namespace-util.h"
39
#include "nulstr-util.h"
40
#include "parse-util.h"
41
#include "path-util.h"
42
#include "pidfd-util.h"
43
#include "pidref.h"
44
#include "process-util.h"
45
#include "raw-clone.h"
46
#include "rlimit-util.h"
47
#include "signal-util.h"
48
#include "socket-util.h"
49
#include "stat-util.h"
50
#include "stdio-util.h"
51
#include "string-table.h"
52
#include "string-util.h"
53
#include "time-util.h"
54
#include "user-util.h"
55

56
/* The kernel limits userspace processes to TASK_COMM_LEN (16 bytes), but allows higher values for its own
57
 * workers, e.g. "kworker/u9:3-kcryptd/253:0". Let's pick a fixed smallish limit that will work for the kernel.
58
 */
59
#define COMM_MAX_LEN 128
60

61
static int get_process_state(pid_t pid) {
12,821✔
62
        _cleanup_free_ char *line = NULL;
12,821✔
63
        const char *p;
12,821✔
64
        char state;
12,821✔
65
        int r;
12,821✔
66

67
        assert(pid >= 0);
12,821✔
68

69
        /* Shortcut: if we are enquired about our own state, we are obviously running */
70
        if (pid == 0 || pid == getpid_cached())
12,821✔
71
                return (unsigned char) 'R';
×
72

73
        p = procfs_file_alloca(pid, "stat");
12,821✔
74

75
        r = read_one_line_file(p, &line);
12,821✔
76
        if (r == -ENOENT)
12,821✔
77
                return -ESRCH;
78
        if (r < 0)
10,062✔
79
                return r;
80

81
        p = strrchr(line, ')');
10,061✔
82
        if (!p)
10,061✔
83
                return -EIO;
84

85
        p++;
10,061✔
86

87
        if (sscanf(p, " %c", &state) != 1)
10,061✔
88
                return -EIO;
89

90
        return (unsigned char) state;
10,061✔
91
}
92

93
int pid_get_comm(pid_t pid, char **ret) {
43,101✔
94
        _cleanup_free_ char *escaped = NULL, *comm = NULL;
43,101✔
95
        int r;
43,101✔
96

97
        assert(pid >= 0);
43,101✔
98
        assert(ret);
43,101✔
99

100
        if (pid == 0 || pid == getpid_cached()) {
43,101✔
101
                comm = new0(char, TASK_COMM_LEN + 1); /* Must fit in 16 byte according to prctl(2) */
22,622✔
102
                if (!comm)
22,622✔
103
                        return -ENOMEM;
104

105
                if (prctl(PR_GET_NAME, comm) < 0)
22,622✔
106
                        return -errno;
×
107
        } else {
108
                const char *p;
20,479✔
109

110
                p = procfs_file_alloca(pid, "comm");
20,479✔
111

112
                /* Note that process names of kernel threads can be much longer than TASK_COMM_LEN */
113
                r = read_one_line_file(p, &comm);
20,479✔
114
                if (r == -ENOENT)
20,479✔
115
                        return -ESRCH;
116
                if (r < 0)
15,952✔
117
                        return r;
118
        }
119

120
        escaped = new(char, COMM_MAX_LEN);
38,571✔
121
        if (!escaped)
38,571✔
122
                return -ENOMEM;
123

124
        /* Escape unprintable characters, just in case, but don't grow the string beyond the underlying size */
125
        cellescape(escaped, COMM_MAX_LEN, comm);
38,571✔
126

127
        *ret = TAKE_PTR(escaped);
38,571✔
128
        return 0;
38,571✔
129
}
130

131
int pidref_get_comm(const PidRef *pid, char **ret) {
156✔
132
        _cleanup_free_ char *comm = NULL;
156✔
133
        int r;
156✔
134

135
        if (!pidref_is_set(pid))
156✔
136
                return -ESRCH;
137

138
        if (pidref_is_remote(pid))
312✔
139
                return -EREMOTE;
140

141
        r = pid_get_comm(pid->pid, &comm);
156✔
142
        if (r < 0)
156✔
143
                return r;
144

145
        r = pidref_verify(pid);
156✔
146
        if (r < 0)
156✔
147
                return r;
148

149
        if (ret)
156✔
150
                *ret = TAKE_PTR(comm);
156✔
151
        return 0;
152
}
153

154
static int pid_get_cmdline_nulstr(
19,344✔
155
                pid_t pid,
156
                size_t max_size,
157
                ProcessCmdlineFlags flags,
158
                char **ret,
159
                size_t *ret_size) {
160

161
        _cleanup_free_ char *t = NULL;
19,344✔
162
        const char *p;
19,344✔
163
        size_t k;
19,344✔
164
        int r;
19,344✔
165

166
        /* Retrieves a process' command line as a "sized nulstr", i.e. possibly without the last NUL, but
167
         * with a specified size.
168
         *
169
         * If PROCESS_CMDLINE_COMM_FALLBACK is specified in flags and the process has no command line set
170
         * (the case for kernel threads), or has a command line that resolves to the empty string, will
171
         * return the "comm" name of the process instead. This will use at most _SC_ARG_MAX bytes of input
172
         * data.
173
         *
174
         * Returns an error, 0 if output was read but is truncated, 1 otherwise.
175
         */
176

177
        p = procfs_file_alloca(pid, "cmdline");
19,560✔
178
        r = read_virtual_file(p, max_size, &t, &k); /* Let's assume that each input byte results in >= 1
19,344✔
179
                                                     * columns of output. We ignore zero-width codepoints. */
180
        if (r == -ENOENT)
19,344✔
181
                return -ESRCH;
182
        if (r < 0)
14,729✔
183
                return r;
184

185
        if (k == 0) {
14,727✔
186
                if (!(flags & PROCESS_CMDLINE_COMM_FALLBACK))
453✔
187
                        return -ENOENT;
430✔
188

189
                /* Kernel threads have no argv[] */
190
                _cleanup_free_ char *comm = NULL;
23✔
191

192
                r = pid_get_comm(pid, &comm);
23✔
193
                if (r < 0)
23✔
194
                        return r;
195

196
                free(t);
23✔
197
                t = strjoin("[", comm, "]");
23✔
198
                if (!t)
23✔
199
                        return -ENOMEM;
200

201
                k = strlen(t);
23✔
202
                r = k <= max_size;
23✔
203
                if (r == 0) /* truncation */
23✔
204
                        t[max_size] = '\0';
12✔
205
        }
206

207
        if (ret)
14,297✔
208
                *ret = TAKE_PTR(t);
14,297✔
209
        if (ret_size)
14,297✔
210
                *ret_size = k;
14,297✔
211

212
        return r;
213
}
214

215
int pid_get_cmdline(pid_t pid, size_t max_columns, ProcessCmdlineFlags flags, char **ret) {
        _cleanup_free_ char *raw = NULL;
        size_t raw_size;

        assert(pid >= 0);
        assert(ret);

        /* Retrieve and format a command line. Retrieval is done by pid_get_cmdline_nulstr().
         *
         * There are two main formatting modes:
         *
         * - when PROCESS_CMDLINE_QUOTE or PROCESS_CMDLINE_QUOTE_POSIX is specified, the arguments are
         *   split up and passed through quote_command_line(). PROCESS_CMDLINE_USE_LOCALE must not be
         *   combined with these.
         *
         * - otherwise, a compact non-roundtrippable form is returned: NUL separators become spaces,
         *   trailing whitespace is dropped, and the result is passed through
         *   escape_non_printable_full() with max_columns as width limit. An ellipsis is forced if the
         *   retrieved data was truncated.
         *
         * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
         * PROCESS_CMDLINE_COMM_FALLBACK is not specified). Returns 0 and sets *ret otherwise. */

        int full = pid_get_cmdline_nulstr(pid, max_columns, flags, &raw, &raw_size);
        if (full < 0)
                return full;

        if (flags & (PROCESS_CMDLINE_QUOTE | PROCESS_CMDLINE_QUOTE_POSIX)) {
                _cleanup_strv_free_ char **argv = NULL;
                ShellEscapeFlags shflags = SHELL_ESCAPE_EMPTY |
                        (FLAGS_SET(flags, PROCESS_CMDLINE_QUOTE_POSIX) ? SHELL_ESCAPE_POSIX : 0);

                assert(!(flags & PROCESS_CMDLINE_USE_LOCALE));

                /* Drop trailing NULs, otherwise strv_parse_nulstr() adds additional empty strings at the end.
                 * See also issue #21186. */
                argv = strv_parse_nulstr_full(raw, raw_size, /* drop_trailing_nuls = */ true);
                if (!argv)
                        return -ENOMEM;

                char *quoted = quote_command_line(argv, shflags);
                if (!quoted)
                        return -ENOMEM;

                *ret = quoted;
                return 0;
        }

        /* Arguments are separated by NULs. Let's replace those with spaces. The final byte is left
         * alone. */
        size_t last = raw_size - 1;
        for (size_t i = 0; i < last; i++)
                if (raw[i] == '\0')
                        raw[i] = ' ';

        delete_trailing_chars(raw, WHITESPACE);

        bool eight_bit = (flags & PROCESS_CMDLINE_USE_LOCALE) && !is_locale_utf8();

        char *escaped = escape_non_printable_full(
                        raw,
                        max_columns,
                        (eight_bit ? XESCAPE_8_BIT : 0) | (full == 0 ? XESCAPE_FORCE_ELLIPSIS : 0));
        if (!escaped)
                return -ENOMEM;

        *ret = str_realloc(escaped);
        return 0;
}
279

280
int pidref_get_cmdline(const PidRef *pid, size_t max_columns, ProcessCmdlineFlags flags, char **ret) {
        /* PidRef flavour of pid_get_cmdline(): formats the command line by numeric PID and then calls
         * pidref_verify() before handing out the result. ret may be NULL. Returns -ESRCH for an unset
         * and -EREMOTE for a remote reference. */

        _cleanup_free_ char *cmdline = NULL;
        int r;

        if (!pidref_is_set(pid))
                return -ESRCH;
        if (pidref_is_remote(pid))
                return -EREMOTE;

        r = pid_get_cmdline(pid->pid, max_columns, flags, &cmdline);
        if (r >= 0)
                r = pidref_verify(pid);
        if (r < 0)
                return r;

        if (ret)
                *ret = TAKE_PTR(cmdline);

        return 0;
}
302

303
int pid_get_cmdline_strv(pid_t pid, ProcessCmdlineFlags flags, char ***ret) {
        /* Returns the command line of the process as a string array in *ret. The only flag accepted
         * is PROCESS_CMDLINE_COMM_FALLBACK. No size limit is applied (SIZE_MAX is passed down). */

        _cleanup_free_ char *nulstr = NULL;
        size_t size;
        int r;

        assert(pid >= 0);
        assert((flags & ~PROCESS_CMDLINE_COMM_FALLBACK) == 0);
        assert(ret);

        r = pid_get_cmdline_nulstr(pid, SIZE_MAX, flags, &nulstr, &size);
        if (r < 0)
                return r;

        char **l = strv_parse_nulstr_full(nulstr, size, /* drop_trailing_nuls = */ true);
        if (!l)
                return -ENOMEM;

        *ret = l;
        return 0;
}
324

325
int pidref_get_cmdline_strv(const PidRef *pid, ProcessCmdlineFlags flags, char ***ret) {
        /* PidRef flavour of pid_get_cmdline_strv(): fetches the argument vector by numeric PID and
         * then calls pidref_verify() before handing out the result. ret may be NULL. Returns -ESRCH
         * for an unset and -EREMOTE for a remote reference. */

        _cleanup_strv_free_ char **l = NULL;
        int r;

        if (!pidref_is_set(pid))
                return -ESRCH;
        if (pidref_is_remote(pid))
                return -EREMOTE;

        r = pid_get_cmdline_strv(pid->pid, flags, &l);
        if (r >= 0)
                r = pidref_verify(pid);
        if (r < 0)
                return r;

        if (ret)
                *ret = TAKE_PTR(l);

        return 0;
}
348

349
int container_get_leader(const char *machine, pid_t *pid) {
35✔
350
        _cleanup_free_ char *s = NULL, *class = NULL;
35✔
351
        const char *p;
35✔
352
        pid_t leader;
35✔
353
        int r;
35✔
354

355
        assert(machine);
35✔
356
        assert(pid);
35✔
357

358
        if (streq(machine, ".host")) {
35✔
359
                *pid = 1;
1✔
360
                return 0;
1✔
361
        }
362

363
        if (!hostname_is_valid(machine, 0))
34✔
364
                return -EINVAL;
365

366
        p = strjoina("/run/systemd/machines/", machine);
170✔
367
        r = parse_env_file(NULL, p,
34✔
368
                           "LEADER", &s,
369
                           "CLASS", &class);
370
        if (r == -ENOENT)
34✔
371
                return -EHOSTDOWN;
372
        if (r < 0)
34✔
373
                return r;
374
        if (!s)
34✔
375
                return -EIO;
376

377
        if (!streq_ptr(class, "container"))
34✔
378
                return -EIO;
379

380
        r = parse_pid(s, &leader);
34✔
381
        if (r < 0)
34✔
382
                return r;
383
        if (leader <= 1)
34✔
384
                return -EIO;
385

386
        *pid = leader;
34✔
387
        return 0;
34✔
388
}
389

390
int pid_is_kernel_thread(pid_t pid) {
3,785✔
391
        _cleanup_free_ char *line = NULL;
3,785✔
392
        unsigned long long flags;
3,785✔
393
        size_t l, i;
3,785✔
394
        const char *p;
3,785✔
395
        char *q;
3,785✔
396
        int r;
3,785✔
397

398
        if (IN_SET(pid, 0, 1) || pid == getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */
3,785✔
399
                return 0;
25✔
400
        if (!pid_is_valid(pid))
3,760✔
401
                return -EINVAL;
402

403
        p = procfs_file_alloca(pid, "stat");
3,760✔
404
        r = read_one_line_file(p, &line);
3,760✔
405
        if (r == -ENOENT)
3,760✔
406
                return -ESRCH;
407
        if (r < 0)
3,760✔
408
                return r;
409

410
        /* Skip past the comm field */
411
        q = strrchr(line, ')');
3,760✔
412
        if (!q)
3,760✔
413
                return -EINVAL;
414
        q++;
3,760✔
415

416
        /* Skip 6 fields to reach the flags field */
417
        for (i = 0; i < 6; i++) {
26,320✔
418
                l = strspn(q, WHITESPACE);
22,560✔
419
                if (l < 1)
22,560✔
420
                        return -EINVAL;
421
                q += l;
22,560✔
422

423
                l = strcspn(q, WHITESPACE);
22,560✔
424
                if (l < 1)
22,560✔
425
                        return -EINVAL;
426
                q += l;
22,560✔
427
        }
428

429
        /* Skip preceding whitespace */
430
        l = strspn(q, WHITESPACE);
3,760✔
431
        if (l < 1)
3,760✔
432
                return -EINVAL;
433
        q += l;
3,760✔
434

435
        /* Truncate the rest */
436
        l = strcspn(q, WHITESPACE);
3,760✔
437
        if (l < 1)
3,760✔
438
                return -EINVAL;
439
        q[l] = 0;
3,760✔
440

441
        r = safe_atollu(q, &flags);
3,760✔
442
        if (r < 0)
3,760✔
443
                return r;
444

445
        return !!(flags & PF_KTHREAD);
3,760✔
446
}
447

448
int pidref_is_kernel_thread(const PidRef *pid) {
1,580✔
449
        int result, r;
1,580✔
450

451
        if (!pidref_is_set(pid))
1,580✔
452
                return -ESRCH;
453

454
        if (pidref_is_remote(pid))
1,580✔
455
                return -EREMOTE;
456

457
        result = pid_is_kernel_thread(pid->pid);
1,580✔
458
        if (result < 0)
1,580✔
459
                return result;
460

461
        r = pidref_verify(pid); /* Verify that the PID wasn't reused since */
1,580✔
462
        if (r < 0)
1,580✔
463
                return r;
×
464

465
        return result;
466
}
467

468
static int get_process_link_contents(pid_t pid, const char *proc_file, char **ret) {
        /* Reads the target of the symlink /proc/<pid>/<proc_file> via readlink_malloc(). */

        int r;

        assert(proc_file);

        const char *path = procfs_file_alloca(pid, proc_file);

        r = readlink_malloc(path, ret);
        if (r != -ENOENT)
                return r;

        /* The link is missing: report that as "no such process" only if procfs is actually mounted,
         * otherwise pass -ENOENT through unchanged. */
        if (proc_mounted() > 0)
                return -ESRCH;

        return r;
}
479

480
int get_process_exe(pid_t pid, char **ret) {
        /* Resolves /proc/<pid>/exe. If a path is returned and it ends in " (deleted)", that suffix
         * is cut off. */

        int r;

        assert(pid >= 0);

        r = get_process_link_contents(pid, "exe", ret);
        if (r < 0)
                return r;

        if (!ret)
                return 0;

        char *suffix = endswith(*ret, " (deleted)");
        if (suffix)
                *suffix = '\0';

        return 0;
}
498

499
int pid_get_uid(pid_t pid, uid_t *ret) {
4,203✔
500
        int r;
4,203✔
501

502
        assert(pid >= 0);
4,203✔
503
        assert(ret);
4,203✔
504

505
        if (pid == 0 || pid == getpid_cached()) {
4,203✔
506
                *ret = getuid();
2✔
507
                return 0;
4,203✔
508
        }
509

510
        _cleanup_free_ char *v = NULL;
4,201✔
511
        r = procfs_file_get_field(pid, "status", "Uid", &v);
4,201✔
512
        if (r == -ENOENT)
4,201✔
513
                return -ESRCH;
514
        if (r < 0)
167✔
515
                return r;
516

517
        return parse_uid(v, ret);
167✔
518
}
519

520
int pidref_get_uid(const PidRef *pid, uid_t *ret) {
73✔
521
        int r;
73✔
522

523
        if (!pidref_is_set(pid))
73✔
524
                return -ESRCH;
73✔
525

526
        if (pidref_is_remote(pid))
73✔
527
                return -EREMOTE;
528

529
        if (pid->fd >= 0) {
73✔
530
                r = pidfd_get_uid(pid->fd, ret);
73✔
531
                if (!ERRNO_IS_NEG_NOT_SUPPORTED(r))
73✔
532
                        return r;
533
        }
534

535
        uid_t uid;
3✔
536
        r = pid_get_uid(pid->pid, &uid);
3✔
537
        if (r < 0)
3✔
538
                return r;
539

540
        r = pidref_verify(pid);
3✔
541
        if (r < 0)
3✔
542
                return r;
543

544
        if (ret)
3✔
545
                *ret = uid;
3✔
546
        return 0;
547
}
548

549
int get_process_gid(pid_t pid, gid_t *ret) {
4,200✔
550
        int r;
4,200✔
551

552
        assert(pid >= 0);
4,200✔
553
        assert(ret);
4,200✔
554

555
        if (pid == 0 || pid == getpid_cached()) {
4,200✔
556
                *ret = getgid();
1✔
557
                return 0;
4,200✔
558
        }
559

560
        _cleanup_free_ char *v = NULL;
4,199✔
561
        r = procfs_file_get_field(pid, "status", "Gid", &v);
4,199✔
562
        if (r == -ENOENT)
4,199✔
563
                return -ESRCH;
564
        if (r < 0)
165✔
565
                return r;
566

567
        return parse_gid(v, ret);
4,199✔
568
}
569

570
int get_process_cwd(pid_t pid, char **ret) {
        /* Returns the current working directory of a process: via /proc/<pid>/cwd for other
         * processes, via safe_getcwd() for ourselves. */

        assert(pid >= 0);

        if (pid != 0 && pid != getpid_cached())
                return get_process_link_contents(pid, "cwd", ret);

        return safe_getcwd(ret);
}
578

579
int get_process_root(pid_t pid, char **ret) {
        /* Returns the target of the /proc/<pid>/root symlink. */

        int r;

        assert(pid >= 0);

        r = get_process_link_contents(pid, "root", ret);
        return r;
}
583

584
/* Cap on the size of the escaped output get_process_environ() accumulates; once the output has grown
 * to this many bytes the function gives up with -ENOBUFS. */
#define ENVIRONMENT_BLOCK_MAX (5U*1024U*1024U)
585

586
int get_process_environ(pid_t pid, char **ret) {
15✔
587
        _cleanup_fclose_ FILE *f = NULL;
15✔
588
        _cleanup_free_ char *outcome = NULL;
15✔
589
        size_t sz = 0;
15✔
590
        const char *p;
15✔
591
        int r;
15✔
592

593
        assert(pid >= 0);
15✔
594
        assert(ret);
15✔
595

596
        p = procfs_file_alloca(pid, "environ");
15✔
597

598
        r = fopen_unlocked(p, "re", &f);
15✔
599
        if (r == -ENOENT)
15✔
600
                return -ESRCH;
601
        if (r < 0)
15✔
602
                return r;
603

604
        for (;;) {
6,400✔
605
                char c;
6,415✔
606

607
                if (sz >= ENVIRONMENT_BLOCK_MAX)
6,415✔
608
                        return -ENOBUFS;
×
609

610
                if (!GREEDY_REALLOC(outcome, sz + 5))
6,415✔
611
                        return -ENOMEM;
612

613
                r = safe_fgetc(f, &c);
6,415✔
614
                if (r < 0)
6,415✔
615
                        return r;
616
                if (r == 0)
6,415✔
617
                        break;
618

619
                if (c == '\0')
6,400✔
620
                        outcome[sz++] = '\n';
227✔
621
                else
622
                        sz += cescape_char(c, outcome + sz);
6,173✔
623
        }
624

625
        outcome[sz] = '\0';
15✔
626
        *ret = TAKE_PTR(outcome);
15✔
627

628
        return 0;
15✔
629
}
630

631
int pid_get_ppid(pid_t pid, pid_t *ret) {
1,528✔
632
        _cleanup_free_ char *line = NULL;
1,528✔
633
        unsigned long ppid;
1,528✔
634
        const char *p;
1,528✔
635
        int r;
1,528✔
636

637
        assert(pid >= 0);
1,528✔
638

639
        if (pid == 0)
1,528✔
640
                pid = getpid_cached();
1✔
641
        if (pid == 1) /* PID 1 has no parent, shortcut this case */
1,528✔
642
                return -EADDRNOTAVAIL;
643

644
        if (pid == getpid_cached()) {
1,524✔
645
                if (ret)
6✔
646
                        *ret = getppid();
6✔
647
                return 0;
6✔
648
        }
649

650
        p = procfs_file_alloca(pid, "stat");
1,518✔
651
        r = read_one_line_file(p, &line);
1,518✔
652
        if (r == -ENOENT)
1,518✔
653
                return -ESRCH;
654
        if (r < 0)
1,517✔
655
                return r;
656

657
        /* Let's skip the pid and comm fields. The latter is enclosed in () but does not escape any () in its
658
         * value, so let's skip over it manually */
659

660
        p = strrchr(line, ')');
1,517✔
661
        if (!p)
1,517✔
662
                return -EIO;
663
        p++;
1,517✔
664

665
        if (sscanf(p, " "
1,517✔
666
                   "%*c "  /* state */
667
                   "%lu ", /* ppid */
668
                   &ppid) != 1)
669
                return -EIO;
670

671
        /* If ppid is zero the process has no parent. Which might be the case for PID 1 (caught above)
672
         * but also for processes originating in other namespaces that are inserted into a pidns.
673
         * Return a recognizable error in this case. */
674
        if (ppid == 0)
1,517✔
675
                return -EADDRNOTAVAIL;
676

677
        if ((pid_t) ppid < 0 || (unsigned long) (pid_t) ppid != ppid)
1,517✔
678
                return -ERANGE;
679

680
        if (ret)
1,517✔
681
                *ret = (pid_t) ppid;
1,517✔
682

683
        return 0;
684
}
685

686
int pidref_get_ppid(const PidRef *pidref, pid_t *ret) {
2,470✔
687
        int r;
2,470✔
688

689
        if (!pidref_is_set(pidref))
2,470✔
690
                return -ESRCH;
2,470✔
691

692
        if (pidref_is_remote(pidref))
2,470✔
693
                return -EREMOTE;
694

695
        if (pidref->fd >= 0) {
2,470✔
696
                r = pidfd_get_ppid(pidref->fd, ret);
2,470✔
697
                if (!ERRNO_IS_NEG_NOT_SUPPORTED(r))
2,470✔
698
                        return r;
699
        }
700

701
        pid_t ppid;
1,522✔
702
        r = pid_get_ppid(pidref->pid, ret ? &ppid : NULL);
1,522✔
703
        if (r < 0)
1,522✔
704
                return r;
705

706
        r = pidref_verify(pidref);
1,521✔
707
        if (r < 0)
1,521✔
708
                return r;
709

710
        if (ret)
1,521✔
711
                *ret = ppid;
1,521✔
712
        return 0;
713
}
714

715
int pidref_get_ppid_as_pidref(const PidRef *pidref, PidRef *ret) {
        /* Acquires a PidRef for the parent of the referenced process and stores it in *ret. If the
         * original reference carries a pidfd, the parent PID is queried again after pinning and the
         * whole procedure is retried whenever the two disagree. Returns -ENOTRECOVERABLE if no
         * stable answer was found after 16 attempts. */

        pid_t ppid;
        int r;

        assert(ret);

        r = pidref_get_ppid(pidref, &ppid);
        if (r < 0)
                return r;

        for (unsigned tries_left = 16; tries_left > 0; tries_left--) {
                _cleanup_(pidref_done) PidRef candidate = PIDREF_NULL;

                r = pidref_set_pid(&candidate, ppid);
                if (r < 0)
                        return r;

                /* If we have a pidfd of the original PID, let's verify that the process we acquired really
                 * is the parent still */
                if (pidref->fd >= 0) {
                        r = pidref_get_ppid(pidref, &ppid);
                        if (r < 0)
                                return r;
                }

                /* Did the PPID change since we queried it? if so we might have pinned the wrong
                 * process, if its PID got reused by now. In that case go for another round, now
                 * with the updated PPID. Without a pidfd there is nothing to compare against. */
                if (pidref->fd < 0 || candidate.pid == ppid) {
                        *ret = TAKE_PIDREF(candidate);
                        return 0;
                }
        }

        /* Give up after 16 tries */
        return -ENOTRECOVERABLE;
}
752

753
int pid_get_start_time(pid_t pid, usec_t *ret) {
        /* Parses the starttime field out of /proc/<pid>/stat and returns it, converted with
         * jiffies_to_usec(), in *ret (optional). Returns -ESRCH if the stat file does not exist and
         * -EIO if the line cannot be parsed. */

        _cleanup_free_ char *stat_line = NULL;
        unsigned long starttime;
        int r;

        assert(pid >= 0);

        const char *path = procfs_file_alloca(pid, "stat");

        r = read_one_line_file(path, &stat_line);
        if (r < 0)
                return r == -ENOENT ? -ESRCH : r;

        /* Let's skip the pid and comm fields. The latter is enclosed in () but does not escape any () in its
         * value, so let's skip over it manually */
        const char *after_comm = strrchr(stat_line, ')');
        if (!after_comm)
                return -EIO;

        /* Every field up to starttime is matched but discarded. */
        if (sscanf(after_comm + 1, " "
                   "%*c " /* state */
                   "%*u " /* ppid */
                   "%*u " /* pgrp */
                   "%*u " /* session */
                   "%*u " /* tty_nr */
                   "%*u " /* tpgid */
                   "%*u " /* flags */
                   "%*u " /* minflt */
                   "%*u " /* cminflt */
                   "%*u " /* majflt */
                   "%*u " /* cmajflt */
                   "%*u " /* utime */
                   "%*u " /* stime */
                   "%*u " /* cutime */
                   "%*u " /* cstime */
                   "%*i " /* priority */
                   "%*i " /* nice */
                   "%*u " /* num_threads */
                   "%*u " /* itrealvalue */
                   "%lu ", /* starttime */
                   &starttime) != 1)
                return -EIO;

        if (ret)
                *ret = jiffies_to_usec(starttime); /* CLOCK_BOOTTIME */

        return 0;
}
806

807
int pidref_get_start_time(const PidRef *pid, usec_t *ret) {
        /* PidRef flavour of pid_get_start_time(): looks up the start time by numeric PID and then
         * calls pidref_verify() before handing out the result. ret is optional. Returns -ESRCH for an
         * unset and -EREMOTE for a remote reference. */

        usec_t start;
        int r;

        if (!pidref_is_set(pid))
                return -ESRCH;
        if (pidref_is_remote(pid))
                return -EREMOTE;

        r = pid_get_start_time(pid->pid, ret ? &start : NULL);
        if (r >= 0)
                r = pidref_verify(pid);
        if (r < 0)
                return r;

        if (ret)
                *ret = start;

        return 0;
}
830

831
int get_process_umask(pid_t pid, mode_t *ret) {
21,532✔
832
        _cleanup_free_ char *m = NULL;
21,532✔
833
        int r;
21,532✔
834

835
        assert(pid >= 0);
21,532✔
836
        assert(ret);
21,532✔
837

838
        r = procfs_file_get_field(pid, "status", "Umask", &m);
21,532✔
839
        if (r == -ENOENT)
21,532✔
840
                return -ESRCH;
841
        if (r < 0)
21,532✔
842
                return r;
843

844
        return parse_mode(m, ret);
21,532✔
845
}
846

847
int wait_for_terminate(pid_t pid, siginfo_t *ret) {
        /* Plain-PID convenience wrapper: wraps the PID in a temporary PidRef and defers to
         * pidref_wait_for_terminate(). */
        PidRef ref = PIDREF_MAKE_FROM_PID(pid);

        return pidref_wait_for_terminate(&ref, ret);
}
850

851
/*
852
 * Return values:
853
 * < 0 : wait_for_terminate() failed to get the state of the
854
 *       process, the process was terminated by a signal, or
855
 *       failed for an unknown reason.
856
 * >=0 : The process terminated normally, and its exit code is
857
 *       returned.
858
 *
859
 * That is, success is indicated by a return value of zero, and an
860
 * error is indicated by a non-zero value.
861
 *
862
 * A warning is emitted if the process terminates abnormally,
863
 * and also if it returns non-zero unless check_exit_code is true.
864
 */
865
int pidref_wait_for_terminate_and_check(const char *name, PidRef *pidref, WaitFlags flags) {
6,909✔
866
        int r;
6,909✔
867

868
        if (!pidref_is_set(pidref))
6,909✔
869
                return -ESRCH;
6,909✔
870
        if (pidref_is_remote(pidref))
13,818✔
871
                return -EREMOTE;
872
        if (pidref->pid == 1 || pidref_is_self(pidref))
6,909✔
873
                return -ECHILD;
×
874

875
        _cleanup_free_ char *buffer = NULL;
6,909✔
876
        if (!name) {
6,909✔
877
                r = pidref_get_comm(pidref, &buffer);
2✔
878
                if (r < 0)
2✔
879
                        log_debug_errno(r, "Failed to acquire process name of " PID_FMT ", ignoring: %m", pidref->pid);
×
880
                else
881
                        name = buffer;
2✔
882
        }
883

884
        int prio = flags & WAIT_LOG_ABNORMAL ? LOG_ERR : LOG_DEBUG;
6,909✔
885

886
        siginfo_t status;
6,909✔
887
        r = pidref_wait_for_terminate(pidref, &status);
6,909✔
888
        if (r < 0)
6,909✔
889
                return log_full_errno(prio, r, "Failed to wait for %s: %m", strna(name));
×
890

891
        if (status.si_code == CLD_EXITED) {
6,909✔
892
                if (status.si_status != EXIT_SUCCESS)
6,909✔
893
                        log_full(flags & WAIT_LOG_NON_ZERO_EXIT_STATUS ? LOG_ERR : LOG_DEBUG,
63✔
894
                                 "%s failed with exit status %i.", strna(name), status.si_status);
895
                else
896
                        log_debug("%s succeeded.", name);
6,846✔
897

898
                return status.si_status;
6,909✔
899

UNCOV
900
        } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) {
×
901

UNCOV
902
                log_full(prio, "%s terminated by signal %s.", strna(name), signal_to_string(status.si_status));
×
UNCOV
903
                return -EPROTO;
×
904
        }
905

906
        log_full(prio, "%s failed due to unknown reason.", strna(name));
×
907
        return -EPROTO;
908
}
909

910
/* Convenience variant of pidref_wait_for_terminate_and_check() for callers that only have a numeric PID:
 * wraps 'pid' in a temporary PidRef and forwards 'name' and 'flags' unchanged. */
int wait_for_terminate_and_check(const char *name, pid_t pid, WaitFlags flags) {
        PidRef ref = PIDREF_MAKE_FROM_PID(pid);

        return pidref_wait_for_terminate_and_check(name, &ref, flags);
}
913

914
/*
915
 * Return values:
916
 *
917
 * < 0 : wait_for_terminate_with_timeout() failed to get the state of the process, the process timed out, the process
918
 *       was terminated by a signal, or failed for an unknown reason.
919
 *
920
 * >=0 : The process terminated normally with no failures.
921
 *
922
 * Success is indicated by a return value of zero, a timeout is indicated by ETIMEDOUT, and all other child failure
923
 * states are indicated by error is indicated by a non-zero value.
924
 *
925
 * This call assumes SIGCHLD has been blocked already, in particular before the child to wait for has been forked off
926
 * to remain entirely race-free.
927
 */
928
int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
×
929
        sigset_t mask;
×
930
        int r;
×
931
        usec_t until;
×
932

933
        assert_se(sigemptyset(&mask) == 0);
×
934
        assert_se(sigaddset(&mask, SIGCHLD) == 0);
×
935

936
        /* Drop into a sigtimewait-based timeout. Waiting for the
937
         * pid to exit. */
938
        until = usec_add(now(CLOCK_MONOTONIC), timeout);
×
939
        for (;;) {
×
940
                usec_t n;
×
941
                siginfo_t status = {};
×
942

943
                n = now(CLOCK_MONOTONIC);
×
944
                if (n >= until)
×
945
                        break;
946

947
                r = RET_NERRNO(sigtimedwait(&mask, NULL, TIMESPEC_STORE(until - n)));
×
948
                /* Assuming we woke due to the child exiting. */
949
                if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
×
950
                        if (status.si_pid == pid) {
×
951
                                /* This is the correct child. */
952
                                if (status.si_code == CLD_EXITED)
×
953
                                        return status.si_status == 0 ? 0 : -EPROTO;
×
954
                                else
955
                                        return -EPROTO;
956
                        }
957
                }
958
                /* Not the child, check for errors and proceed appropriately */
959
                if (r < 0) {
×
960
                        switch (r) {
×
961
                        case -EAGAIN:
962
                                /* Timed out, child is likely hung. */
963
                                return -ETIMEDOUT;
964
                        case -EINTR:
×
965
                                /* Received a different signal and should retry */
966
                                continue;
×
967
                        default:
×
968
                                /* Return any unexpected errors */
969
                                return r;
×
970
                        }
971
                }
972
        }
973

974
        return -EPROTO;
×
975
}
976

977
void sigkill_wait(pid_t pid) {
136✔
978
        assert(pid > 1);
136✔
979

980
        (void) kill(pid, SIGKILL);
136✔
981
        (void) wait_for_terminate(pid, NULL);
136✔
982
}
136✔
983

984
void sigkill_waitp(pid_t *pid) {
12,276✔
985
        PROTECT_ERRNO;
12,276✔
986

987
        if (!pid)
12,276✔
988
                return;
989
        if (*pid <= 1)
12,276✔
990
                return;
991

992
        sigkill_wait(*pid);
136✔
993
}
994

995
void sigterm_wait(pid_t pid) {
96✔
996
        assert(pid > 1);
96✔
997

998
        (void) kill_and_sigcont(pid, SIGTERM);
96✔
999
        (void) wait_for_terminate(pid, NULL);
96✔
1000
}
96✔
1001

1002
/* Sends SIGKILL to 'pid' without waiting for the process afterwards. The result of kill() is deliberately
 * ignored. 'pid' must be greater than 1. */
void sigkill_nowait(pid_t pid) {
        assert(pid > 1);

        (void) kill(pid, SIGKILL);
}
×
1007

1008
void sigkill_nowaitp(pid_t *pid) {
×
1009
        PROTECT_ERRNO;
×
1010

1011
        if (!pid)
×
1012
                return;
1013
        if (*pid <= 1)
×
1014
                return;
1015

1016
        sigkill_nowait(*pid);
×
1017
}
1018

1019
/* Sends 'sig' to 'pid' and, if that worked, follows up with SIGCONT.
 *
 * Returns the result of the first kill() as converted by RET_NERRNO(). The result of the SIGCONT kill() is
 * ignored. */
int kill_and_sigcont(pid_t pid, int sig) {
        int r = RET_NERRNO(kill(pid, sig));
        if (r < 0)
                return r;

        /* No need for an extra SIGCONT if we just sent one, and SIGKILL isn't affected by a process being
         * suspended anyway. */
        if (sig != SIGCONT && sig != SIGKILL)
                (void) kill(pid, SIGCONT);

        return r;
}
1031

1032
int getenv_for_pid(pid_t pid, const char *field, char **ret) {
4,778✔
1033
        _cleanup_fclose_ FILE *f = NULL;
4,778✔
1034
        const char *path;
4,778✔
1035
        size_t sum = 0;
4,778✔
1036
        int r;
4,778✔
1037

1038
        assert(pid >= 0);
4,778✔
1039
        assert(field);
4,778✔
1040
        assert(ret);
4,778✔
1041

1042
        if (pid == 0 || pid == getpid_cached())
4,778✔
1043
                return strdup_to_full(ret, getenv(field));
14✔
1044

1045
        if (!pid_is_valid(pid))
4,764✔
1046
                return -EINVAL;
1047

1048
        path = procfs_file_alloca(pid, "environ");
4,764✔
1049

1050
        r = fopen_unlocked(path, "re", &f);
4,764✔
1051
        if (r == -ENOENT)
4,764✔
1052
                return -ESRCH;
1053
        if (r < 0)
4,361✔
1054
                return r;
1055

1056
        for (;;) {
51,355✔
1057
                _cleanup_free_ char *line = NULL;
24,167✔
1058
                const char *match;
27,205✔
1059

1060
                if (sum > ENVIRONMENT_BLOCK_MAX) /* Give up searching eventually */
27,205✔
1061
                        return -ENOBUFS;
1062

1063
                r = read_nul_string(f, LONG_LINE_MAX, &line);
27,205✔
1064
                if (r < 0)
27,205✔
1065
                        return r;
1066
                if (r == 0)  /* EOF */
27,205✔
1067
                        break;
1068

1069
                sum += r;
24,167✔
1070

1071
                match = startswith(line, field);
24,167✔
1072
                if (match && *match == '=')
24,167✔
1073
                        return strdup_to_full(ret, match + 1);
17✔
1074
        }
1075

1076
        *ret = NULL;
3,038✔
1077
        return 0;
3,038✔
1078
}
1079

1080
/* Checks whether the process referenced by 'pid' is a direct child of the calling process.
 *
 * Returns > 0 if its parent PID equals our own PID, 0 if not (also for PID 1, for ourselves, and when
 * pidref_get_ppid() reports -EADDRNOTAVAIL), -ESRCH if 'pid' is not set, -EREMOTE if it is remote, and any
 * other error of pidref_get_ppid() as is. */
int pidref_is_my_child(PidRef *pid) {
        pid_t parent;
        int r;

        if (!pidref_is_set(pid))
                return -ESRCH;

        if (pidref_is_remote(pid))
                return -EREMOTE;

        if (pid->pid == 1 || pidref_is_self(pid))
                return false;

        r = pidref_get_ppid(pid, &parent);
        if (r >= 0)
                return parent == getpid_cached();

        /* If this process is outside of our pidns, it is definitely not our child */
        return r == -EADDRNOTAVAIL ? false : r;
}
1101

1102
int pid_is_my_child(pid_t pid) {
×
1103

1104
        if (pid == 0)
×
1105
                return false;
×
1106

1107
        return pidref_is_my_child(&PIDREF_MAKE_FROM_PID(pid));
×
1108
}
1109

1110
/* Checks whether a PID is still valid at all, including a zombie.
 *
 * Returns > 0 if so (always for PID 1 and for ourselves), 0 if pidref_kill() with signal 0 reports -ESRCH,
 * -ESRCH if 'pid' is not set, -EREMOTE if it is remote, and any other pidref_kill() error as is. */
int pidref_is_unwaited(PidRef *pid) {
        int r;

        if (!pidref_is_set(pid))
                return -ESRCH;

        if (pidref_is_remote(pid))
                return -EREMOTE;

        if (pid->pid == 1 || pidref_is_self(pid))
                return true;

        /* Signal 0: only probe, don't actually deliver anything */
        r = pidref_kill(pid, 0);
        if (r >= 0)
                return true;

        return r == -ESRCH ? false : r;
}
1132

1133
int pid_is_unwaited(pid_t pid) {
7,807✔
1134

1135
        if (pid == 0)
7,807✔
1136
                return true;
7,807✔
1137

1138
        return pidref_is_unwaited(&PIDREF_MAKE_FROM_PID(pid));
7,807✔
1139
}
1140

1141
/* Checks whether a PID is still valid and not a zombie.
 *
 * Returns -ESRCH for negative PIDs, > 0 for PIDs 0 and 1 and for our own PID, 0 if get_process_state()
 * reports -ESRCH or the state 'Z', > 0 for any other state, and other get_process_state() errors as is. */
int pid_is_alive(pid_t pid) {
        int state;

        if (pid < 0)
                return -ESRCH;

        /* If we or PID 1 would be a zombie, this code would not be running */
        if (pid <= 1 || pid == getpid_cached())
                return true;

        state = get_process_state(pid);
        if (state == -ESRCH)
                return false;
        if (state < 0)
                return state;

        return state != 'Z';
}
1163

1164
int pidref_is_alive(const PidRef *pidref) {
12,818✔
1165
        int r, result;
12,818✔
1166

1167
        if (!pidref_is_set(pidref))
12,818✔
1168
                return -ESRCH;
1169

1170
        if (pidref_is_remote(pidref))
12,818✔
1171
                return -EREMOTE;
1172

1173
        result = pid_is_alive(pidref->pid);
12,818✔
1174
        if (result < 0) {
12,818✔
1175
                assert(result != -ESRCH);
×
1176
                return result;
1177
        }
1178

1179
        r = pidref_verify(pidref);
12,818✔
1180
        if (r == -ESRCH)
12,818✔
1181
                return false;
1182
        if (r < 0)
10,055✔
1183
                return r;
×
1184

1185
        return result;
1186
}
1187

1188
/* Checks if the two specified processes have the same root fs. Either can be specified as NULL, in which
 * case we'll check against ourselves.
 *
 * Returns > 0 if the root is the same, 0 if not, and a negative errno-style value if this cannot be
 * determined: -ESRCH if a reference is not set, -EREMOTE if both are remote, -ENOSYS/-ESRCH if the procfs
 * "root" entries cannot be found, or errors from pidref_set_self(), inode_same() and pidref_verify(). */
int pidref_from_same_root_fs(PidRef *a, PidRef *b) {
        _cleanup_(pidref_done) PidRef self = PIDREF_NULL;
        int r;

        if (!a || !b) {
                r = pidref_set_self(&self);
                if (r < 0)
                        return r;

                a = a ? a : &self;
                b = b ? b : &self;
        }

        if (!pidref_is_set(a) || !pidref_is_set(b))
                return -ESRCH;

        /* If exactly one of the two processes is remote they cannot have the same root fs, but if both of
         * them are we don't know */
        if (pidref_is_remote(a) && pidref_is_remote(b))
                return -EREMOTE;
        if (pidref_is_remote(a) || pidref_is_remote(b))
                return false;

        if (pidref_equal(a, b))
                return true;

        const char *root_a = procfs_file_alloca(a->pid, "root");
        const char *root_b = procfs_file_alloca(b->pid, "root");

        int same = inode_same(root_a, root_b, 0);
        if (same == -ENOENT)
                return proc_mounted() == 0 ? -ENOSYS : -ESRCH;
        if (same < 0)
                return same;

        /* Call pidref_verify() on both references only after the comparison, and only trust the
         * comparison if both succeed. */
        r = pidref_verify(a);
        if (r < 0)
                return r;

        r = pidref_verify(b);
        if (r < 0)
                return r;

        return same;
}
1236

1237
/* Returns true if the calling thread's TID equals the process' PID. The answer is computed on the first
 * call in each thread and cached in a thread-local variable afterwards. */
bool is_main_thread(void) {
        /* < 0 means "not determined yet", otherwise the cached boolean result */
        static thread_local int is_main = -1;

        if (is_main < 0)
                is_main = getpid_cached() == gettid();

        return is_main;
}
1245

1246
/* Returns true if 'oa' lies within the inclusive range OOM_SCORE_ADJ_MIN…OOM_SCORE_ADJ_MAX. */
bool oom_score_adjust_is_valid(int oa) {
        if (oa < OOM_SCORE_ADJ_MIN)
                return false;

        return oa <= OOM_SCORE_ADJ_MAX;
}
1249

1250
unsigned long personality_from_string(const char *p) {
9✔
1251
        Architecture architecture;
9✔
1252

1253
        if (!p)
9✔
1254
                return PERSONALITY_INVALID;
1255

1256
        /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just
1257
         * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for
1258
         * the same register size. */
1259

1260
        architecture = architecture_from_string(p);
8✔
1261
        if (architecture < 0)
8✔
1262
                return PERSONALITY_INVALID;
1263

1264
        if (architecture == native_architecture())
6✔
1265
                return PER_LINUX;
1266
#ifdef ARCHITECTURE_SECONDARY
1267
        if (architecture == ARCHITECTURE_SECONDARY)
3✔
1268
                return PER_LINUX32;
2✔
1269
#endif
1270

1271
        return PERSONALITY_INVALID;
1272
}
1273

1274
const char* personality_to_string(unsigned long p) {
2,760✔
1275
        Architecture architecture = _ARCHITECTURE_INVALID;
2,760✔
1276

1277
        if (p == PER_LINUX)
2,760✔
1278
                architecture = native_architecture();
1279
#ifdef ARCHITECTURE_SECONDARY
1280
        else if (p == PER_LINUX32)
2,755✔
1281
                architecture = ARCHITECTURE_SECONDARY;
1282
#endif
1283

1284
        if (architecture < 0)
1285
                return NULL;
1286

1287
        return architecture_to_string(architecture);
7✔
1288
}
1289

1290
/* So here's the deal, personality() is weirdly defined by glibc. In some cases it returns a failure via
 * errno, and in others as negative return value containing an errno-like value. Let's work around this:
 * this is a wrapper that uses errno if it is set, and uses the return value otherwise. And then it sets
 * both errno and the return value indicating the same issue, so that we are definitely on the safe side.
 *
 * See https://github.com/systemd/systemd/issues/6737
 *
 * Returns the non-negative result of personality() on success, or a negative errno-style value on failure
 * (with errno set to the matching positive value). */
int safe_personality(unsigned long p) {
        int ret;

        errno = 0;
        ret = personality(p);
        if (ret >= 0)
                return ret;

        /* Failure reported via errno */
        if (errno != 0)
                return -errno;

        /* Failure reported via the return value only: mirror it into errno */
        errno = -ret;
        return ret;
}
1311

1312
int opinionated_personality(unsigned long *ret) {
1,450✔
1313
        int current;
1,450✔
1314

1315
        /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit
1316
         * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the
1317
         * two most relevant personalities: PER_LINUX and PER_LINUX32. */
1318

1319
        current = safe_personality(PERSONALITY_INVALID);
1,450✔
1320
        if (current < 0)
1,450✔
1321
                return current;
1322

1323
        if (((unsigned long) current & OPINIONATED_PERSONALITY_MASK) == PER_LINUX32)
1,450✔
1324
                *ret = PER_LINUX32;
×
1325
        else
1326
                *ret = PER_LINUX;
1,450✔
1327

1328
        return 0;
1329
}
1330

1331
/* Only does something if built with valgrind support, running as PID 1 and under valgrind: forks off a
 * helper child via raw_clone() that exits immediately, and waits for it. A failure to fork is logged at
 * LOG_EMERG and otherwise ignored. */
void valgrind_summary_hack(void) {
#if HAVE_VALGRIND_VALGRIND_H
        pid_t pid;

        if (getpid_cached() != 1 || !RUNNING_ON_VALGRIND)
                return;

        pid = raw_clone(SIGCHLD);
        if (pid < 0) {
                log_struct_errno(
                        LOG_EMERG, errno,
                        LOG_MESSAGE_ID(SD_MESSAGE_VALGRIND_HELPER_FORK_STR),
                        LOG_MESSAGE("Failed to fork off valgrind helper: %m"));
                return;
        }

        if (pid == 0) /* In the helper child: nothing to do but exit */
                exit(EXIT_SUCCESS);

        log_info("Spawned valgrind helper as PID "PID_FMT".", pid);
        (void) wait_for_terminate(pid, NULL);
#endif
}
39✔
1350

1351
/* Comparison function for two PIDs, suitable for usage in qsort(). Returns the result of CMP() applied to
 * the two pointed-to values. */
int pid_compare_func(const pid_t *a, const pid_t *b) {
        pid_t x = *a, y = *b;

        return CMP(x, y);
}
1355

1356
/* Returns true if 'n' lies in the half-open range PRIO_MIN…PRIO_MAX, i.e. PRIO_MIN itself is accepted but
 * PRIO_MAX is not. */
bool nice_is_valid(int n) {
        if (n < PRIO_MIN)
                return false;

        return n < PRIO_MAX;
}
1359

1360
bool sched_policy_is_valid(int i) {
×
1361
        return IN_SET(i, SCHED_OTHER, SCHED_BATCH, SCHED_IDLE, SCHED_FIFO, SCHED_RR);
×
1362
}
1363

1364
/* Returns true if 'i' lies within 0…sched_get_priority_max(SCHED_RR), both ends inclusive. */
bool sched_priority_is_valid(int i) {
        if (i < 0)
                return false;

        return i <= sched_get_priority_max(SCHED_RR);
}
1367

1368
/* State of the PID cache used by getpid_cached() and reset_cached_pid(). 'cached_pid' holds one of:
 *
 *     CACHED_PID_UNSET [0]  → cache not initialized yet
 *     CACHED_PID_BUSY [-1]  → some thread is initializing it at the moment
 *     any other value       → the cached PID
 */

#define CACHED_PID_UNSET ((pid_t) 0)
#define CACHED_PID_BUSY ((pid_t) -1)

static pid_t cached_pid = CACHED_PID_UNSET;
1379

1380
void reset_cached_pid(void) {
1,560✔
1381
        /* Invoked in the child after a fork(), i.e. at the first moment the PID changed */
1382
        cached_pid = CACHED_PID_UNSET;
1,560✔
1383
}
1,560✔
1384

1385
pid_t getpid_cached(void) {
148,864,186✔
1386
        static bool installed = false;
148,864,186✔
1387
        pid_t current_value = CACHED_PID_UNSET;
148,864,186✔
1388

1389
        /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
1390
         * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
1391
         * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
1392
         * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
1393
         *
1394
         * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
1395
         * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
1396
         */
1397

1398
        (void) __atomic_compare_exchange_n(
148,864,186✔
1399
                        &cached_pid,
1400
                        &current_value,
1401
                        CACHED_PID_BUSY,
1402
                        false,
1403
                        __ATOMIC_SEQ_CST,
1404
                        __ATOMIC_SEQ_CST);
1405

1406
        switch (current_value) {
148,864,186✔
1407

1408
        case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */
102,326✔
1409
                pid_t new_pid;
102,326✔
1410

1411
                new_pid = getpid();
102,326✔
1412

1413
                if (!installed) {
102,326✔
1414
                        /* __register_atfork() either returns 0 or -ENOMEM, in its glibc implementation. Since it's
1415
                         * only half-documented (glibc doesn't document it but LSB does — though only superficially)
1416
                         * we'll check for errors only in the most generic fashion possible. */
1417

1418
                        if (pthread_atfork(NULL, NULL, reset_cached_pid) != 0) {
71,689✔
1419
                                /* OOM? Let's try again later */
1420
                                cached_pid = CACHED_PID_UNSET;
×
1421
                                return new_pid;
×
1422
                        }
1423

1424
                        installed = true;
71,689✔
1425
                }
1426

1427
                cached_pid = new_pid;
102,326✔
1428
                return new_pid;
102,326✔
1429
        }
1430

1431
        case CACHED_PID_BUSY: /* Somebody else is currently initializing */
×
1432
                return getpid();
×
1433

1434
        default: /* Properly initialized */
1435
                return current_value;
1436
        }
1437
}
1438

1439
/* Returns 0 if the effective UID is 0. Otherwise logs "Need to be root." as an error and returns what
 * log_error_errno() yields for a synthetic EPERM. */
int must_be_root(void) {
        if (geteuid() != 0)
                return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Need to be root.");

        return 0;
}
1446

1447
pid_t clone_with_nested_stack(int (*fn)(void *), int flags, void *userdata) {
3,518✔
1448
        size_t ps;
3,518✔
1449
        pid_t pid;
3,518✔
1450
        void *mystack;
3,518✔
1451

1452
        /* A wrapper around glibc's clone() call that automatically sets up a "nested" stack. Only supports
1453
         * invocations without CLONE_VM, so that we can continue to use the parent's stack mapping.
1454
         *
1455
         * Note: glibc's clone() wrapper does not synchronize malloc() locks. This means that if the parent
1456
         * is threaded these locks will be in an undefined state in the child, and hence memory allocations
1457
         * are likely going to run into deadlocks. Hence: if you use this function make sure your parent is
1458
         * strictly single-threaded or your child never calls malloc(). */
1459

1460
        assert((flags & (CLONE_VM|CLONE_PARENT_SETTID|CLONE_CHILD_SETTID|
3,518✔
1461
                         CLONE_CHILD_CLEARTID|CLONE_SETTLS)) == 0);
1462

1463
        /* We allocate some space on the stack to use as the stack for the child (hence "nested"). Note that
1464
         * the net effect is that the child will have the start of its stack inside the stack of the parent,
1465
         * but since they are a CoW copy of each other that's fine. We allocate one page-aligned page. But
1466
         * since we don't want to deal with differences between systems where the stack grows backwards or
1467
         * forwards we'll allocate one more and place the stack address in the middle. Except that we also
1468
         * want it page aligned, hence we'll allocate one page more. Makes 3. */
1469

1470
        ps = page_size();
3,518✔
1471
        mystack = alloca(ps*3);
3,518✔
1472
        mystack = (uint8_t*) mystack + ps; /* move pointer one page ahead since stacks usually grow backwards */
3,518✔
1473
        mystack = (void*) ALIGN_TO((uintptr_t) mystack, ps); /* align to page size (moving things further ahead) */
3,518✔
1474

1475
#if HAVE_CLONE
1476
        pid = clone(fn, mystack, flags, userdata);
3,518✔
1477
#else
1478
        pid = __clone2(fn, mystack, ps, flags, userdata);
1479
#endif
1480
        if (pid < 0)
3,518✔
1481
                return -errno;
×
1482

1483
        return pid;
1484
}
1485

1486
/* Cleanup helper: if *ssp is non-NULL, installs the signal set it points to as the current signal mask.
 * The result of sigprocmask() is ignored. */
static void restore_sigsetp(sigset_t **ssp) {
        sigset_t *saved = *ssp;

        if (!saved)
                return;

        (void) sigprocmask(SIG_SETMASK, saved, NULL);
}
58,259✔
1490

1491
/* Maps the FORK_DEATHSIG_* bits in 'flags' to a signal number: SIGTERM if FORK_DEATHSIG_SIGTERM is set,
 * otherwise SIGINT if FORK_DEATHSIG_SIGINT is set, otherwise SIGKILL. */
static int fork_flags_to_signal(ForkFlags flags) {
        if (flags & FORK_DEATHSIG_SIGTERM)
                return SIGTERM;

        if (flags & FORK_DEATHSIG_SIGINT)
                return SIGINT;

        return SIGKILL;
}
1496

1497
int pidref_safe_fork_full(
28,325✔
1498
                const char *name,
1499
                const int stdio_fds[3],
1500
                int except_fds[],
1501
                size_t n_except_fds,
1502
                ForkFlags flags,
1503
                PidRef *ret_pid) {
1504

1505
        pid_t original_pid, pid;
28,325✔
1506
        sigset_t saved_ss, ss;
28,325✔
1507
        _unused_ _cleanup_(restore_sigsetp) sigset_t *saved_ssp = NULL;
×
1508
        bool block_signals = false, block_all = false, intermediary = false;
28,325✔
1509
        _cleanup_close_pair_ int pidref_transport_fds[2] = EBADF_PAIR;
58,259✔
1510
        int prio, r;
28,325✔
1511

1512
        assert(!FLAGS_SET(flags, FORK_WAIT|FORK_FREEZE));
28,325✔
1513
        assert(!FLAGS_SET(flags, FORK_DETACH) ||
28,325✔
1514
               (flags & (FORK_WAIT|FORK_DEATHSIG_SIGTERM|FORK_DEATHSIG_SIGINT|FORK_DEATHSIG_SIGKILL)) == 0);
1515

1516
        /* A wrapper around fork(), that does a couple of important initializations in addition to mere
1517
         * forking. If provided, ret_pid is initialized in both the parent and the child process, both times
1518
         * referencing the child process. Returns == 0 in the child and > 0 in the parent. */
1519

1520
        prio = flags & FORK_LOG ? LOG_ERR : LOG_DEBUG;
28,325✔
1521

1522
        original_pid = getpid_cached();
28,325✔
1523

1524
        if (flags & FORK_FLUSH_STDIO) {
28,325✔
1525
                fflush(stdout);
5✔
1526
                fflush(stderr); /* This one shouldn't be necessary, stderr should be unbuffered anyway, but let's better be safe than sorry */
5✔
1527
        }
1528

1529
        if (flags & (FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGTERM|FORK_DEATHSIG_SIGINT)) {
28,325✔
1530
                /* We temporarily block all signals, so that the new child has them blocked initially. This
1531
                 * way, we can be sure that SIGTERMs are not lost we might send to the child. (Note that for
1532
                 * FORK_DEATHSIG_SIGKILL we don't bother, since it cannot be blocked anyway.) */
1533

1534
                assert_se(sigfillset(&ss) >= 0);
24,217✔
1535
                block_signals = block_all = true;
1536

1537
        } else if (flags & FORK_WAIT) {
4,108✔
1538
                /* Let's block SIGCHLD at least, so that we can safely watch for the child process */
1539

1540
                assert_se(sigemptyset(&ss) >= 0);
148✔
1541
                assert_se(sigaddset(&ss, SIGCHLD) >= 0);
148✔
1542
                block_signals = true;
1543
        }
1544

1545
        if (block_signals) {
1546
                if (sigprocmask(SIG_BLOCK, &ss, &saved_ss) < 0)
24,365✔
1547
                        return log_full_errno(prio, errno, "Failed to block signal mask: %m");
×
1548
                saved_ssp = &saved_ss;
24,365✔
1549
        }
1550

1551
        if (FLAGS_SET(flags, FORK_DETACH)) {
28,325✔
1552
                /* Fork off intermediary child if needed */
1553

1554
                r = is_reaper_process();
105✔
1555
                if (r < 0)
105✔
1556
                        return log_full_errno(prio, r, "Failed to determine if we are a reaper process: %m");
×
1557

1558
                if (!r) {
105✔
1559
                        /* Not a reaper process, hence do a double fork() so we are reparented to one */
1560

1561
                        if (ret_pid && socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, pidref_transport_fds) < 0)
11✔
1562
                                return log_full_errno(prio, errno, "Failed to allocate pidref socket: %m");
×
1563

1564
                        pid = fork();
11✔
1565
                        if (pid < 0)
28✔
1566
                                return log_full_errno(prio, errno, "Failed to fork off '%s': %m", strna(name));
×
1567
                        if (pid > 0) {
28✔
1568
                                log_debug("Successfully forked off intermediary '%s' as PID " PID_FMT ".", strna(name), pid);
11✔
1569

1570
                                pidref_transport_fds[1] = safe_close(pidref_transport_fds[1]);
11✔
1571

1572
                                if (pidref_transport_fds[0] >= 0) {
11✔
1573
                                        /* Wait for the intermediary child to exit so the caller can be certain the actual child
1574
                                         * process has been reparented by the time this function returns. */
1575
                                        r = wait_for_terminate_and_check(name, pid, FLAGS_SET(flags, FORK_LOG) ? WAIT_LOG : 0);
20✔
1576
                                        if (r < 0)
10✔
1577
                                                return log_full_errno(prio, r, "Failed to wait for intermediary process: %m");
×
1578
                                        if (r != EXIT_SUCCESS) /* exit status > 0 should be treated as failure, too */
10✔
1579
                                                return -EPROTO;
1580

1581
                                        int pidfd;
10✔
1582
                                        ssize_t n = receive_one_fd_iov(
20✔
1583
                                                        pidref_transport_fds[0],
1584
                                                        &IOVEC_MAKE(&pid, sizeof(pid)),
10✔
1585
                                                        /* iovlen= */ 1,
1586
                                                        /* flags= */ 0,
1587
                                                        &pidfd);
1588
                                        if (n < 0)
10✔
1589
                                                return log_full_errno(prio, n, "Failed to receive child pidref: %m");
×
1590

1591
                                        *ret_pid = (PidRef) { .pid = pid, .fd = pidfd };
10✔
1592
                                }
1593

1594
                                return 1; /* return in the parent */
11✔
1595
                        }
1596

1597
                        pidref_transport_fds[0] = safe_close(pidref_transport_fds[0]);
17✔
1598
                        intermediary = true;
17✔
1599
                }
1600
        }
1601

1602
        if ((flags & (FORK_NEW_MOUNTNS|FORK_NEW_USERNS|FORK_NEW_NETNS|FORK_NEW_PIDNS)) != 0)
28,331✔
1603
                pid = raw_clone(SIGCHLD|
5,462✔
1604
                                (FLAGS_SET(flags, FORK_NEW_MOUNTNS) ? CLONE_NEWNS : 0) |
5,462✔
1605
                                (FLAGS_SET(flags, FORK_NEW_USERNS) ? CLONE_NEWUSER : 0) |
5,462✔
1606
                                (FLAGS_SET(flags, FORK_NEW_NETNS) ? CLONE_NEWNET : 0) |
5,462✔
1607
                                (FLAGS_SET(flags, FORK_NEW_PIDNS) ? CLONE_NEWPID : 0));
5,462✔
1608
        else
1609
                pid = fork();
22,869✔
1610
        if (pid < 0)
58,259✔
1611
                return log_full_errno(prio, errno, "Failed to fork off '%s': %m", strna(name));
×
1612
        if (pid > 0) {
58,259✔
1613

1614
                /* If we are in the intermediary process, exit now */
1615
                if (intermediary) {
28,015✔
1616
                        if (pidref_transport_fds[1] >= 0) {
11✔
1617
                                _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
10✔
1618

1619
                                r = pidref_set_pid(&pidref, pid);
10✔
1620
                                if (r < 0) {
10✔
1621
                                        log_full_errno(prio, r, "Failed to open reference to PID "PID_FMT": %m", pid);
×
1622
                                        _exit(EXIT_FAILURE);
×
1623
                                }
1624

1625
                                r = send_one_fd_iov(
10✔
1626
                                                pidref_transport_fds[1],
1627
                                                pidref.fd,
1628
                                                &IOVEC_MAKE(&pidref.pid, sizeof(pidref.pid)),
1629
                                                /* iovlen= */ 1,
1630
                                                /* flags= */ 0);
1631
                                if (r < 0) {
10✔
1632
                                        log_full_errno(prio, r, "Failed to send child pidref: %m");
×
1633
                                        _exit(EXIT_FAILURE);
×
1634
                                }
1635
                        }
1636

1637
                        _exit(EXIT_SUCCESS);
11✔
1638
                }
1639

1640
                /* We are in the parent process */
1641
                log_debug("Successfully forked off '%s' as PID " PID_FMT ".", strna(name), pid);
28,004✔
1642

1643
                if (flags & FORK_WAIT) {
28,004✔
1644
                        if (block_all) {
737✔
1645
                                /* undo everything except SIGCHLD */
1646
                                ss = saved_ss;
589✔
1647
                                assert_se(sigaddset(&ss, SIGCHLD) >= 0);
589✔
1648
                                (void) sigprocmask(SIG_SETMASK, &ss, NULL);
589✔
1649
                        }
1650

1651
                        r = wait_for_terminate_and_check(name, pid, (flags & FORK_LOG ? WAIT_LOG : 0));
865✔
1652
                        if (r < 0)
737✔
1653
                                return r;
1654
                        if (r != EXIT_SUCCESS) /* exit status > 0 should be treated as failure, too */
737✔
1655
                                return -EPROTO;
1656

1657
                        /* If we are in the parent and successfully waited, then the process doesn't exist anymore. */
1658
                        if (ret_pid)
737✔
1659
                                *ret_pid = PIDREF_NULL;
14✔
1660

1661
                        return 1;
737✔
1662
                }
1663

1664
                if (ret_pid) {
27,267✔
1665
                        if (FLAGS_SET(flags, _FORK_PID_ONLY))
26,672✔
1666
                                *ret_pid = PIDREF_MAKE_FROM_PID(pid);
7,546✔
1667
                        else {
1668
                                r = pidref_set_pid(ret_pid, pid);
19,126✔
1669
                                if (r < 0) /* Let's not fail for this, no matter what, the process exists after all, and that's key */
19,126✔
1670
                                        *ret_pid = PIDREF_MAKE_FROM_PID(pid);
×
1671
                        }
1672
                }
1673

1674
                return 1;
27,267✔
1675
        }
1676

1677
        /* We are in the child process */
1678

1679
        pidref_transport_fds[1] = safe_close(pidref_transport_fds[1]);
30,244✔
1680

1681
        /* Restore signal mask manually */
1682
        saved_ssp = NULL;
30,244✔
1683

1684
        if (flags & FORK_REOPEN_LOG) {
30,244✔
1685
                /* Close the logs if requested, before we log anything. And make sure we reopen it if needed. */
1686
                log_close();
6,716✔
1687
                log_set_open_when_needed(true);
6,716✔
1688
                log_settle_target();
6,716✔
1689
        }
1690

1691
        if (name) {
30,244✔
1692
                r = rename_process(name);
30,244✔
1693
                if (r < 0)
30,244✔
1694
                        log_full_errno(flags & FORK_LOG ? LOG_WARNING : LOG_DEBUG,
×
1695
                                       r, "Failed to rename process, ignoring: %m");
1696
        }
1697

1698
        if (flags & (FORK_DEATHSIG_SIGTERM|FORK_DEATHSIG_SIGINT|FORK_DEATHSIG_SIGKILL))
30,244✔
1699
                if (prctl(PR_SET_PDEATHSIG, fork_flags_to_signal(flags)) < 0) {
28,824✔
1700
                        log_full_errno(prio, errno, "Failed to set death signal: %m");
×
1701
                        _exit(EXIT_FAILURE);
×
1702
                }
1703

1704
        if (flags & FORK_RESET_SIGNALS) {
30,244✔
1705
                r = reset_all_signal_handlers();
25,142✔
1706
                if (r < 0) {
25,142✔
1707
                        log_full_errno(prio, r, "Failed to reset signal handlers: %m");
×
1708
                        _exit(EXIT_FAILURE);
×
1709
                }
1710

1711
                /* This implicitly undoes the signal mask stuff we did before the fork()ing above */
1712
                r = reset_signal_mask();
25,142✔
1713
                if (r < 0) {
25,142✔
1714
                        log_full_errno(prio, r, "Failed to reset signal mask: %m");
×
1715
                        _exit(EXIT_FAILURE);
×
1716
                }
1717
        } else if (block_signals) { /* undo what we did above */
5,102✔
1718
                if (sigprocmask(SIG_SETMASK, &saved_ss, NULL) < 0) {
4,581✔
1719
                        log_full_errno(prio, errno, "Failed to restore signal mask: %m");
×
1720
                        _exit(EXIT_FAILURE);
×
1721
                }
1722
        }
1723

1724
        if (flags & (FORK_DEATHSIG_SIGTERM|FORK_DEATHSIG_SIGKILL|FORK_DEATHSIG_SIGINT)) {
30,244✔
1725
                pid_t ppid;
28,824✔
1726
                /* Let's see if the parent PID is still the one we started from? If not, then the parent
1727
                 * already died by the time we set PR_SET_PDEATHSIG, hence let's emulate the effect */
1728

1729
                ppid = getppid();
28,824✔
1730
                if (ppid == 0)
28,824✔
1731
                        /* Parent is in a different PID namespace. */;
1732
                else if (ppid != original_pid) {
28,786✔
1733
                        int sig = fork_flags_to_signal(flags);
×
1734
                        log_debug("Parent died early, raising %s.", signal_to_string(sig));
×
1735
                        (void) raise(sig);
×
1736
                        _exit(EXIT_FAILURE);
×
1737
                }
1738
        }
1739

1740
        if (FLAGS_SET(flags, FORK_NEW_MOUNTNS | FORK_MOUNTNS_SLAVE)) {
30,244✔
1741
                /* Optionally, make sure we never propagate mounts to the host. */
1742
                if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
140✔
1743
                        log_full_errno(prio, errno, "Failed to remount root directory as MS_SLAVE: %m");
×
1744
                        _exit(EXIT_FAILURE);
×
1745
                }
1746
        }
1747

1748
        if (FLAGS_SET(flags, FORK_PRIVATE_TMP)) {
30,244✔
1749
                assert(FLAGS_SET(flags, FORK_NEW_MOUNTNS));
×
1750

1751
                /* Optionally, overmount new tmpfs instance on /tmp/. */
1752
                r = mount_nofollow("tmpfs", "/tmp", "tmpfs",
×
1753
                                   MS_NOSUID|MS_NODEV,
1754
                                   "mode=01777" TMPFS_LIMITS_RUN);
1755
                if (r < 0) {
×
1756
                        log_full_errno(prio, r, "Failed to overmount /tmp/: %m");
×
1757
                        _exit(EXIT_FAILURE);
×
1758
                }
1759
        }
1760

1761
        if (flags & FORK_REARRANGE_STDIO) {
30,244✔
1762
                if (stdio_fds) {
15,896✔
1763
                        r = rearrange_stdio(stdio_fds[0], stdio_fds[1], stdio_fds[2]);
15,880✔
1764
                        if (r < 0) {
15,880✔
1765
                                log_full_errno(prio, r, "Failed to rearrange stdio fds: %m");
×
1766
                                _exit(EXIT_FAILURE);
×
1767
                        }
1768

1769
                        /* Turn off O_NONBLOCK on the fdio fds, in case it was left on */
1770
                        stdio_disable_nonblock();
15,880✔
1771
                } else {
1772
                        r = make_null_stdio();
16✔
1773
                        if (r < 0) {
16✔
1774
                                log_full_errno(prio, r, "Failed to connect stdin/stdout to /dev/null: %m");
×
1775
                                _exit(EXIT_FAILURE);
×
1776
                        }
1777
                }
1778
        } else if (flags & FORK_STDOUT_TO_STDERR) {
14,348✔
1779
                if (dup2(STDERR_FILENO, STDOUT_FILENO) < 0) {
2✔
1780
                        log_full_errno(prio, errno, "Failed to connect stdout to stderr: %m");
×
1781
                        _exit(EXIT_FAILURE);
×
1782
                }
1783
        }
1784

1785
        if (flags & FORK_CLOSE_ALL_FDS) {
30,244✔
1786
                /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */
1787
                log_close();
24,179✔
1788

1789
                r = close_all_fds(except_fds, n_except_fds);
24,179✔
1790
                if (r < 0) {
24,179✔
1791
                        log_full_errno(prio, r, "Failed to close all file descriptors: %m");
×
1792
                        _exit(EXIT_FAILURE);
×
1793
                }
1794
        }
1795

1796
        if (flags & FORK_PACK_FDS) {
30,244✔
1797
                /* FORK_CLOSE_ALL_FDS ensures that except_fds are the only FDs >= 3 that are
1798
                 * open, this is including the log. This is required by pack_fds, which will
1799
                 * get stuck in an infinite loop of any FDs other than except_fds are open. */
1800
                assert(FLAGS_SET(flags, FORK_CLOSE_ALL_FDS));
88✔
1801

1802
                r = pack_fds(except_fds, n_except_fds);
88✔
1803
                if (r < 0) {
88✔
1804
                        log_full_errno(prio, r, "Failed to pack file descriptors: %m");
×
1805
                        _exit(EXIT_FAILURE);
×
1806
                }
1807
        }
1808

1809
        if (flags & FORK_CLOEXEC_OFF) {
30,244✔
1810
                r = fd_cloexec_many(except_fds, n_except_fds, false);
103✔
1811
                if (r < 0) {
103✔
1812
                        log_full_errno(prio, r, "Failed to turn off O_CLOEXEC on file descriptors: %m");
×
1813
                        _exit(EXIT_FAILURE);
×
1814
                }
1815
        }
1816

1817
        /* When we were asked to reopen the logs, do so again now */
1818
        if (flags & FORK_REOPEN_LOG) {
30,244✔
1819
                log_open();
6,716✔
1820
                log_set_open_when_needed(false);
6,716✔
1821
        }
1822

1823
        if (flags & FORK_RLIMIT_NOFILE_SAFE) {
30,244✔
1824
                r = rlimit_nofile_safe();
17,020✔
1825
                if (r < 0) {
17,020✔
1826
                        log_full_errno(prio, r, "Failed to lower RLIMIT_NOFILE's soft limit to 1K: %m");
×
1827
                        _exit(EXIT_FAILURE);
×
1828
                }
1829
        }
1830

1831
        if (!FLAGS_SET(flags, FORK_KEEP_NOTIFY_SOCKET)) {
30,244✔
1832
                r = RET_NERRNO(unsetenv("NOTIFY_SOCKET"));
30,244✔
1833
                if (r < 0) {
×
1834
                        log_full_errno(prio, r, "Failed to unset $NOTIFY_SOCKET: %m");
×
1835
                        _exit(EXIT_FAILURE);
×
1836
                }
1837
        }
1838

1839
        if (FLAGS_SET(flags, FORK_FREEZE))
30,244✔
1840
                freeze();
×
1841

1842
        if (ret_pid) {
30,244✔
1843
                if (FLAGS_SET(flags, _FORK_PID_ONLY))
28,733✔
1844
                        *ret_pid = PIDREF_MAKE_FROM_PID(getpid_cached());
7,462✔
1845
                else {
1846
                        r = pidref_set_self(ret_pid);
21,271✔
1847
                        if (r < 0) {
21,271✔
1848
                                log_full_errno(prio, r, "Failed to acquire PID reference on ourselves: %m");
×
1849
                                _exit(EXIT_FAILURE);
×
1850
                        }
1851
                }
1852
        }
1853

1854
        return 0;
1855
}
1856

1857
int safe_fork_full(
9,182✔
1858
                const char *name,
1859
                const int stdio_fds[3],
1860
                int except_fds[],
1861
                size_t n_except_fds,
1862
                ForkFlags flags,
1863
                pid_t *ret_pid) {
1864

1865
        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
17,783✔
1866
        int r;
9,182✔
1867

1868
        /* Getting the detached child process pid without pidfd is racy, so don't allow it if not returning
1869
         * a pidref to the caller. */
1870
        assert(!FLAGS_SET(flags, FORK_DETACH) || !ret_pid);
9,182✔
1871

1872
        r = pidref_safe_fork_full(name, stdio_fds, except_fds, n_except_fds, flags|_FORK_PID_ONLY, ret_pid ? &pidref : NULL);
10,555✔
1873
        if (r < 0 || !ret_pid)
17,783✔
1874
                return r;
1875

1876
        *ret_pid = pidref.pid;
15,020✔
1877

1878
        return r;
15,020✔
1879
}
1880

1881
int namespace_fork(
224✔
1882
                const char *outer_name,
1883
                const char *inner_name,
1884
                int except_fds[],
1885
                size_t n_except_fds,
1886
                ForkFlags flags,
1887
                int pidns_fd,
1888
                int mntns_fd,
1889
                int netns_fd,
1890
                int userns_fd,
1891
                int root_fd,
1892
                pid_t *ret_pid) {
1893

1894
        int r;
224✔
1895

1896
        /* This is much like safe_fork(), but forks twice, and joins the specified namespaces in the middle
1897
         * process. This ensures that we are fully a member of the destination namespace, with pidns an all, so that
1898
         * /proc/self/fd works correctly. */
1899

1900
        r = safe_fork_full(outer_name,
629✔
1901
                           NULL,
1902
                           except_fds, n_except_fds,
1903
                           (flags|FORK_DEATHSIG_SIGINT|FORK_DEATHSIG_SIGTERM|FORK_DEATHSIG_SIGKILL) & ~(FORK_REOPEN_LOG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE), ret_pid);
224✔
1904
        if (r < 0)
405✔
1905
                return r;
1906
        if (r == 0) {
405✔
1907
                pid_t pid;
181✔
1908

1909
                /* Child */
1910

1911
                r = namespace_enter(pidns_fd, mntns_fd, netns_fd, userns_fd, root_fd);
181✔
1912
                if (r < 0) {
181✔
1913
                        log_full_errno(FLAGS_SET(flags, FORK_LOG) ? LOG_ERR : LOG_DEBUG, r, "Failed to join namespace: %m");
×
1914
                        _exit(EXIT_FAILURE);
×
1915
                }
1916

1917
                /* We mask a few flags here that either make no sense for the grandchild, or that we don't have to do again */
1918
                r = safe_fork_full(inner_name,
544✔
1919
                                   NULL,
1920
                                   except_fds, n_except_fds,
1921
                                   flags & ~(FORK_WAIT|FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_REARRANGE_STDIO), &pid);
181✔
1922
                if (r < 0)
363✔
1923
                        _exit(EXIT_FAILURE);
×
1924
                if (r == 0) {
363✔
1925
                        /* Child */
1926
                        if (ret_pid)
182✔
1927
                                *ret_pid = pid;
182✔
1928
                        return 0;
182✔
1929
                }
1930

1931
                r = wait_for_terminate_and_check(inner_name, pid, FLAGS_SET(flags, FORK_LOG) ? WAIT_LOG : 0);
362✔
1932
                if (r < 0)
181✔
1933
                        _exit(EXIT_FAILURE);
×
1934

1935
                _exit(r);
181✔
1936
        }
1937

1938
        return 1;
1939
}
1940

1941
int set_oom_score_adjust(int value) {
3,651✔
1942
        char t[DECIMAL_STR_MAX(int)];
3,651✔
1943

1944
        if (!oom_score_adjust_is_valid(value))
3,651✔
1945
                return -EINVAL;
3,651✔
1946

1947
        xsprintf(t, "%i", value);
3,651✔
1948

1949
        return write_string_file("/proc/self/oom_score_adj", t,
3,651✔
1950
                                 WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_DISABLE_BUFFER);
1951
}
1952

1953
int get_oom_score_adjust(int *ret) {
2,327✔
1954
        _cleanup_free_ char *t = NULL;
2,327✔
1955
        int r, a;
2,327✔
1956

1957
        r = read_virtual_file("/proc/self/oom_score_adj", SIZE_MAX, &t, NULL);
2,327✔
1958
        if (r < 0)
2,327✔
1959
                return r;
1960

1961
        delete_trailing_chars(t, WHITESPACE);
2,327✔
1962

1963
        r = safe_atoi(t, &a);
2,327✔
1964
        if (r < 0)
2,327✔
1965
                return r;
1966

1967
        if (!oom_score_adjust_is_valid(a))
2,327✔
1968
                return -ENODATA;
1969

1970
        if (ret)
2,327✔
1971
                *ret = a;
2,327✔
1972

1973
        return 0;
1974
}
1975

1976
/* Converts an RLIMIT_NICE resource limit value into the lowest nice level it permits: limits of 0 and 1
 * map to PRIO_MAX-1, limits of -PRIO_MIN + PRIO_MAX and above map to PRIO_MIN, and everything in between
 * maps to PRIO_MAX - limit. */
static int rlimit_to_nice(rlim_t limit) {
        if (limit >= -PRIO_MIN + PRIO_MAX)
                return PRIO_MIN; /* i.e. -20 */

        return limit <= 1 ? PRIO_MAX-1 /* i.e. 19 */
                          : PRIO_MAX - (int) limit;
}
1985

1986
/* Tries to set the nice level of the calling process to 'priority'. If that is refused for lack of
 * privileges, falls back to the closest level RLIMIT_NICE permits.
 *
 * Returns 1 if the requested level is in effect, 0 if only a next-best level could be applied (or kept),
 * and a negative errno-style error otherwise. */
int setpriority_closest(int priority) {
        struct rlimit nice_rlimit;
        int r, cur_nice, allowed_nice;

        /* Try to set requested nice level */
        r = RET_NERRNO(setpriority(PRIO_PROCESS, 0, priority));
        if (r >= 0)
                return 1;
        if (!ERRNO_IS_NEG_PRIVILEGE(r))
                return r;

        /* getpriority() may legitimately return negative values, hence failure is detected via errno */
        errno = 0;
        cur_nice = getpriority(PRIO_PROCESS, 0);
        if (errno != 0)
                return -errno;

        if (cur_nice == priority)
                return 1;

        /* Hmm, we'd expect that raising the nice level from our status quo would always work. If it doesn't,
         * then the whole setpriority() system call is blocked to us, hence let's propagate the error
         * right-away */
        if (cur_nice < priority)
                return r;

        if (getrlimit(RLIMIT_NICE, &nice_rlimit) < 0)
                return -errno;

        allowed_nice = rlimit_to_nice(nice_rlimit.rlim_cur);

        /* Push to the allowed limit if we're higher than that. Note that we could also be less nice than
         * limit allows us, but still higher than what's requested. In that case our current value is
         * the best choice. */
        if (cur_nice > allowed_nice && setpriority(PRIO_PROCESS, 0, allowed_nice) < 0)
                return -errno;

        log_debug("Cannot set requested nice level (%i), using next best (%i).", priority, MIN(cur_nice, allowed_nice));
        return 0;
}
2026

2027
_noreturn_ void freeze(void) {
×
2028
        log_close();
×
2029

2030
        /* Make sure nobody waits for us (i.e. on one of our sockets) anymore. Note that we use
2031
         * close_all_fds_without_malloc() instead of plain close_all_fds() here, since we want this function
2032
         * to be compatible with being called from signal handlers. */
2033
        (void) close_all_fds_without_malloc(NULL, 0);
×
2034

2035
        /* Let's not freeze right away, but keep reaping zombies. */
2036
        for (;;) {
×
2037
                siginfo_t si = {};
×
2038

2039
                if (waitid(P_ALL, 0, &si, WEXITED) < 0 && errno != EINTR)
×
2040
                        break;
2041
        }
2042

2043
        /* waitid() failed with an ECHLD error (because there are no left-over child processes) or any other
2044
         * (unexpected) error. Freeze for good now! */
2045
        for (;;)
×
2046
                pause();
×
2047
}
2048

2049
int get_process_threads(pid_t pid) {
7✔
2050
        _cleanup_free_ char *t = NULL;
7✔
2051
        int n, r;
7✔
2052

2053
        if (pid < 0)
7✔
2054
                return -EINVAL;
2055

2056
        r = procfs_file_get_field(pid, "status", "Threads", &t);
7✔
2057
        if (r == -ENOENT)
7✔
2058
                return -ESRCH;
2059
        if (r < 0)
7✔
2060
                return r;
2061

2062
        r = safe_atoi(t, &n);
7✔
2063
        if (r < 0)
7✔
2064
                return r;
2065
        if (n < 0)
7✔
2066
                return -EINVAL;
×
2067

2068
        return n;
2069
}
2070

2071
/* Checks if we are running in a reaper process, i.e. if we are expected to deal with processes
 * reparented to us. This simply checks if we are PID 1 or if PR_SET_CHILD_SUBREAPER was called.
 *
 * Returns > 0 if so, 0 if not, and a negative errno if the prctl() query fails. */
int is_reaper_process(void) {
        int subreaper = 0;

        if (getpid_cached() != 1) {
                if (prctl(PR_GET_CHILD_SUBREAPER, (unsigned long) &subreaper, 0UL, 0UL, 0UL) < 0)
                        return -errno;

                return subreaper != 0;
        }

        /* PID 1 needs no further checks */
        return true;
}
2085

2086
/* Turns the PR_SET_CHILD_SUBREAPER setting of the calling process on or off, according to 'b'. For PID 1
 * no prctl() is issued: enabling returns 0 right away, disabling is refused with -EINVAL.
 *
 * Returns 0 on success, a negative errno otherwise. */
int make_reaper_process(bool b) {

        if (getpid_cached() == 1)
                return b ? 0 : -EINVAL;

        /* Some prctl()s insist that all 5 arguments are specified, others do not. Let's always specify all,
         * to avoid any ambiguities */
        if (prctl(PR_SET_CHILD_SUBREAPER, (unsigned long) b, 0UL, 0UL, 0UL) < 0)
                return -errno;

        return 0;
}
2103

2104
/* Instantiates the posix_spawnattr_destroyp() cleanup helper that posix_spawn_wrapper() attaches via
 * _cleanup_() to its posix_spawnattr_t pointer. NOTE(review): NULL is presumably the "empty" value for
 * which the destructor is skipped - confirm against the macro definition. */
DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(posix_spawnattr_t*, posix_spawnattr_destroy, NULL);
×
2105

2106
int posix_spawn_wrapper(
2,425✔
2107
                const char *path,
2108
                char * const *argv,
2109
                char * const *envp,
2110
                const char *cgroup,
2111
                PidRef *ret_pidref) {
2112

2113
        short flags = POSIX_SPAWN_SETSIGMASK;
2,425✔
2114
        posix_spawnattr_t attr;
2,425✔
2115
        sigset_t mask;
2,425✔
2116
        int r;
2,425✔
2117

2118
        /* Forks and invokes 'path' with 'argv' and 'envp' using CLONE_VM and CLONE_VFORK, which means the
2119
         * caller will be blocked until the child either exits or exec's. The memory of the child will be
2120
         * fully shared with the memory of the parent, so that there are no copy-on-write or memory.max
2121
         * issues.
2122
         *
2123
         * Also, move the newly-created process into 'cgroup' through POSIX_SPAWN_SETCGROUP (clone3())
2124
         * if available.
2125
         * returns 1: We're already in the right cgroup
2126
         *         0: 'cgroup' not specified or POSIX_SPAWN_SETCGROUP is not supported. The caller
2127
         *            needs to call 'cg_attach' on their own */
2128

2129
        assert(path);
2,425✔
2130
        assert(argv);
2,425✔
2131
        assert(ret_pidref);
2,425✔
2132

2133
        assert_se(sigfillset(&mask) >= 0);
2,425✔
2134

2135
        r = posix_spawnattr_init(&attr);
2,425✔
2136
        if (r != 0)
2,425✔
2137
                return -r; /* These functions return a positive errno on failure */
2,425✔
2138

2139
        /* Initialization needs to succeed before we can set up a destructor. */
2140
        _unused_ _cleanup_(posix_spawnattr_destroyp) posix_spawnattr_t *attr_destructor = &attr;
4,850✔
2141

2142
#if HAVE_PIDFD_SPAWN
2143
        static bool have_clone_into_cgroup = true; /* kernel 5.7+ */
2,425✔
2144
        _cleanup_close_ int cgroup_fd = -EBADF;
2,425✔
2145

2146
        if (cgroup && have_clone_into_cgroup) {
2,425✔
2147
                _cleanup_free_ char *resolved_cgroup = NULL;
2,425✔
2148

2149
                r = cg_get_path(cgroup, /* suffix= */ NULL, &resolved_cgroup);
2,425✔
2150
                if (r < 0)
2,425✔
2151
                        return r;
2152

2153
                cgroup_fd = open(resolved_cgroup, O_PATH|O_DIRECTORY|O_CLOEXEC);
2,425✔
2154
                if (cgroup_fd < 0)
2,425✔
2155
                        return -errno;
×
2156

2157
                r = posix_spawnattr_setcgroup_np(&attr, cgroup_fd);
2,425✔
2158
                if (r != 0)
2,425✔
2159
                        return -r;
×
2160

2161
                flags |= POSIX_SPAWN_SETCGROUP;
2,425✔
2162
        }
2163
#endif
2164

2165
        r = posix_spawnattr_setflags(&attr, flags);
2,425✔
2166
        if (r != 0)
2,425✔
2167
                return -r;
×
2168
        r = posix_spawnattr_setsigmask(&attr, &mask);
2,425✔
2169
        if (r != 0)
2,425✔
2170
                return -r;
×
2171

2172
#if HAVE_PIDFD_SPAWN
2173
        _cleanup_close_ int pidfd = -EBADF;
2,425✔
2174

2175
        r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp);
2,425✔
2176
        if (ERRNO_IS_NOT_SUPPORTED(r) && FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP) && cg_is_threaded(cgroup) > 0)
2,425✔
2177
                return -EUCLEAN; /* clone3() could also return EOPNOTSUPP if the target cgroup is in threaded mode,
2178
                                    turn that into something recognizable */
2179
        if ((ERRNO_IS_NOT_SUPPORTED(r) || ERRNO_IS_PRIVILEGE(r) || r == E2BIG) &&
2,425✔
2180
            FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP)) {
2181
                /* Compiled on a newer host, or seccomp&friends blocking clone3()? Fallback, but
2182
                 * need to disable POSIX_SPAWN_SETCGROUP, which is what redirects to clone3().
2183
                 * Note that we might get E2BIG here since some kernels (e.g. 5.4) support clone3()
2184
                 * but not CLONE_INTO_CGROUP. */
2185

2186
                /* CLONE_INTO_CGROUP definitely won't work, hence remember the fact so that we don't
2187
                 * retry every time. */
2188
                have_clone_into_cgroup = false;
×
2189

2190
                flags &= ~POSIX_SPAWN_SETCGROUP;
×
2191
                r = posix_spawnattr_setflags(&attr, flags);
×
2192
                if (r != 0)
×
2193
                        return -r;
×
2194

2195
                r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp);
×
2196
        }
2197
        if (r != 0)
2,425✔
2198
                return -r;
×
2199

2200
        r = pidref_set_pidfd_consume(ret_pidref, TAKE_FD(pidfd));
2,425✔
2201
        if (r < 0)
2,425✔
2202
                return r;
2203

2204
        return FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP);
2,425✔
2205
#else
2206
        pid_t pid;
2207

2208
        r = posix_spawn(&pid, path, NULL, &attr, argv, envp);
2209
        if (r != 0)
2210
                return -r;
2211

2212
        r = pidref_set_pid(ret_pidref, pid);
2213
        if (r < 0)
2214
                return r;
2215

2216
        return 0; /* We did not use CLONE_INTO_CGROUP so return 0, the caller will have to move the child */
2217
#endif
2218
}
2219

2220
/* Opens "/proc" as a directory stream.
 *
 * On success the stream is stored in *ret and 0 is returned. If opendir() fails, the negative errno is
 * returned and *ret is left untouched. */
int proc_dir_open(DIR **ret) {
        assert(ret);

        DIR *procfs = opendir("/proc");
        if (procfs) {
                *ret = procfs;
                return 0;
        }

        return -errno;
}
2232

2233
int proc_dir_read(DIR *d, pid_t *ret) {
1,165✔
2234
        assert(d);
1,165✔
2235

2236
        for (;;) {
1,949✔
2237
                struct dirent *de;
1,949✔
2238

2239
                errno = 0;
1,949✔
2240
                de = readdir_no_dot(d);
1,949✔
2241
                if (!de) {
1,949✔
2242
                        if (errno != 0)
13✔
2243
                                return -errno;
×
2244

2245
                        break;
13✔
2246
                }
2247

2248
                if (!IN_SET(de->d_type, DT_DIR, DT_UNKNOWN))
1,936✔
2249
                        continue;
641✔
2250

2251
                if (parse_pid(de->d_name, ret) >= 0)
1,295✔
2252
                        return 1;
2253
        }
2254

2255
        if (ret)
13✔
2256
                *ret = 0;
13✔
2257
        return 0;
2258
}
2259

2260
/* Like proc_dir_read(), but initializes a PidRef for the PID found instead of returning the bare PID.
 *
 * Returns 1 when a PidRef was set up, 0 when the directory is exhausted (in which case *ret is set to
 * PIDREF_NULL if 'ret' is non-NULL), or a negative errno on failure.
 *
 * NOTE(review): 'ret' is checked for NULL only on the end-of-directory path, but is passed to
 * pidref_set_pid() unconditionally — confirm whether callers may really pass NULL here. */
int proc_dir_read_pidref(DIR *d, PidRef *ret) {
        assert(d);

        for (;;) {
                pid_t pid;
                int r;

                r = proc_dir_read(d, &pid);
                if (r < 0)
                        return r;
                if (r == 0) {
                        /* Nothing left to enumerate */
                        if (ret)
                                *ret = PIDREF_NULL;
                        return 0;
                }

                r = pidref_set_pid(ret, pid);
                if (r >= 0)
                        return 1;
                if (r != -ESRCH)
                        return r;

                /* -ESRCH: the process is gone by now, move on to the next entry */
        }
}
2287

2288
static const char *const sigchld_code_table[] = {
2289
        [CLD_EXITED] = "exited",
2290
        [CLD_KILLED] = "killed",
2291
        [CLD_DUMPED] = "dumped",
2292
        [CLD_TRAPPED] = "trapped",
2293
        [CLD_STOPPED] = "stopped",
2294
        [CLD_CONTINUED] = "continued",
2295
};
2296

2297
DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int);
8,827✔
2298

2299
static const char* const sched_policy_table[] = {
2300
        [SCHED_OTHER] = "other",
2301
        [SCHED_BATCH] = "batch",
2302
        [SCHED_IDLE] = "idle",
2303
        [SCHED_FIFO] = "fifo",
2304
        [SCHED_RR] = "rr",
2305
};
2306

2307
DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX);
52✔
2308

2309
_noreturn_ void report_errno_and_exit(int errno_fd, int error) {
64✔
2310
        int r;
64✔
2311

2312
        if (error >= 0)
64✔
2313
                _exit(EXIT_SUCCESS);
63✔
2314

2315
        assert(errno_fd >= 0);
1✔
2316

2317
        r = loop_write(errno_fd, &error, sizeof(error));
1✔
2318
        if (r < 0)
1✔
2319
                log_debug_errno(r, "Failed to write errno to errno_fd=%d: %m", errno_fd);
×
2320

2321
        _exit(EXIT_FAILURE);
1✔
2322
}
2323

2324
/* Reads an error code (a raw int) from 'errno_fd'.
 *
 * An error code reported by the child cannot be distinguished from an I/O error that occurred while
 * reading it. Hence read failures and malformed data are logged at debug level and reported as EIO.
 *
 * Returns 0 if the stream ended without any data or the value read is 0. A negative value read is
 * logged and passed on as the result. */
int read_errno(int errno_fd) {
        int child_error;
        ssize_t got;

        assert(errno_fd >= 0);

        got = loop_read(errno_fd, &child_error, sizeof(child_error), /* do_poll = */ false);
        if (got < 0) {
                log_debug_errno(got, "Failed to read errno: %m");
                return -EIO;
        }

        /* The process exited without reporting an error, assume success */
        if (got == 0)
                return 0;

        /* Anything other than exactly one int is not a valid report */
        if (got != sizeof(child_error))
                return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Received unexpected amount of bytes while reading errno.");

        if (child_error > 0)
                return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Received an errno, but it's a positive value.");

        if (child_error < 0) /* child process reported an error, return it */
                return log_debug_errno(child_error, "Child process failed with errno: %m");

        return 0;
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc