• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 14805371838

02 May 2025 11:41AM UTC coverage: 72.244% (+0.001%) from 72.243%
14805371838

push

github

web-flow
Various changes to prepare for running IWYU on the repository (#37319)

These are various commits that were required to get things compiling
after running IWYU. I think all of them make sense on their own, hence
this split PR to merge them ahead of time.

81 of 96 new or added lines in 48 files covered. (84.38%)

178 existing lines in 39 files now uncovered.

297233 of 411432 relevant lines covered (72.24%)

694826.11 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.12
/src/basic/process-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <ctype.h>
4
#include <errno.h>
5
#include <limits.h>
6
#include <linux/oom.h>
7
#include <pthread.h>
8
#include <spawn.h>
9
#include <stdbool.h>
10
#include <stdio.h>
11
#include <stdlib.h>
12
#include <sys/mount.h>
13
#include <sys/personality.h>
14
#include <sys/prctl.h>
15
#include <sys/types.h>
16
#include <sys/wait.h>
17
#include <syslog.h>
18
#include <threads.h>
19
#include <unistd.h>
20
#if HAVE_VALGRIND_VALGRIND_H
21
#include <valgrind/valgrind.h>
22
#endif
23

24
#include "sd-messages.h"
25

26
#include "alloc-util.h"
27
#include "architecture.h"
28
#include "argv-util.h"
29
#include "cgroup-util.h"
30
#include "dirent-util.h"
31
#include "env-file.h"
32
#include "env-util.h"
33
#include "errno-util.h"
34
#include "escape.h"
35
#include "fd-util.h"
36
#include "fileio.h"
37
#include "fs-util.h"
38
#include "hostname-util.h"
39
#include "io-util.h"
40
#include "iovec-util.h"
41
#include "locale-util.h"
42
#include "log.h"
43
#include "macro.h"
44
#include "memory-util.h"
45
#include "missing_sched.h"
46
#include "missing_syscall.h"
47
#include "mountpoint-util.h"
48
#include "namespace-util.h"
49
#include "nulstr-util.h"
50
#include "parse-util.h"
51
#include "path-util.h"
52
#include "pidfd-util.h"
53
#include "process-util.h"
54
#include "raw-clone.h"
55
#include "rlimit-util.h"
56
#include "signal-util.h"
57
#include "socket-util.h"
58
#include "stat-util.h"
59
#include "stdio-util.h"
60
#include "string-table.h"
61
#include "string-util.h"
62
#include "terminal-util.h"
63
#include "time-util.h"
64
#include "user-util.h"
65
#include "utf8.h"
66

67
/* The kernel limits userspace processes to TASK_COMM_LEN (16 bytes), but allows higher values for its own
68
 * workers, e.g. "kworker/u9:3-kcryptd/253:0". Let's pick a fixed smallish limit that will work for the kernel.
69
 */
70
#define COMM_MAX_LEN 128
71

72
static int get_process_state(pid_t pid) {
9,974✔
73
        _cleanup_free_ char *line = NULL;
9,974✔
74
        const char *p;
9,974✔
75
        char state;
9,974✔
76
        int r;
9,974✔
77

78
        assert(pid >= 0);
9,974✔
79

80
        /* Shortcut: if we are enquired about our own state, we are obviously running */
81
        if (pid == 0 || pid == getpid_cached())
9,974✔
82
                return (unsigned char) 'R';
×
83

84
        p = procfs_file_alloca(pid, "stat");
9,974✔
85

86
        r = read_one_line_file(p, &line);
9,974✔
87
        if (r == -ENOENT)
9,974✔
88
                return -ESRCH;
89
        if (r < 0)
8,052✔
90
                return r;
91

92
        p = strrchr(line, ')');
8,052✔
93
        if (!p)
8,052✔
94
                return -EIO;
95

96
        p++;
8,052✔
97

98
        if (sscanf(p, " %c", &state) != 1)
8,052✔
99
                return -EIO;
100

101
        return (unsigned char) state;
8,052✔
102
}
103

104
int pid_get_comm(pid_t pid, char **ret) {
105
        _cleanup_free_ char *escaped = NULL, *comm = NULL;
46,340✔
106
        int r;
46,340✔
107

108
        assert(pid >= 0);
46,340✔
109
        assert(ret);
46,340✔
110

111
        if (pid == 0 || pid == getpid_cached()) {
46,340✔
112
                comm = new0(char, TASK_COMM_LEN + 1); /* Must fit in 16 byte according to prctl(2) */
25,600✔
113
                if (!comm)
25,600✔
114
                        return -ENOMEM;
115

116
                if (prctl(PR_GET_NAME, comm) < 0)
25,600✔
117
                        return -errno;
×
118
        } else {
119
                const char *p;
20,740✔
120

121
                p = procfs_file_alloca(pid, "comm");
20,740✔
122

123
                /* Note that process names of kernel threads can be much longer than TASK_COMM_LEN */
124
                r = read_one_line_file(p, &comm);
20,740✔
125
                if (r == -ENOENT)
20,740✔
126
                        return -ESRCH;
127
                if (r < 0)
17,139✔
128
                        return r;
129
        }
130

131
        escaped = new(char, COMM_MAX_LEN);
42,735✔
132
        if (!escaped)
42,735✔
133
                return -ENOMEM;
134

135
        /* Escape unprintable characters, just in case, but don't grow the string beyond the underlying size */
136
        cellescape(escaped, COMM_MAX_LEN, comm);
42,735✔
137

138
        *ret = TAKE_PTR(escaped);
42,735✔
139
        return 0;
42,735✔
140
}
141

142
int pidref_get_comm(const PidRef *pid, char **ret) {
143
        _cleanup_free_ char *comm = NULL;
29✔
144
        int r;
29✔
145

146
        if (!pidref_is_set(pid))
29✔
147
                return -ESRCH;
148

149
        if (pidref_is_remote(pid))
58✔
150
                return -EREMOTE;
151

152
        r = pid_get_comm(pid->pid, &comm);
29✔
153
        if (r < 0)
29✔
154
                return r;
155

156
        r = pidref_verify(pid);
29✔
157
        if (r < 0)
29✔
158
                return r;
159

160
        if (ret)
29✔
161
                *ret = TAKE_PTR(comm);
29✔
162
        return 0;
163
}
164

165
static int pid_get_cmdline_nulstr(
17,418✔
166
                pid_t pid,
167
                size_t max_size,
168
                ProcessCmdlineFlags flags,
169
                char **ret,
170
                size_t *ret_size) {
171

172
        _cleanup_free_ char *t = NULL;
17,418✔
173
        const char *p;
17,418✔
174
        size_t k;
17,418✔
175
        int r;
17,418✔
176

177
        /* Retrieves a process' command line as a "sized nulstr", i.e. possibly without the last NUL, but
178
         * with a specified size.
179
         *
180
         * If PROCESS_CMDLINE_COMM_FALLBACK is specified in flags and the process has no command line set
181
         * (the case for kernel threads), or has a command line that resolves to the empty string, will
182
         * return the "comm" name of the process instead. This will use at most _SC_ARG_MAX bytes of input
183
         * data.
184
         *
185
         * Returns an error, 0 if output was read but is truncated, 1 otherwise.
186
         */
187

188
        p = procfs_file_alloca(pid, "cmdline");
17,634✔
189
        r = read_virtual_file(p, max_size, &t, &k); /* Let's assume that each input byte results in >= 1
17,418✔
190
                                                     * columns of output. We ignore zero-width codepoints. */
191
        if (r == -ENOENT)
17,418✔
192
                return -ESRCH;
193
        if (r < 0)
14,428✔
194
                return r;
195

196
        if (k == 0) {
14,427✔
197
                if (!(flags & PROCESS_CMDLINE_COMM_FALLBACK))
504✔
198
                        return -ENOENT;
485✔
199

200
                /* Kernel threads have no argv[] */
201
                _cleanup_free_ char *comm = NULL;
19✔
202

203
                r = pid_get_comm(pid, &comm);
19✔
204
                if (r < 0)
19✔
205
                        return r;
206

207
                free(t);
19✔
208
                t = strjoin("[", comm, "]");
19✔
209
                if (!t)
19✔
210
                        return -ENOMEM;
211

212
                k = strlen(t);
19✔
213
                r = k <= max_size;
19✔
214
                if (r == 0) /* truncation */
19✔
215
                        t[max_size] = '\0';
12✔
216
        }
217

218
        if (ret)
13,942✔
219
                *ret = TAKE_PTR(t);
13,942✔
220
        if (ret_size)
13,942✔
221
                *ret_size = k;
13,942✔
222

223
        return r;
224
}
225

226
int pid_get_cmdline(pid_t pid, size_t max_columns, ProcessCmdlineFlags flags, char **ret) {
        _cleanup_free_ char *t = NULL;
        size_t k;
        char *ans;

        assert(pid >= 0);
        assert(ret);

        /* Retrieve and format a command line. See pid_get_cmdline_nulstr() for discussion of retrieval
         * options.
         *
         * There are two main formatting modes:
         *
         * - when PROCESS_CMDLINE_QUOTE is specified, output is quoted in C/Python style. If no shell special
         *   characters are present, this output can be copy-pasted into the terminal to execute. UTF-8
         *   output is assumed.
         *
         * - otherwise, a compact non-roundtrippable form is returned. Non-UTF8 bytes are replaced by �. The
         *   returned string is of the specified console width at most, abbreviated with an ellipsis.
         *
         * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
         * PROCESS_CMDLINE_COMM_FALLBACK is not specified). Returns 0 and sets *ret to a newly allocated
         * string otherwise. */

        int full = pid_get_cmdline_nulstr(pid, max_columns, flags, &t, &k);
        if (full < 0)
                return full;

        if (flags & (PROCESS_CMDLINE_QUOTE | PROCESS_CMDLINE_QUOTE_POSIX)) {
                ShellEscapeFlags shflags = SHELL_ESCAPE_EMPTY |
                        FLAGS_SET(flags, PROCESS_CMDLINE_QUOTE_POSIX) * SHELL_ESCAPE_POSIX;

                /* Quoting and locale-dependent output are mutually exclusive */
                assert(!(flags & PROCESS_CMDLINE_USE_LOCALE));

                _cleanup_strv_free_ char **args = NULL;

                /* Drop trailing NULs, otherwise strv_parse_nulstr() adds additional empty strings at the end.
                 * See also issue #21186. */
                args = strv_parse_nulstr_full(t, k, /* drop_trailing_nuls = */ true);
                if (!args)
                        return -ENOMEM;

                ans = quote_command_line(args, shflags);
                if (!ans)
                        return -ENOMEM;
        } else {
                /* Arguments are separated by NULs. Let's replace those with spaces, leaving the last
                 * byte alone. The bound is written as "i + 1 < k" rather than "i < k - 1" so that it
                 * cannot wrap around should k ever be zero. */
                for (size_t i = 0; i + 1 < k; i++)
                        if (t[i] == '\0')
                                t[i] = ' ';

                delete_trailing_chars(t, WHITESPACE);

                bool eight_bit = (flags & PROCESS_CMDLINE_USE_LOCALE) && !is_locale_utf8();

                /* If the retrieval step reported truncation (full == 0), force an ellipsis */
                ans = escape_non_printable_full(t, max_columns,
                                                eight_bit * XESCAPE_8_BIT | !full * XESCAPE_FORCE_ELLIPSIS);
                if (!ans)
                        return -ENOMEM;

                ans = str_realloc(ans);
        }

        *ret = ans;
        return 0;
}
290

291
int pidref_get_cmdline(const PidRef *pid, size_t max_columns, ProcessCmdlineFlags flags, char **ret) {
        _cleanup_free_ char *cmdline = NULL;
        int r;

        /* PidRef flavour of pid_get_cmdline(): -ESRCH for an unset reference, -EREMOTE for a remote
         * one. ret may be NULL if the caller doesn't care about the string itself. */

        if (!pidref_is_set(pid))
                return -ESRCH;

        if (pidref_is_remote(pid))
                return -EREMOTE;

        r = pid_get_cmdline(pid->pid, max_columns, flags, &cmdline);
        if (r < 0)
                return r;

        /* Only return the data if the reference still checks out after we read it */
        r = pidref_verify(pid);
        if (r < 0)
                return r;

        if (ret)
                *ret = TAKE_PTR(cmdline);

        return 0;
}
313

314
int pid_get_cmdline_strv(pid_t pid, ProcessCmdlineFlags flags, char ***ret) {
        _cleanup_free_ char *nulstr = NULL;
        char **strv;
        size_t size;
        int r;

        /* Returns the command line of a process as a string array, one entry per argument. The only
         * flag permitted here is PROCESS_CMDLINE_COMM_FALLBACK. */

        assert(pid >= 0);
        assert((flags & ~PROCESS_CMDLINE_COMM_FALLBACK) == 0);
        assert(ret);

        /* No size limit: we want every argument */
        r = pid_get_cmdline_nulstr(pid, SIZE_MAX, flags, &nulstr, &size);
        if (r < 0)
                return r;

        strv = strv_parse_nulstr_full(nulstr, size, /* drop_trailing_nuls = */ true);
        if (!strv)
                return -ENOMEM;

        *ret = strv;
        return 0;
}
335

336
int pidref_get_cmdline_strv(const PidRef *pid, ProcessCmdlineFlags flags, char ***ret) {
        _cleanup_strv_free_ char **strv = NULL;
        int r;

        /* PidRef flavour of pid_get_cmdline_strv(): -ESRCH for an unset reference, -EREMOTE for a
         * remote one. ret may be NULL. */

        if (!pidref_is_set(pid))
                return -ESRCH;

        if (pidref_is_remote(pid))
                return -EREMOTE;

        r = pid_get_cmdline_strv(pid->pid, flags, &strv);
        if (r < 0)
                return r;

        /* Only return the data if the reference still checks out after we read it */
        r = pidref_verify(pid);
        if (r < 0)
                return r;

        if (ret)
                *ret = TAKE_PTR(strv);

        return 0;
}
359

360
int container_get_leader(const char *machine, pid_t *pid) {
361
        _cleanup_free_ char *s = NULL, *class = NULL;
10✔
362
        const char *p;
10✔
363
        pid_t leader;
10✔
364
        int r;
10✔
365

366
        assert(machine);
10✔
367
        assert(pid);
10✔
368

369
        if (streq(machine, ".host")) {
10✔
370
                *pid = 1;
1✔
371
                return 0;
1✔
372
        }
373

374
        if (!hostname_is_valid(machine, 0))
9✔
375
                return -EINVAL;
376

377
        p = strjoina("/run/systemd/machines/", machine);
45✔
378
        r = parse_env_file(NULL, p,
9✔
379
                           "LEADER", &s,
380
                           "CLASS", &class);
381
        if (r == -ENOENT)
9✔
382
                return -EHOSTDOWN;
383
        if (r < 0)
9✔
384
                return r;
385
        if (!s)
9✔
386
                return -EIO;
387

388
        if (!streq_ptr(class, "container"))
9✔
389
                return -EIO;
390

391
        r = parse_pid(s, &leader);
9✔
392
        if (r < 0)
9✔
393
                return r;
394
        if (leader <= 1)
9✔
395
                return -EIO;
396

397
        *pid = leader;
9✔
398
        return 0;
9✔
399
}
400

401
int pid_is_kernel_thread(pid_t pid) {
402
        _cleanup_free_ char *line = NULL;
3,545✔
403
        unsigned long long flags;
3,545✔
404
        size_t l, i;
3,545✔
405
        const char *p;
3,545✔
406
        char *q;
3,545✔
407
        int r;
3,545✔
408

409
        if (IN_SET(pid, 0, 1) || pid == getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */
3,545✔
410
                return 0;
25✔
411
        if (!pid_is_valid(pid))
3,520✔
412
                return -EINVAL;
413

414
        p = procfs_file_alloca(pid, "stat");
3,520✔
415
        r = read_one_line_file(p, &line);
3,520✔
416
        if (r == -ENOENT)
3,520✔
417
                return -ESRCH;
418
        if (r < 0)
3,520✔
419
                return r;
420

421
        /* Skip past the comm field */
422
        q = strrchr(line, ')');
3,520✔
423
        if (!q)
3,520✔
424
                return -EINVAL;
425
        q++;
3,520✔
426

427
        /* Skip 6 fields to reach the flags field */
428
        for (i = 0; i < 6; i++) {
24,640✔
429
                l = strspn(q, WHITESPACE);
21,120✔
430
                if (l < 1)
21,120✔
431
                        return -EINVAL;
432
                q += l;
21,120✔
433

434
                l = strcspn(q, WHITESPACE);
21,120✔
435
                if (l < 1)
21,120✔
436
                        return -EINVAL;
437
                q += l;
21,120✔
438
        }
439

440
        /* Skip preceding whitespace */
441
        l = strspn(q, WHITESPACE);
3,520✔
442
        if (l < 1)
3,520✔
443
                return -EINVAL;
444
        q += l;
3,520✔
445

446
        /* Truncate the rest */
447
        l = strcspn(q, WHITESPACE);
3,520✔
448
        if (l < 1)
3,520✔
449
                return -EINVAL;
450
        q[l] = 0;
3,520✔
451

452
        r = safe_atollu(q, &flags);
3,520✔
453
        if (r < 0)
3,520✔
454
                return r;
455

456
        return !!(flags & PF_KTHREAD);
3,520✔
457
}
458

459
int pidref_is_kernel_thread(const PidRef *pid) {
460
        int result, r;
1,440✔
461

462
        if (!pidref_is_set(pid))
1,440✔
463
                return -ESRCH;
464

465
        if (pidref_is_remote(pid))
1,440✔
466
                return -EREMOTE;
467

468
        result = pid_is_kernel_thread(pid->pid);
1,440✔
469
        if (result < 0)
1,440✔
470
                return result;
471

472
        r = pidref_verify(pid); /* Verify that the PID wasn't reused since */
1,440✔
473
        if (r < 0)
1,440✔
474
                return r;
9✔
475

476
        return result;
477
}
478

479
static int get_process_link_contents(pid_t pid, const char *proc_file, char **ret) {
        const char *path;
        int r;

        /* Reads the target of the symlink named proc_file of the specified process. A missing link is
         * reported as -ESRCH, but only if proc_mounted() returns a positive value; otherwise the
         * -ENOENT is passed through as is. */

        assert(proc_file);

        path = procfs_file_alloca(pid, proc_file);

        r = readlink_malloc(path, ret);
        if (r == -ENOENT && proc_mounted() > 0)
                return -ESRCH;

        return r;
}
490

491
int get_process_exe(pid_t pid, char **ret) {
        char *suffix;
        int r;

        /* Resolves the "exe" symlink of a process. If the link target ends in " (deleted)", that
         * suffix is cut off the returned string. */

        assert(pid >= 0);

        r = get_process_link_contents(pid, "exe", ret);
        if (r < 0)
                return r;

        /* Nothing to post-process if the caller didn't ask for the string */
        if (!ret)
                return 0;

        suffix = endswith(*ret, " (deleted)");
        if (suffix)
                *suffix = '\0';

        return 0;
}
509

510
int pid_get_uid(pid_t pid, uid_t *ret) {
511
        int r;
2,693✔
512

513
        assert(pid >= 0);
2,693✔
514
        assert(ret);
2,693✔
515

516
        if (pid == 0 || pid == getpid_cached()) {
2,693✔
517
                *ret = getuid();
4✔
518
                return 0;
2,693✔
519
        }
520

521
        _cleanup_free_ char *v = NULL;
2,689✔
522
        r = procfs_file_get_field(pid, "status", "Uid", &v);
2,689✔
523
        if (r == -ENOENT)
2,689✔
524
                return -ESRCH;
525
        if (r < 0)
150✔
526
                return r;
527

528
        return parse_uid(v, ret);
150✔
529
}
530

531
int pidref_get_uid(const PidRef *pid, uid_t *ret) {
532
        int r;
54✔
533

534
        if (!pidref_is_set(pid))
54✔
535
                return -ESRCH;
54✔
536

537
        if (pidref_is_remote(pid))
54✔
538
                return -EREMOTE;
539

540
        if (pid->fd >= 0) {
54✔
541
                r = pidfd_get_uid(pid->fd, ret);
54✔
542
                if (!ERRNO_IS_NEG_NOT_SUPPORTED(r))
54✔
543
                        return r;
544
        }
545

546
        uid_t uid;
9✔
547
        r = pid_get_uid(pid->pid, &uid);
9✔
548
        if (r < 0)
9✔
549
                return r;
550

551
        r = pidref_verify(pid);
9✔
552
        if (r < 0)
9✔
553
                return r;
554

555
        if (ret)
9✔
556
                *ret = uid;
9✔
557
        return 0;
558
}
559

560
int get_process_gid(pid_t pid, gid_t *ret) {
561
        int r;
2,684✔
562

563
        assert(pid >= 0);
2,684✔
564
        assert(ret);
2,684✔
565

566
        if (pid == 0 || pid == getpid_cached()) {
2,684✔
567
                *ret = getgid();
1✔
568
                return 0;
2,684✔
569
        }
570

571
        _cleanup_free_ char *v = NULL;
2,683✔
572
        r = procfs_file_get_field(pid, "status", "Gid", &v);
2,683✔
573
        if (r == -ENOENT)
2,683✔
574
                return -ESRCH;
575
        if (r < 0)
144✔
576
                return r;
577

578
        return parse_gid(v, ret);
2,683✔
579
}
580

581
int get_process_cwd(pid_t pid, char **ret) {
        /* Returns the current working directory of a process: via safe_getcwd() for ourselves,
         * otherwise from the "cwd" symlink of the process. */

        assert(pid >= 0);

        bool self = pid == 0 || pid == getpid_cached();
        if (!self)
                return get_process_link_contents(pid, "cwd", ret);

        return safe_getcwd(ret);
}
589

590
int get_process_root(pid_t pid, char **ret) {
        /* Returns the target of the "root" symlink of a process */

        assert(pid >= 0);

        return get_process_link_contents(pid, "root", ret);
}
594

595
#define ENVIRONMENT_BLOCK_MAX (5U*1024U*1024U)
596

597
int get_process_environ(pid_t pid, char **ret) {
598
        _cleanup_fclose_ FILE *f = NULL;
15✔
599
        _cleanup_free_ char *outcome = NULL;
15✔
600
        size_t sz = 0;
15✔
601
        const char *p;
15✔
602
        int r;
15✔
603

604
        assert(pid >= 0);
15✔
605
        assert(ret);
15✔
606

607
        p = procfs_file_alloca(pid, "environ");
15✔
608

609
        r = fopen_unlocked(p, "re", &f);
15✔
610
        if (r == -ENOENT)
15✔
611
                return -ESRCH;
612
        if (r < 0)
15✔
613
                return r;
614

615
        for (;;) {
6,449✔
616
                char c;
6,464✔
617

618
                if (sz >= ENVIRONMENT_BLOCK_MAX)
6,464✔
619
                        return -ENOBUFS;
×
620

621
                if (!GREEDY_REALLOC(outcome, sz + 5))
6,464✔
622
                        return -ENOMEM;
623

624
                r = safe_fgetc(f, &c);
6,464✔
625
                if (r < 0)
6,464✔
626
                        return r;
627
                if (r == 0)
6,464✔
628
                        break;
629

630
                if (c == '\0')
6,449✔
631
                        outcome[sz++] = '\n';
227✔
632
                else
633
                        sz += cescape_char(c, outcome + sz);
6,222✔
634
        }
635

636
        outcome[sz] = '\0';
15✔
637
        *ret = TAKE_PTR(outcome);
15✔
638

639
        return 0;
15✔
640
}
641

642
int pid_get_ppid(pid_t pid, pid_t *ret) {
643
        _cleanup_free_ char *line = NULL;
1,392✔
644
        unsigned long ppid;
1,392✔
645
        const char *p;
1,392✔
646
        int r;
1,392✔
647

648
        assert(pid >= 0);
1,392✔
649

650
        if (pid == 0)
1,392✔
651
                pid = getpid_cached();
1✔
652
        if (pid == 1) /* PID 1 has no parent, shortcut this case */
1,392✔
653
                return -EADDRNOTAVAIL;
654

655
        if (pid == getpid_cached()) {
1,388✔
656
                if (ret)
6✔
657
                        *ret = getppid();
6✔
658
                return 0;
6✔
659
        }
660

661
        p = procfs_file_alloca(pid, "stat");
1,382✔
662
        r = read_one_line_file(p, &line);
1,382✔
663
        if (r == -ENOENT)
1,382✔
664
                return -ESRCH;
665
        if (r < 0)
1,381✔
666
                return r;
667

668
        /* Let's skip the pid and comm fields. The latter is enclosed in () but does not escape any () in its
669
         * value, so let's skip over it manually */
670

671
        p = strrchr(line, ')');
1,381✔
672
        if (!p)
1,381✔
673
                return -EIO;
674
        p++;
1,381✔
675

676
        if (sscanf(p, " "
1,381✔
677
                   "%*c "  /* state */
678
                   "%lu ", /* ppid */
679
                   &ppid) != 1)
680
                return -EIO;
681

682
        /* If ppid is zero the process has no parent. Which might be the case for PID 1 (caught above)
683
         * but also for processes originating in other namespaces that are inserted into a pidns.
684
         * Return a recognizable error in this case. */
685
        if (ppid == 0)
1,381✔
686
                return -EADDRNOTAVAIL;
687

688
        if ((pid_t) ppid < 0 || (unsigned long) (pid_t) ppid != ppid)
1,381✔
689
                return -ERANGE;
690

691
        if (ret)
1,381✔
692
                *ret = (pid_t) ppid;
1,381✔
693

694
        return 0;
695
}
696

697
int pidref_get_ppid(const PidRef *pidref, pid_t *ret) {
698
        int r;
2,401✔
699

700
        if (!pidref_is_set(pidref))
2,401✔
701
                return -ESRCH;
2,401✔
702

703
        if (pidref_is_remote(pidref))
2,401✔
704
                return -EREMOTE;
705

706
        if (pidref->fd >= 0) {
2,401✔
707
                r = pidfd_get_ppid(pidref->fd, ret);
2,401✔
708
                if (!ERRNO_IS_NEG_NOT_SUPPORTED(r))
2,401✔
709
                        return r;
710
        }
711

712
        pid_t ppid;
1,386✔
713
        r = pid_get_ppid(pidref->pid, ret ? &ppid : NULL);
1,386✔
714
        if (r < 0)
1,386✔
715
                return r;
716

717
        r = pidref_verify(pidref);
1,385✔
718
        if (r < 0)
1,385✔
719
                return r;
720

721
        if (ret)
1,385✔
722
                *ret = ppid;
1,385✔
723
        return 0;
724
}
725

726
int pidref_get_ppid_as_pidref(const PidRef *pidref, PidRef *ret) {
        pid_t ppid;
        int r;

        /* Acquires a PidRef for the parent of the specified process. Since the parent may change, and
         * its PID may be reused between querying it and pinning it, the lookup is repeated up to 16
         * times before giving up with -ENOTRECOVERABLE. */

        assert(ret);

        r = pidref_get_ppid(pidref, &ppid);
        if (r < 0)
                return r;

        for (unsigned tries_left = 16; tries_left > 0; tries_left--) {
                _cleanup_(pidref_done) PidRef candidate = PIDREF_NULL;
                bool still_parent = true;

                r = pidref_set_pid(&candidate, ppid);
                if (r < 0)
                        return r;

                /* With a pidfd of the original process at hand we can check whether what we just
                 * acquired still is its parent. If the PPID changed in the meantime we might have
                 * pinned an unrelated process that reused the PID, in which case we start over with
                 * the PPID we just read. */
                if (pidref->fd >= 0) {
                        r = pidref_get_ppid(pidref, &ppid);
                        if (r < 0)
                                return r;

                        still_parent = candidate.pid == ppid;
                }

                if (still_parent) {
                        *ret = TAKE_PIDREF(candidate);
                        return 0;
                }
        }

        /* 16 attempts and still no stable answer, give up */
        return -ENOTRECOVERABLE;
}
763

764
int pid_get_start_time(pid_t pid, usec_t *ret) {
        _cleanup_free_ char *stat_line = NULL;
        const char *path, *comm_end;
        unsigned long ticks;
        int r;

        /* Parses the starttime field of the process' "stat" file and returns it converted via
         * jiffies_to_usec(). Returns -ESRCH if the file does not exist and -EIO if it cannot be
         * parsed. ret may be NULL. */

        assert(pid >= 0);

        path = procfs_file_alloca(pid, "stat");
        r = read_one_line_file(path, &stat_line);
        if (r < 0)
                return r == -ENOENT ? -ESRCH : r;

        /* The pid and comm fields come first. The comm field is wrapped in () but any () inside of it
         * are not escaped, hence jump to the last ')' instead of parsing it. */
        comm_end = strrchr(stat_line, ')');
        if (!comm_end)
                return -EIO;

        if (sscanf(comm_end + 1, " "
                   "%*c " /* state */
                   "%*u " /* ppid */
                   "%*u " /* pgrp */
                   "%*u " /* session */
                   "%*u " /* tty_nr */
                   "%*u " /* tpgid */
                   "%*u " /* flags */
                   "%*u " /* minflt */
                   "%*u " /* cminflt */
                   "%*u " /* majflt */
                   "%*u " /* cmajflt */
                   "%*u " /* utime */
                   "%*u " /* stime */
                   "%*u " /* cutime */
                   "%*u " /* cstime */
                   "%*i " /* priority */
                   "%*i " /* nice */
                   "%*u " /* num_threads */
                   "%*u " /* itrealvalue */
                   "%lu ", /* starttime */
                   &ticks) != 1)
                return -EIO;

        if (ret)
                *ret = jiffies_to_usec(ticks); /* CLOCK_BOOTTIME */

        return 0;
}
817

818
int pidref_get_start_time(const PidRef *pid, usec_t *ret) {
        usec_t started;
        int r;

        /* PidRef flavour of pid_get_start_time(): -ESRCH for an unset reference, -EREMOTE for a
         * remote one. ret may be NULL. */

        if (!pidref_is_set(pid))
                return -ESRCH;

        if (pidref_is_remote(pid))
                return -EREMOTE;

        r = pid_get_start_time(pid->pid, ret ? &started : NULL);
        if (r < 0)
                return r;

        /* Only return the data if the reference still checks out after we read it */
        r = pidref_verify(pid);
        if (r < 0)
                return r;

        if (ret)
                *ret = started;

        return 0;
}
841

842
int get_process_umask(pid_t pid, mode_t *ret) {
843
        _cleanup_free_ char *m = NULL;
18,639✔
844
        int r;
18,639✔
845

846
        assert(pid >= 0);
18,639✔
847
        assert(ret);
18,639✔
848

849
        r = procfs_file_get_field(pid, "status", "Umask", &m);
18,639✔
850
        if (r == -ENOENT)
18,639✔
851
                return -ESRCH;
852
        if (r < 0)
18,639✔
853
                return r;
854

855
        return parse_mode(m, ret);
18,639✔
856
}
857

858
int wait_for_terminate(pid_t pid, siginfo_t *ret) {
        /* Plain-PID convenience wrapper: wraps the PID in a temporary PidRef and defers to
         * pidref_wait_for_terminate(). */
        return pidref_wait_for_terminate(
                        &PIDREF_MAKE_FROM_PID(pid),
                        ret);
}
861

862
/*
863
 * Return values:
864
 * < 0 : wait_for_terminate() failed to get the state of the
865
 *       process, the process was terminated by a signal, or
866
 *       failed for an unknown reason.
867
 * >=0 : The process terminated normally, and its exit code is
868
 *       returned.
869
 *
870
 * That is, success is indicated by a return value of zero, and an
871
 * error is indicated by a non-zero value.
872
 *
873
 * A warning is emitted if the process terminates abnormally,
874
 * and also if it returns non-zero unless check_exit_code is true.
875
 */
876
int pidref_wait_for_terminate_and_check(const char *name, PidRef *pidref, WaitFlags flags) {
877
        int r;
9,082✔
878

879
        if (!pidref_is_set(pidref))
9,082✔
880
                return -ESRCH;
9,082✔
881
        if (pidref_is_remote(pidref))
18,164✔
882
                return -EREMOTE;
883
        if (pidref->pid == 1 || pidref_is_self(pidref))
9,082✔
884
                return -ECHILD;
×
885

886
        _cleanup_free_ char *buffer = NULL;
9,082✔
887
        if (!name) {
9,082✔
888
                r = pidref_get_comm(pidref, &buffer);
×
889
                if (r < 0)
×
890
                        log_debug_errno(r, "Failed to acquire process name of " PID_FMT ", ignoring: %m", pidref->pid);
×
891
                else
892
                        name = buffer;
×
893
        }
894

895
        int prio = flags & WAIT_LOG_ABNORMAL ? LOG_ERR : LOG_DEBUG;
9,082✔
896

897
        siginfo_t status;
9,082✔
898
        r = pidref_wait_for_terminate(pidref, &status);
9,082✔
899
        if (r < 0)
9,082✔
900
                return log_full_errno(prio, r, "Failed to wait for %s: %m", strna(name));
×
901

902
        if (status.si_code == CLD_EXITED) {
9,082✔
903
                if (status.si_status != EXIT_SUCCESS)
9,082✔
904
                        log_full(flags & WAIT_LOG_NON_ZERO_EXIT_STATUS ? LOG_ERR : LOG_DEBUG,
48✔
905
                                 "%s failed with exit status %i.", strna(name), status.si_status);
906
                else
907
                        log_debug("%s succeeded.", name);
9,034✔
908

909
                return status.si_status;
9,082✔
910

911
        } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) {
×
912

913
                log_full(prio, "%s terminated by signal %s.", strna(name), signal_to_string(status.si_status));
×
914
                return -EPROTO;
×
915
        }
916

917
        log_full(prio, "%s failed due to unknown reason.", strna(name));
×
918
        return -EPROTO;
919
}
920

921
int wait_for_terminate_and_check(const char *name, pid_t pid, WaitFlags flags) {
        /* Variant of pidref_wait_for_terminate_and_check() for callers that only have a numeric PID: the
         * PID is wrapped in a PidRef and everything else is forwarded unchanged. */
        PidRef ref = PIDREF_MAKE_FROM_PID(pid);

        return pidref_wait_for_terminate_and_check(name, &ref, flags);
}
924

925
/*
926
 * Return values:
927
 *
928
 * < 0 : wait_for_terminate_with_timeout() failed to get the state of the process, the process timed out, the process
929
 *       was terminated by a signal, or failed for an unknown reason.
930
 *
931
 * >=0 : The process terminated normally with no failures.
932
 *
933
 * Success is indicated by a return value of zero, a timeout is indicated by ETIMEDOUT, and all other child failure
934
 * states are indicated by error is indicated by a non-zero value.
935
 *
936
 * This call assumes SIGCHLD has been blocked already, in particular before the child to wait for has been forked off
937
 * to remain entirely race-free.
938
 */
939
int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
940
        sigset_t mask;
×
941
        int r;
×
942
        usec_t until;
×
943

944
        assert_se(sigemptyset(&mask) == 0);
×
945
        assert_se(sigaddset(&mask, SIGCHLD) == 0);
×
946

947
        /* Drop into a sigtimewait-based timeout. Waiting for the
948
         * pid to exit. */
949
        until = usec_add(now(CLOCK_MONOTONIC), timeout);
×
950
        for (;;) {
×
951
                usec_t n;
×
952
                siginfo_t status = {};
×
953

954
                n = now(CLOCK_MONOTONIC);
×
955
                if (n >= until)
×
956
                        break;
957

958
                r = RET_NERRNO(sigtimedwait(&mask, NULL, TIMESPEC_STORE(until - n)));
×
959
                /* Assuming we woke due to the child exiting. */
960
                if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
×
961
                        if (status.si_pid == pid) {
×
962
                                /* This is the correct child. */
963
                                if (status.si_code == CLD_EXITED)
×
964
                                        return status.si_status == 0 ? 0 : -EPROTO;
×
965
                                else
966
                                        return -EPROTO;
967
                        }
968
                }
969
                /* Not the child, check for errors and proceed appropriately */
970
                if (r < 0) {
×
971
                        switch (r) {
×
972
                        case -EAGAIN:
973
                                /* Timed out, child is likely hung. */
974
                                return -ETIMEDOUT;
975
                        case -EINTR:
×
976
                                /* Received a different signal and should retry */
977
                                continue;
×
978
                        default:
×
979
                                /* Return any unexpected errors */
980
                                return r;
×
981
                        }
982
                }
983
        }
984

985
        return -EPROTO;
×
986
}
987

988
void sigkill_wait(pid_t pid) {
989
        assert(pid > 1);
45✔
990

991
        (void) kill(pid, SIGKILL);
45✔
992
        (void) wait_for_terminate(pid, NULL);
45✔
993
}
45✔
994

995
void sigkill_waitp(pid_t *pid) {
996
        PROTECT_ERRNO;
11,757✔
997

998
        if (!pid)
11,757✔
999
                return;
1000
        if (*pid <= 1)
11,757✔
1001
                return;
1002

1003
        sigkill_wait(*pid);
44✔
1004
}
1005

1006
void sigterm_wait(pid_t pid) {
1007
        assert(pid > 1);
65✔
1008

1009
        (void) kill_and_sigcont(pid, SIGTERM);
65✔
1010
        (void) wait_for_terminate(pid, NULL);
65✔
1011
}
65✔
1012

1013
void sigkill_nowait(pid_t pid) {
        /* Sends SIGKILL to the specified process without waiting for it afterwards. A failing kill() is
         * ignored. PIDs <= 1 are a programming error. */

        assert(pid > 1);

        (void) kill(pid, SIGKILL);
}
×
1018

1019
void sigkill_nowaitp(pid_t *pid) {
1020
        PROTECT_ERRNO;
×
1021

1022
        if (!pid)
×
1023
                return;
1024
        if (*pid <= 1)
×
1025
                return;
1026

1027
        sigkill_nowait(*pid);
×
1028
}
1029

1030
int kill_and_sigcont(pid_t pid, int sig) {
        /* Sends the specified signal to the process. Returns the result of that first kill() call (as
         * negative errno on failure); the outcome of the follow-up SIGCONT is not reported. */

        int r = RET_NERRNO(kill(pid, sig));
        if (r < 0)
                return r;

        /* The signal went out. Follow up with SIGCONT, except if SIGCONT is what we just sent, or if we
         * sent SIGKILL, which isn't affected by a process being suspended anyway. */
        if (sig != SIGCONT && sig != SIGKILL)
                (void) kill(pid, SIGCONT);

        return r;
}
1042

1043
int getenv_for_pid(pid_t pid, const char *field, char **ret) {
1044
        _cleanup_fclose_ FILE *f = NULL;
4,697✔
1045
        const char *path;
4,697✔
1046
        size_t sum = 0;
4,697✔
1047
        int r;
4,697✔
1048

1049
        assert(pid >= 0);
4,697✔
1050
        assert(field);
4,697✔
1051
        assert(ret);
4,697✔
1052

1053
        if (pid == 0 || pid == getpid_cached())
4,697✔
1054
                return strdup_to_full(ret, getenv(field));
13✔
1055

1056
        if (!pid_is_valid(pid))
4,684✔
1057
                return -EINVAL;
1058

1059
        path = procfs_file_alloca(pid, "environ");
4,684✔
1060

1061
        r = fopen_unlocked(path, "re", &f);
4,684✔
1062
        if (r == -ENOENT)
4,684✔
1063
                return -ESRCH;
1064
        if (r < 0)
4,219✔
1065
                return r;
1066

1067
        for (;;) {
46,264✔
1068
                _cleanup_free_ char *line = NULL;
21,715✔
1069
                const char *match;
24,551✔
1070

1071
                if (sum > ENVIRONMENT_BLOCK_MAX) /* Give up searching eventually */
24,551✔
1072
                        return -ENOBUFS;
1073

1074
                r = read_nul_string(f, LONG_LINE_MAX, &line);
24,551✔
1075
                if (r < 0)
24,551✔
1076
                        return r;
1077
                if (r == 0)  /* EOF */
24,551✔
1078
                        break;
1079

1080
                sum += r;
21,715✔
1081

1082
                match = startswith(line, field);
21,715✔
1083
                if (match && *match == '=')
21,715✔
1084
                        return strdup_to_full(ret, match + 1);
2✔
1085
        }
1086

1087
        *ret = NULL;
2,836✔
1088
        return 0;
2,836✔
1089
}
1090

1091
int pidref_is_my_child(PidRef *pid) {
        pid_t parent;
        int r;

        /* Returns > 0 if the parent PID of the referenced process is our own PID, 0 if not, and a
         * negative errno-style value on failure (-ESRCH for an unset pidref, -EREMOTE for a remote one). */

        if (!pidref_is_set(pid))
                return -ESRCH;

        if (pidref_is_remote(pid))
                return -EREMOTE;

        /* Neither PID 1 nor we ourselves are treated as our child */
        if (pid->pid == 1 || pidref_is_self(pid))
                return false;

        r = pidref_get_ppid(pid, &parent);
        if (r < 0)
                /* if this process is outside of our pidns, it is definitely not our child */
                return r == -EADDRNOTAVAIL ? false : r;

        return parent == getpid_cached();
}
1112

1113
int pid_is_my_child(pid_t pid) {
1114

1115
        if (pid == 0)
×
1116
                return false;
×
1117

1118
        return pidref_is_my_child(&PIDREF_MAKE_FROM_PID(pid));
×
1119
}
1120

1121
int pidref_is_unwaited(PidRef *pid) {
        int r;

        /* Checks whether a PID is still valid at all, including a zombie. Returns > 0 if so, 0 if the
         * process is gone, and a negative errno-style value on failure. */

        if (!pidref_is_set(pid))
                return -ESRCH;

        if (pidref_is_remote(pid))
                return -EREMOTE;

        /* PID 1 and we ourselves are always reported as still around */
        if (pid->pid == 1 || pidref_is_self(pid))
                return true;

        /* Probe with the null signal: -ESRCH means the process is gone, any other error is passed on */
        r = pidref_kill(pid, 0);
        if (r >= 0)
                return true;

        return r == -ESRCH ? false : r;
}
1143

1144
int pid_is_unwaited(pid_t pid) {
1145

1146
        if (pid == 0)
7,352✔
1147
                return true;
7,352✔
1148

1149
        return pidref_is_unwaited(&PIDREF_MAKE_FROM_PID(pid));
7,352✔
1150
}
1151

1152
int pid_is_alive(pid_t pid) {
        /* Checks whether a PID is still valid and not a zombie. Returns > 0 if alive, 0 if gone or a
         * zombie, -ESRCH for negative PIDs, and otherwise whatever error get_process_state() reports. */

        if (pid < 0)
                return -ESRCH;

        /* If we or PID 1 would be a zombie, this code would not be running */
        if (pid <= 1 || pid == getpid_cached())
                return true;

        int state = get_process_state(pid);
        if (state == -ESRCH)
                return false;
        if (state < 0)
                return state;

        return state != 'Z';
}
1174

1175
int pidref_is_alive(const PidRef *pidref) {
1176
        int r, result;
9,970✔
1177

1178
        if (!pidref_is_set(pidref))
9,970✔
1179
                return -ESRCH;
1180

1181
        if (pidref_is_remote(pidref))
9,970✔
1182
                return -EREMOTE;
1183

1184
        result = pid_is_alive(pidref->pid);
9,970✔
1185
        if (result < 0) {
9,970✔
1186
                assert(result != -ESRCH);
×
1187
                return result;
1188
        }
1189

1190
        r = pidref_verify(pidref);
9,970✔
1191
        if (r == -ESRCH)
9,970✔
1192
                return false;
1193
        if (r < 0)
8,045✔
1194
                return r;
×
1195

1196
        return result;
1197
}
1198

1199
int pidref_from_same_root_fs(PidRef *a, PidRef *b) {
        _cleanup_(pidref_done) PidRef self = PIDREF_NULL;
        int r;

        /* Checks if the two specified processes have the same root fs. Either can be specified as NULL in
         * which case we'll check against ourselves. Returns > 0 if they match, 0 if not, and a negative
         * errno-style value on failure. */

        if (!a || !b) {
                r = pidref_set_self(&self);
                if (r < 0)
                        return r;

                if (!a)
                        a = &self;
                if (!b)
                        b = &self;
        }

        if (!pidref_is_set(a) || !pidref_is_set(b))
                return -ESRCH;

        /* If exactly one of the two processes is remote they cannot have the same root fs, but if both of
         * them are remote we can't tell. */
        bool remote_a = pidref_is_remote(a), remote_b = pidref_is_remote(b);
        if (remote_a && remote_b)
                return -EREMOTE;
        if (remote_a || remote_b)
                return false;

        if (pidref_equal(a, b))
                return true;

        /* Compare the inodes the two "root" entries in procfs refer to */
        const char *roota = procfs_file_alloca(a->pid, "root");
        const char *rootb = procfs_file_alloca(b->pid, "root");

        int result = inode_same(roota, rootb, 0);
        if (result == -ENOENT)
                return proc_mounted() == 0 ? -ENOSYS : -ESRCH;
        if (result < 0)
                return result;

        /* Only report the comparison result if both references still pass pidref_verify() afterwards */
        r = pidref_verify(a);
        if (r < 0)
                return r;

        r = pidref_verify(b);
        if (r < 0)
                return r;

        return result;
}
1247

1248
bool is_main_thread(void) {
        /* Per-thread cache of the answer: < 0 means "not determined yet" */
        static thread_local int cached = -1;

        if (cached >= 0)
                return cached;

        /* The calling thread is the main thread if its thread ID equals the process ID */
        cached = getpid_cached() == gettid();
        return cached;
}
1256

1257
bool oom_score_adjust_is_valid(int oa) {
        /* An OOM score adjustment is valid if it lies within [OOM_SCORE_ADJ_MIN, OOM_SCORE_ADJ_MAX],
         * both bounds included. */

        if (oa < OOM_SCORE_ADJ_MIN)
                return false;

        return oa <= OOM_SCORE_ADJ_MAX;
}
1260

1261
unsigned long personality_from_string(const char *p) {
1262
        Architecture architecture;
9✔
1263

1264
        if (!p)
9✔
1265
                return PERSONALITY_INVALID;
1266

1267
        /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just
1268
         * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for
1269
         * the same register size. */
1270

1271
        architecture = architecture_from_string(p);
8✔
1272
        if (architecture < 0)
8✔
1273
                return PERSONALITY_INVALID;
1274

1275
        if (architecture == native_architecture())
6✔
1276
                return PER_LINUX;
1277
#ifdef ARCHITECTURE_SECONDARY
1278
        if (architecture == ARCHITECTURE_SECONDARY)
3✔
1279
                return PER_LINUX32;
2✔
1280
#endif
1281

1282
        return PERSONALITY_INVALID;
1283
}
1284

1285
const char* personality_to_string(unsigned long p) {
1286
        Architecture architecture = _ARCHITECTURE_INVALID;
1,336✔
1287

1288
        if (p == PER_LINUX)
1,336✔
1289
                architecture = native_architecture();
1290
#ifdef ARCHITECTURE_SECONDARY
1291
        else if (p == PER_LINUX32)
1,331✔
1292
                architecture = ARCHITECTURE_SECONDARY;
1293
#endif
1294

1295
        if (architecture < 0)
1296
                return NULL;
1297

1298
        return architecture_to_string(architecture);
7✔
1299
}
1300

1301
int safe_personality(unsigned long p) {
        /* So here's the deal, personality() is weirdly defined by glibc. In some cases it returns a failure via errno,
         * and in others as negative return value containing an errno-like value. Let's work around this: this is a
         * wrapper that uses errno if it is set, and uses the return value otherwise. And then it sets both errno and
         * the return value indicating the same issue, so that we are definitely on the safe side.
         *
         * See https://github.com/systemd/systemd/issues/6737 */

        errno = 0;

        int ret = personality(p);
        if (ret >= 0)
                return ret;

        /* Failure reported via errno */
        if (errno != 0)
                return -errno;

        /* Failure reported via the return value only, mirror it into errno */
        errno = -ret;
        return ret;
}
1322

1323
int opinionated_personality(unsigned long *ret) {
1324
        int current;
1,490✔
1325

1326
        /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit
1327
         * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the
1328
         * two most relevant personalities: PER_LINUX and PER_LINUX32. */
1329

1330
        current = safe_personality(PERSONALITY_INVALID);
1,490✔
1331
        if (current < 0)
1,490✔
1332
                return current;
1333

1334
        if (((unsigned long) current & OPINIONATED_PERSONALITY_MASK) == PER_LINUX32)
1,490✔
1335
                *ret = PER_LINUX32;
×
1336
        else
1337
                *ret = PER_LINUX;
1,490✔
1338

1339
        return 0;
1340
}
1341

1342
void valgrind_summary_hack(void) {
#if HAVE_VALGRIND_VALGRIND_H
        /* Only relevant if we are PID 1 and run under valgrind: fork off a helper child that exits right
         * away, and wait for it. A failure to fork is logged, but otherwise ignored. */

        if (getpid_cached() != 1 || !RUNNING_ON_VALGRIND)
                return;

        pid_t pid = raw_clone(SIGCHLD);
        if (pid < 0) {
                log_struct_errno(
                        LOG_EMERG, errno,
                        LOG_MESSAGE_ID(SD_MESSAGE_VALGRIND_HELPER_FORK_STR),
                        LOG_MESSAGE("Failed to fork off valgrind helper: %m"));
                return;
        }

        if (pid == 0) /* The helper child has nothing to do but exit */
                exit(EXIT_SUCCESS);

        log_info("Spawned valgrind helper as PID "PID_FMT".", pid);
        (void) wait_for_terminate(pid, NULL);
#endif
}
47✔
1361

1362
int pid_compare_func(const pid_t *a, const pid_t *b) {
        /* Three-way comparison of two PIDs: negative if *a sorts first, positive if *b sorts first, zero
         * if they are equal. Suitable for usage in qsort() */
        pid_t x = *a, y = *b;

        if (x < y)
                return -1;

        return x > y;
}
1366

1367
/* State of the PID cache used by getpid_cached(). The variable holds one of:
 *
 *     CACHED_PID_UNSET [0]  → nothing cached yet
 *     CACHED_PID_BUSY [-1]  → some thread is in the middle of initializing the cache
 *     anything else         → the cached PID itself
 */

#define CACHED_PID_UNSET ((pid_t) 0)
#define CACHED_PID_BUSY ((pid_t) -1)

static pid_t cached_pid = CACHED_PID_UNSET;

void reset_cached_pid(void) {
        /* Drops the cached value again. This is called in the child right after a fork(), i.e. at the
         * first moment the PID changed. */
        cached_pid = CACHED_PID_UNSET;
}
3,163✔
1383

1384
pid_t getpid_cached(void) {
1385
        static bool installed = false;
78,297,041✔
1386
        pid_t current_value = CACHED_PID_UNSET;
78,297,041✔
1387

1388
        /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
1389
         * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
1390
         * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
1391
         * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
1392
         *
1393
         * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
1394
         * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
1395
         */
1396

1397
        (void) __atomic_compare_exchange_n(
78,297,041✔
1398
                        &cached_pid,
1399
                        &current_value,
1400
                        CACHED_PID_BUSY,
1401
                        false,
1402
                        __ATOMIC_SEQ_CST,
1403
                        __ATOMIC_SEQ_CST);
1404

1405
        switch (current_value) {
78,297,041✔
1406

1407
        case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */
121,180✔
1408
                pid_t new_pid;
121,180✔
1409

1410
                new_pid = getpid();
121,180✔
1411

1412
                if (!installed) {
121,180✔
1413
                        /* __register_atfork() either returns 0 or -ENOMEM, in its glibc implementation. Since it's
1414
                         * only half-documented (glibc doesn't document it but LSB does — though only superficially)
1415
                         * we'll check for errors only in the most generic fashion possible. */
1416

1417
                        if (pthread_atfork(NULL, NULL, reset_cached_pid) != 0) {
91,579✔
1418
                                /* OOM? Let's try again later */
1419
                                cached_pid = CACHED_PID_UNSET;
×
1420
                                return new_pid;
×
1421
                        }
1422

1423
                        installed = true;
91,579✔
1424
                }
1425

1426
                cached_pid = new_pid;
121,180✔
1427
                return new_pid;
121,180✔
1428
        }
1429

1430
        case CACHED_PID_BUSY: /* Somebody else is currently initializing */
×
1431
                return getpid();
×
1432

1433
        default: /* Properly initialized */
1434
                return current_value;
1435
        }
1436
}
1437

1438
int must_be_root(void) {
        /* Returns 0 if our effective UID is 0. Otherwise logs an error and returns the result of
         * log_error_errno() for a synthetic EPERM. */

        if (geteuid() != 0)
                return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Need to be root.");

        return 0;
}
1445

1446
pid_t clone_with_nested_stack(int (*fn)(void *), int flags, void *userdata) {
1447
        size_t ps;
2,931✔
1448
        pid_t pid;
2,931✔
1449
        void *mystack;
2,931✔
1450

1451
        /* A wrapper around glibc's clone() call that automatically sets up a "nested" stack. Only supports
1452
         * invocations without CLONE_VM, so that we can continue to use the parent's stack mapping.
1453
         *
1454
         * Note: glibc's clone() wrapper does not synchronize malloc() locks. This means that if the parent
1455
         * is threaded these locks will be in an undefined state in the child, and hence memory allocations
1456
         * are likely going to run into deadlocks. Hence: if you use this function make sure your parent is
1457
         * strictly single-threaded or your child never calls malloc(). */
1458

1459
        assert((flags & (CLONE_VM|CLONE_PARENT_SETTID|CLONE_CHILD_SETTID|
2,931✔
1460
                         CLONE_CHILD_CLEARTID|CLONE_SETTLS)) == 0);
1461

1462
        /* We allocate some space on the stack to use as the stack for the child (hence "nested"). Note that
1463
         * the net effect is that the child will have the start of its stack inside the stack of the parent,
1464
         * but since they are a CoW copy of each other that's fine. We allocate one page-aligned page. But
1465
         * since we don't want to deal with differences between systems where the stack grows backwards or
1466
         * forwards we'll allocate one more and place the stack address in the middle. Except that we also
1467
         * want it page aligned, hence we'll allocate one page more. Makes 3. */
1468

1469
        ps = page_size();
2,931✔
1470
        mystack = alloca(ps*3);
2,931✔
1471
        mystack = (uint8_t*) mystack + ps; /* move pointer one page ahead since stacks usually grow backwards */
2,931✔
1472
        mystack = (void*) ALIGN_TO((uintptr_t) mystack, ps); /* align to page size (moving things further ahead) */
2,931✔
1473

1474
#if HAVE_CLONE
1475
        pid = clone(fn, mystack, flags, userdata);
2,931✔
1476
#else
1477
        pid = __clone2(fn, mystack, ps, flags, userdata);
1478
#endif
1479
        if (pid < 0)
2,931✔
1480
                return -errno;
×
1481

1482
        return pid;
1483
}
1484

1485
static void restore_sigsetp(sigset_t **ssp) {
        /* Cleanup helper: if a saved signal mask is referenced, install it again as the current signal
         * mask. A NULL reference means there's nothing to restore. */
        sigset_t *saved = *ssp;

        if (!saved)
                return;

        (void) sigprocmask(SIG_SETMASK, saved, NULL);
}
59,325✔
1489

1490
static int fork_flags_to_signal(ForkFlags flags) {
        /* Picks the signal matching the FORK_DEATHSIG_* flag that is set: SIGTERM takes precedence over
         * SIGINT, and if neither is requested we fall back to SIGKILL. */

        if (flags & FORK_DEATHSIG_SIGTERM)
                return SIGTERM;

        if (flags & FORK_DEATHSIG_SIGINT)
                return SIGINT;

        return SIGKILL;
}
1495

1496
int pidref_safe_fork_full(
1497
                const char *name,
1498
                const int stdio_fds[3],
1499
                int except_fds[],
1500
                size_t n_except_fds,
1501
                ForkFlags flags,
1502
                PidRef *ret_pid) {
1503

1504
        pid_t original_pid, pid;
31,441✔
1505
        sigset_t saved_ss, ss;
31,441✔
1506
        _unused_ _cleanup_(restore_sigsetp) sigset_t *saved_ssp = NULL;
×
1507
        bool block_signals = false, block_all = false, intermediary = false;
31,441✔
1508
        _cleanup_close_pair_ int pidref_transport_fds[2] = EBADF_PAIR;
59,325✔
1509
        int prio, r;
31,441✔
1510

1511
        assert(!FLAGS_SET(flags, FORK_WAIT|FORK_FREEZE));
31,441✔
1512
        assert(!FLAGS_SET(flags, FORK_DETACH) ||
31,441✔
1513
               (flags & (FORK_WAIT|FORK_DEATHSIG_SIGTERM|FORK_DEATHSIG_SIGINT|FORK_DEATHSIG_SIGKILL)) == 0);
1514

1515
        /* A wrapper around fork(), that does a couple of important initializations in addition to mere
1516
         * forking. If provided, ret_pid is initialized in both the parent and the child process, both times
1517
         * referencing the child process. Returns == 0 in the child and > 0 in the parent. */
1518

1519
        prio = flags & FORK_LOG ? LOG_ERR : LOG_DEBUG;
31,441✔
1520

1521
        original_pid = getpid_cached();
31,441✔
1522

1523
        if (flags & FORK_FLUSH_STDIO) {
31,441✔
1524
                fflush(stdout);
5✔
1525
                fflush(stderr); /* This one shouldn't be necessary, stderr should be unbuffered anyway, but let's better be safe than sorry */
5✔
1526
        }
1527

1528
        if (flags & (FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGTERM|FORK_DEATHSIG_SIGINT)) {
31,441✔
1529
                /* We temporarily block all signals, so that the new child has them blocked initially. This
1530
                 * way, we can be sure that SIGTERMs are not lost we might send to the child. (Note that for
1531
                 * FORK_DEATHSIG_SIGKILL we don't bother, since it cannot be blocked anyway.) */
1532

1533
                assert_se(sigfillset(&ss) >= 0);
26,915✔
1534
                block_signals = block_all = true;
1535

1536
        } else if (flags & FORK_WAIT) {
4,526✔
1537
                /* Let's block SIGCHLD at least, so that we can safely watch for the child process */
1538

1539
                assert_se(sigemptyset(&ss) >= 0);
106✔
1540
                assert_se(sigaddset(&ss, SIGCHLD) >= 0);
106✔
1541
                block_signals = true;
1542
        }
1543

1544
        if (block_signals) {
1545
                if (sigprocmask(SIG_BLOCK, &ss, &saved_ss) < 0)
27,021✔
1546
                        return log_full_errno(prio, errno, "Failed to block signal mask: %m");
×
1547
                saved_ssp = &saved_ss;
27,021✔
1548
        }
1549

1550
        if (FLAGS_SET(flags, FORK_DETACH)) {
31,441✔
1551
                /* Fork off intermediary child if needed */
1552

1553
                r = is_reaper_process();
98✔
1554
                if (r < 0)
98✔
1555
                        return log_full_errno(prio, r, "Failed to determine if we are a reaper process: %m");
×
1556

1557
                if (!r) {
98✔
1558
                        /* Not a reaper process, hence do a double fork() so we are reparented to one */
1559

1560
                        if (ret_pid && socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, pidref_transport_fds) < 0)
10✔
1561
                                return log_full_errno(prio, errno, "Failed to allocate pidref socket: %m");
×
1562

1563
                        pid = fork();
10✔
1564
                        if (pid < 0)
26✔
1565
                                return log_full_errno(prio, errno, "Failed to fork off '%s': %m", strna(name));
×
1566
                        if (pid > 0) {
26✔
1567
                                log_debug("Successfully forked off intermediary '%s' as PID " PID_FMT ".", strna(name), pid);
10✔
1568

1569
                                pidref_transport_fds[1] = safe_close(pidref_transport_fds[1]);
10✔
1570

1571
                                if (pidref_transport_fds[0] >= 0) {
10✔
1572
                                        /* Wait for the intermediary child to exit so the caller can be certain the actual child
1573
                                         * process has been reparented by the time this function returns. */
1574
                                        r = wait_for_terminate_and_check(name, pid, FLAGS_SET(flags, FORK_LOG) ? WAIT_LOG : 0);
18✔
1575
                                        if (r < 0)
9✔
1576
                                                return log_full_errno(prio, r, "Failed to wait for intermediary process: %m");
×
1577
                                        if (r != EXIT_SUCCESS) /* exit status > 0 should be treated as failure, too */
9✔
1578
                                                return -EPROTO;
1579

1580
                                        int pidfd;
9✔
1581
                                        ssize_t n = receive_one_fd_iov(
18✔
1582
                                                        pidref_transport_fds[0],
1583
                                                        &IOVEC_MAKE(&pid, sizeof(pid)),
9✔
1584
                                                        /* iovlen= */ 1,
1585
                                                        /* flags= */ 0,
1586
                                                        &pidfd);
1587
                                        if (n < 0)
9✔
1588
                                                return log_full_errno(prio, n, "Failed to receive child pidref: %m");
×
1589

1590
                                        *ret_pid = (PidRef) { .pid = pid, .fd = pidfd };
9✔
1591
                                }
1592

1593
                                return 1; /* return in the parent */
10✔
1594
                        }
1595

1596
                        pidref_transport_fds[0] = safe_close(pidref_transport_fds[0]);
16✔
1597
                        intermediary = true;
16✔
1598
                }
1599
        }
1600

1601
        if ((flags & (FORK_NEW_MOUNTNS|FORK_NEW_USERNS|FORK_NEW_NETNS|FORK_NEW_PIDNS)) != 0)
31,447✔
1602
                pid = raw_clone(SIGCHLD|
9,878✔
1603
                                (FLAGS_SET(flags, FORK_NEW_MOUNTNS) ? CLONE_NEWNS : 0) |
9,878✔
1604
                                (FLAGS_SET(flags, FORK_NEW_USERNS) ? CLONE_NEWUSER : 0) |
9,878✔
1605
                                (FLAGS_SET(flags, FORK_NEW_NETNS) ? CLONE_NEWNET : 0) |
9,878✔
1606
                                (FLAGS_SET(flags, FORK_NEW_PIDNS) ? CLONE_NEWPID : 0));
9,878✔
1607
        else
1608
                pid = fork();
21,569✔
1609
        if (pid < 0)
59,325✔
1610
                return log_full_errno(prio, errno, "Failed to fork off '%s': %m", strna(name));
2✔
1611
        if (pid > 0) {
59,324✔
1612

1613
                /* If we are in the intermediary process, exit now */
1614
                if (intermediary) {
29,239✔
1615
                        if (pidref_transport_fds[1] >= 0) {
10✔
1616
                                _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
9✔
1617

1618
                                r = pidref_set_pid(&pidref, pid);
9✔
1619
                                if (r < 0) {
9✔
1620
                                        log_full_errno(prio, r, "Failed to open reference to PID "PID_FMT": %m", pid);
×
1621
                                        _exit(EXIT_FAILURE);
×
1622
                                }
1623

1624
                                r = send_one_fd_iov(
9✔
1625
                                                pidref_transport_fds[1],
1626
                                                pidref.fd,
1627
                                                &IOVEC_MAKE(&pidref.pid, sizeof(pidref.pid)),
1628
                                                /* iovlen= */ 1,
1629
                                                /* flags= */ 0);
1630
                                if (r < 0) {
9✔
1631
                                        log_full_errno(prio, r, "Failed to send child pidref: %m");
×
1632
                                        _exit(EXIT_FAILURE);
×
1633
                                }
1634
                        }
1635

1636
                        _exit(EXIT_SUCCESS);
10✔
1637
                }
1638

1639
                /* We are in the parent process */
1640
                log_debug("Successfully forked off '%s' as PID " PID_FMT ".", strna(name), pid);
29,229✔
1641

1642
                if (flags & FORK_WAIT) {
29,229✔
1643
                        if (block_all) {
3,205✔
1644
                                /* undo everything except SIGCHLD */
1645
                                ss = saved_ss;
3,099✔
1646
                                assert_se(sigaddset(&ss, SIGCHLD) >= 0);
3,099✔
1647
                                (void) sigprocmask(SIG_SETMASK, &ss, NULL);
3,099✔
1648
                        }
1649

1650
                        r = wait_for_terminate_and_check(name, pid, (flags & FORK_LOG ? WAIT_LOG : 0));
5,956✔
1651
                        if (r < 0)
3,205✔
1652
                                return r;
1653
                        if (r != EXIT_SUCCESS) /* exit status > 0 should be treated as failure, too */
3,205✔
1654
                                return -EPROTO;
1655

1656
                        /* If we are in the parent and successfully waited, then the process doesn't exist anymore. */
1657
                        if (ret_pid)
3,205✔
1658
                                *ret_pid = PIDREF_NULL;
5✔
1659

1660
                        return 1;
3,205✔
1661
                }
1662

1663
                if (ret_pid) {
26,024✔
1664
                        if (FLAGS_SET(flags, FORK_PID_ONLY))
25,550✔
1665
                                *ret_pid = PIDREF_MAKE_FROM_PID(pid);
7,145✔
1666
                        else {
1667
                                r = pidref_set_pid(ret_pid, pid);
18,405✔
1668
                                if (r < 0) /* Let's not fail for this, no matter what, the process exists after all, and that's key */
18,405✔
1669
                                        *ret_pid = PIDREF_MAKE_FROM_PID(pid);
×
1670
                        }
1671
                }
1672

1673
                return 1;
26,024✔
1674
        }
1675

1676
        /* We are in the child process */
1677

1678
        pidref_transport_fds[1] = safe_close(pidref_transport_fds[1]);
30,085✔
1679

1680
        /* Restore signal mask manually */
1681
        saved_ssp = NULL;
30,085✔
1682

1683
        if (flags & FORK_REOPEN_LOG) {
30,085✔
1684
                /* Close the logs if requested, before we log anything. And make sure we reopen it if needed. */
1685
                log_close();
2,563✔
1686
                log_set_open_when_needed(true);
2,563✔
1687
                log_settle_target();
2,563✔
1688
        }
1689

1690
        if (name) {
30,085✔
1691
                r = rename_process(name);
30,085✔
1692
                if (r < 0)
30,085✔
1693
                        log_full_errno(flags & FORK_LOG ? LOG_WARNING : LOG_DEBUG,
×
1694
                                       r, "Failed to rename process, ignoring: %m");
1695
        }
1696

1697
        if (flags & (FORK_DEATHSIG_SIGTERM|FORK_DEATHSIG_SIGINT|FORK_DEATHSIG_SIGKILL))
30,085✔
1698
                if (prctl(PR_SET_PDEATHSIG, fork_flags_to_signal(flags)) < 0) {
28,861✔
1699
                        log_full_errno(prio, errno, "Failed to set death signal: %m");
×
1700
                        _exit(EXIT_FAILURE);
×
1701
                }
1702

1703
        if (flags & FORK_RESET_SIGNALS) {
30,085✔
1704
                r = reset_all_signal_handlers();
23,332✔
1705
                if (r < 0) {
23,332✔
1706
                        log_full_errno(prio, r, "Failed to reset signal handlers: %m");
×
1707
                        _exit(EXIT_FAILURE);
×
1708
                }
1709

1710
                /* This implicitly undoes the signal mask stuff we did before the fork()ing above */
1711
                r = reset_signal_mask();
23,332✔
1712
                if (r < 0) {
23,332✔
1713
                        log_full_errno(prio, r, "Failed to reset signal mask: %m");
×
1714
                        _exit(EXIT_FAILURE);
×
1715
                }
1716
        } else if (block_signals) { /* undo what we did above */
6,753✔
1717
                if (sigprocmask(SIG_SETMASK, &saved_ss, NULL) < 0) {
6,394✔
1718
                        log_full_errno(prio, errno, "Failed to restore signal mask: %m");
×
1719
                        _exit(EXIT_FAILURE);
×
1720
                }
1721
        }
1722

1723
        if (flags & (FORK_DEATHSIG_SIGTERM|FORK_DEATHSIG_SIGKILL|FORK_DEATHSIG_SIGINT)) {
30,085✔
1724
                pid_t ppid;
28,861✔
1725
                /* Let's see if the parent PID is still the one we started from? If not, then the parent
1726
                 * already died by the time we set PR_SET_PDEATHSIG, hence let's emulate the effect */
1727

1728
                ppid = getppid();
28,861✔
1729
                if (ppid == 0)
28,861✔
1730
                        /* Parent is in a different PID namespace. */;
1731
                else if (ppid != original_pid) {
28,824✔
1732
                        int sig = fork_flags_to_signal(flags);
×
1733
                        log_debug("Parent died early, raising %s.", signal_to_string(sig));
×
1734
                        (void) raise(sig);
×
1735
                        _exit(EXIT_FAILURE);
×
1736
                }
1737
        }
1738

1739
        if (FLAGS_SET(flags, FORK_NEW_MOUNTNS | FORK_MOUNTNS_SLAVE)) {
30,085✔
1740
                /* Optionally, make sure we never propagate mounts to the host. */
1741
                if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
110✔
1742
                        log_full_errno(prio, errno, "Failed to remount root directory as MS_SLAVE: %m");
×
1743
                        _exit(EXIT_FAILURE);
×
1744
                }
1745
        }
1746

1747
        if (FLAGS_SET(flags, FORK_PRIVATE_TMP)) {
30,085✔
1748
                assert(FLAGS_SET(flags, FORK_NEW_MOUNTNS));
×
1749

1750
                /* Optionally, overmount new tmpfs instance on /tmp/. */
1751
                r = mount_nofollow("tmpfs", "/tmp", "tmpfs",
×
1752
                                   MS_NOSUID|MS_NODEV,
1753
                                   "mode=01777" TMPFS_LIMITS_RUN);
1754
                if (r < 0) {
×
1755
                        log_full_errno(prio, r, "Failed to overmount /tmp/: %m");
×
1756
                        _exit(EXIT_FAILURE);
×
1757
                }
1758
        }
1759

1760
        if (flags & FORK_REARRANGE_STDIO) {
30,085✔
1761
                if (stdio_fds) {
14,383✔
1762
                        r = rearrange_stdio(stdio_fds[0], stdio_fds[1], stdio_fds[2]);
14,372✔
1763
                        if (r < 0) {
14,372✔
1764
                                log_full_errno(prio, r, "Failed to rearrange stdio fds: %m");
×
1765
                                _exit(EXIT_FAILURE);
×
1766
                        }
1767

1768
                        /* Turn off O_NONBLOCK on the stdio fds, in case it was left on */
1769
                        stdio_disable_nonblock();
14,372✔
1770
                } else {
1771
                        r = make_null_stdio();
11✔
1772
                        if (r < 0) {
11✔
1773
                                log_full_errno(prio, r, "Failed to connect stdin/stdout to /dev/null: %m");
×
1774
                                _exit(EXIT_FAILURE);
×
1775
                        }
1776
                }
1777
        } else if (flags & FORK_STDOUT_TO_STDERR) {
15,702✔
1778
                if (dup2(STDERR_FILENO, STDOUT_FILENO) < 0) {
2✔
1779
                        log_full_errno(prio, errno, "Failed to connect stdout to stderr: %m");
×
1780
                        _exit(EXIT_FAILURE);
×
1781
                }
1782
        }
1783

1784
        if (flags & FORK_CLOSE_ALL_FDS) {
30,085✔
1785
                /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */
1786
                log_close();
22,331✔
1787

1788
                r = close_all_fds(except_fds, n_except_fds);
22,331✔
1789
                if (r < 0) {
22,331✔
1790
                        log_full_errno(prio, r, "Failed to close all file descriptors: %m");
×
1791
                        _exit(EXIT_FAILURE);
×
1792
                }
1793
        }
1794

1795
        if (flags & FORK_PACK_FDS) {
30,085✔
1796
                /* FORK_CLOSE_ALL_FDS ensures that except_fds are the only FDs >= 3 that are
1797
                 * open, this is including the log. This is required by pack_fds, which will
1798
                 * get stuck in an infinite loop if any FDs other than except_fds are open. */
1799
                assert(FLAGS_SET(flags, FORK_CLOSE_ALL_FDS));
87✔
1800

1801
                r = pack_fds(except_fds, n_except_fds);
87✔
1802
                if (r < 0) {
87✔
1803
                        log_full_errno(prio, r, "Failed to pack file descriptors: %m");
×
1804
                        _exit(EXIT_FAILURE);
×
1805
                }
1806
        }
1807

1808
        if (flags & FORK_CLOEXEC_OFF) {
30,085✔
1809
                r = fd_cloexec_many(except_fds, n_except_fds, false);
97✔
1810
                if (r < 0) {
97✔
1811
                        log_full_errno(prio, r, "Failed to turn off O_CLOEXEC on file descriptors: %m");
×
1812
                        _exit(EXIT_FAILURE);
×
1813
                }
1814
        }
1815

1816
        /* When we were asked to reopen the logs, do so again now */
1817
        if (flags & FORK_REOPEN_LOG) {
30,085✔
1818
                log_open();
2,563✔
1819
                log_set_open_when_needed(false);
2,563✔
1820
        }
1821

1822
        if (flags & FORK_RLIMIT_NOFILE_SAFE) {
30,085✔
1823
                r = rlimit_nofile_safe();
18,550✔
1824
                if (r < 0) {
18,550✔
1825
                        log_full_errno(prio, r, "Failed to lower RLIMIT_NOFILE's soft limit to 1K: %m");
×
1826
                        _exit(EXIT_FAILURE);
×
1827
                }
1828
        }
1829

1830
        if (!FLAGS_SET(flags, FORK_KEEP_NOTIFY_SOCKET)) {
30,085✔
1831
                r = RET_NERRNO(unsetenv("NOTIFY_SOCKET"));
30,085✔
1832
                if (r < 0) {
×
1833
                        log_full_errno(prio, r, "Failed to unset $NOTIFY_SOCKET: %m");
×
1834
                        _exit(EXIT_FAILURE);
×
1835
                }
1836
        }
1837

1838
        if (FLAGS_SET(flags, FORK_FREEZE))
30,085✔
1839
                freeze();
×
1840

1841
        if (ret_pid) {
30,085✔
1842
                if (FLAGS_SET(flags, FORK_PID_ONLY))
26,144✔
1843
                        *ret_pid = PIDREF_MAKE_FROM_PID(getpid_cached());
7,084✔
1844
                else {
1845
                        r = pidref_set_self(ret_pid);
19,060✔
1846
                        if (r < 0) {
19,060✔
1847
                                log_full_errno(prio, r, "Failed to acquire PID reference on ourselves: %m");
×
1848
                                _exit(EXIT_FAILURE);
×
1849
                        }
1850
                }
1851
        }
1852

1853
        return 0;
1854
}
1855

1856
int safe_fork_full(
1857
                const char *name,
1858
                const int stdio_fds[3],
1859
                int except_fds[],
1860
                size_t n_except_fds,
1861
                ForkFlags flags,
1862
                pid_t *ret_pid) {
1863

1864
        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
21,786✔
1865
        int r;
13,021✔
1866

1867
        /* Getting the detached child process pid without pidfd is racy, so don't allow it if not returning
1868
         * a pidref to the caller. */
1869
        assert(!FLAGS_SET(flags, FORK_DETACH) || !ret_pid);
13,021✔
1870

1871
        r = pidref_safe_fork_full(name, stdio_fds, except_fds, n_except_fds, flags|FORK_PID_ONLY, ret_pid ? &pidref : NULL);
18,671✔
1872
        if (r < 0 || !ret_pid)
21,786✔
1873
                return r;
1874

1875
        *ret_pid = pidref.pid;
14,232✔
1876

1877
        return r;
14,232✔
1878
}
1879

1880
int namespace_fork(
1881
                const char *outer_name,
1882
                const char *inner_name,
1883
                int except_fds[],
1884
                size_t n_except_fds,
1885
                ForkFlags flags,
1886
                int pidns_fd,
1887
                int mntns_fd,
1888
                int netns_fd,
1889
                int userns_fd,
1890
                int root_fd,
1891
                pid_t *ret_pid) {
1892

1893
        int r;
163✔
1894

1895
        /* This is much like safe_fork(), but forks twice, and joins the specified namespaces in the middle
1896
         * process. This ensures that we are fully a member of the destination namespace, with pidns an all, so that
1897
         * /proc/self/fd works correctly. */
1898

1899
        r = safe_fork_full(outer_name,
477✔
1900
                           NULL,
1901
                           except_fds, n_except_fds,
1902
                           (flags|FORK_DEATHSIG_SIGINT|FORK_DEATHSIG_SIGTERM|FORK_DEATHSIG_SIGKILL) & ~(FORK_REOPEN_LOG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE), ret_pid);
163✔
1903
        if (r < 0)
314✔
1904
                return r;
1905
        if (r == 0) {
314✔
1906
                pid_t pid;
151✔
1907

1908
                /* Child */
1909

1910
                r = namespace_enter(pidns_fd, mntns_fd, netns_fd, userns_fd, root_fd);
151✔
1911
                if (r < 0) {
151✔
1912
                        log_full_errno(FLAGS_SET(flags, FORK_LOG) ? LOG_ERR : LOG_DEBUG, r, "Failed to join namespace: %m");
×
1913
                        _exit(EXIT_FAILURE);
×
1914
                }
1915

1916
                /* We mask a few flags here that either make no sense for the grandchild, or that we don't have to do again */
1917
                r = safe_fork_full(inner_name,
454✔
1918
                                   NULL,
1919
                                   except_fds, n_except_fds,
1920
                                   flags & ~(FORK_WAIT|FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_REARRANGE_STDIO), &pid);
151✔
1921
                if (r < 0)
303✔
1922
                        _exit(EXIT_FAILURE);
×
1923
                if (r == 0) {
303✔
1924
                        /* Child */
1925
                        if (ret_pid)
152✔
1926
                                *ret_pid = pid;
152✔
1927
                        return 0;
152✔
1928
                }
1929

1930
                r = wait_for_terminate_and_check(inner_name, pid, FLAGS_SET(flags, FORK_LOG) ? WAIT_LOG : 0);
302✔
1931
                if (r < 0)
151✔
1932
                        _exit(EXIT_FAILURE);
×
1933

1934
                _exit(r);
151✔
1935
        }
1936

1937
        return 1;
1938
}
1939

1940
int set_oom_score_adjust(int value) {
1941
        char t[DECIMAL_STR_MAX(int)];
3,982✔
1942

1943
        if (!oom_score_adjust_is_valid(value))
3,982✔
1944
                return -EINVAL;
3,982✔
1945

1946
        xsprintf(t, "%i", value);
3,982✔
1947

1948
        return write_string_file("/proc/self/oom_score_adj", t,
3,982✔
1949
                                 WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_DISABLE_BUFFER);
1950
}
1951

1952
int get_oom_score_adjust(int *ret) {
1953
        _cleanup_free_ char *t = NULL;
1,099✔
1954
        int r, a;
1,099✔
1955

1956
        r = read_virtual_file("/proc/self/oom_score_adj", SIZE_MAX, &t, NULL);
1,099✔
1957
        if (r < 0)
1,099✔
1958
                return r;
1959

1960
        delete_trailing_chars(t, WHITESPACE);
1,099✔
1961

1962
        r = safe_atoi(t, &a);
1,099✔
1963
        if (r < 0)
1,099✔
1964
                return r;
1965

1966
        if (!oom_score_adjust_is_valid(a))
1,099✔
1967
                return -ENODATA;
1968

1969
        if (ret)
1,099✔
1970
                *ret = a;
1,099✔
1971

1972
        return 0;
1973
}
1974

1975
/* Converts an RLIMIT_NICE-style limit value into the corresponding nice level: limits of 0 or 1 map to
 * PRIO_MAX-1, limits of PRIO_MAX-PRIO_MIN or more map to PRIO_MIN, and everything in between maps to
 * PRIO_MAX minus the limit. */
static int rlimit_to_nice(rlim_t limit) {
        if (limit >= (rlim_t) (PRIO_MAX - PRIO_MIN))
                return PRIO_MIN; /* i.e. -20 */

        if (limit > 1)
                return PRIO_MAX - (int) limit;

        return PRIO_MAX - 1; /* i.e. 19 */
}
1984

1985
/* Tries to set the nice level of the calling process to 'priority'. If that is refused for lack of
 * privileges, falls back to the closest level that RLIMIT_NICE permits.
 *
 * Returns 1 if the requested level is in effect, 0 if only the next best level could be established, and
 * a negative errno-style value on failure. */
int setpriority_closest(int priority) {
        struct rlimit nice_limit;
        int r, current, limit;

        /* First, simply try to apply what was asked for */
        r = RET_NERRNO(setpriority(PRIO_PROCESS, 0, priority));
        if (r >= 0)
                return 1;
        if (!ERRNO_IS_NEG_PRIVILEGE(r))
                return r;

        /* getpriority() may legitimately return negative values, hence errors are detected via errno */
        errno = 0;
        current = getpriority(PRIO_PROCESS, 0);
        if (errno != 0)
                return -errno;

        if (priority == current)
                return 1;

        /* Raising the nice level from the status quo is expected to always work. If it does not, then
         * the whole setpriority() system call is blocked for us, hence propagate the error right away. */
        if (priority > current)
                return r;

        if (getrlimit(RLIMIT_NICE, &nice_limit) < 0)
                return -errno;

        limit = rlimit_to_nice(nice_limit.rlim_cur);

        /* Push to the allowed limit if we are currently nicer than that. Note that we could also be less
         * nice than the limit allows, but still nicer than what was requested; in that case the current
         * value is the best choice and nothing is changed. */
        if (current > limit && setpriority(PRIO_PROCESS, 0, limit) < 0)
                return -errno;

        log_debug("Cannot set requested nice level (%i), using next best (%i).", priority, MIN(current, limit));
        return 0;
}
2025

2026
_noreturn_ void freeze(void) {
2027
        log_close();
×
2028

2029
        /* Make sure nobody waits for us (i.e. on one of our sockets) anymore. Note that we use
2030
         * close_all_fds_without_malloc() instead of plain close_all_fds() here, since we want this function
2031
         * to be compatible with being called from signal handlers. */
2032
        (void) close_all_fds_without_malloc(NULL, 0);
×
2033

2034
        /* Let's not freeze right away, but keep reaping zombies. */
2035
        for (;;) {
×
2036
                siginfo_t si = {};
×
2037

2038
                if (waitid(P_ALL, 0, &si, WEXITED) < 0 && errno != EINTR)
×
2039
                        break;
2040
        }
2041

2042
        /* waitid() failed with an ECHLD error (because there are no left-over child processes) or any other
2043
         * (unexpected) error. Freeze for good now! */
2044
        for (;;)
×
2045
                pause();
×
2046
}
2047

2048
int get_process_threads(pid_t pid) {
2049
        _cleanup_free_ char *t = NULL;
7✔
2050
        int n, r;
7✔
2051

2052
        if (pid < 0)
7✔
2053
                return -EINVAL;
2054

2055
        r = procfs_file_get_field(pid, "status", "Threads", &t);
7✔
2056
        if (r == -ENOENT)
7✔
2057
                return -ESRCH;
2058
        if (r < 0)
7✔
2059
                return r;
2060

2061
        r = safe_atoi(t, &n);
7✔
2062
        if (r < 0)
7✔
2063
                return r;
2064
        if (n < 0)
7✔
2065
                return -EINVAL;
×
2066

2067
        return n;
2068
}
2069

2070
/* Checks whether we are running in a reaper process, i.e. whether we are expected to deal with processes
 * reparented to us: this is the case if we are PID 1, or if PR_GET_CHILD_SUBREAPER reports a non-zero
 * setting. Returns > 0 if so, 0 if not, and a negative errno-style value if the prctl() fails. */
int is_reaper_process(void) {
        int subreaper = 0;

        if (getpid_cached() == 1)
                return true;

        if (prctl(PR_GET_CHILD_SUBREAPER, (unsigned long) &subreaper, 0UL, 0UL, 0UL) < 0)
                return -errno;

        return subreaper != 0;
}
2084

2085
/* Turns the PR_SET_CHILD_SUBREAPER setting of the calling process on or off, according to 'b'. For PID 1
 * no prctl() is issued: enabling is a no-op that returns 0, disabling is refused with -EINVAL. Returns 0
 * on success, a negative errno-style value on failure. */
int make_reaper_process(bool b) {

        if (getpid_cached() == 1)
                return b ? 0 : -EINVAL;

        /* Some prctl()s insist that all 5 arguments are specified, others do not. Always specify all of
         * them, to avoid any ambiguities. */
        if (prctl(PR_SET_CHILD_SUBREAPER, (unsigned long) b, 0UL, 0UL, 0UL) < 0)
                return -errno;

        return 0;
}
2102

2103
DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(posix_spawnattr_t*, posix_spawnattr_destroy, NULL);
×
2104

2105
int posix_spawn_wrapper(
2106
                const char *path,
2107
                char * const *argv,
2108
                char * const *envp,
2109
                const char *cgroup,
2110
                PidRef *ret_pidref) {
2111

2112
        short flags = POSIX_SPAWN_SETSIGMASK;
2,200✔
2113
        posix_spawnattr_t attr;
2,200✔
2114
        sigset_t mask;
2,200✔
2115
        int r;
2,200✔
2116

2117
        /* Forks and invokes 'path' with 'argv' and 'envp' using CLONE_VM and CLONE_VFORK, which means the
2118
         * caller will be blocked until the child either exits or exec's. The memory of the child will be
2119
         * fully shared with the memory of the parent, so that there are no copy-on-write or memory.max
2120
         * issues.
2121
         *
2122
         * Also, move the newly-created process into 'cgroup' through POSIX_SPAWN_SETCGROUP (clone3())
2123
         * if available.
2124
         * returns 1: We're already in the right cgroup
2125
         *         0: 'cgroup' not specified or POSIX_SPAWN_SETCGROUP is not supported. The caller
2126
         *            needs to call 'cg_attach' on their own */
2127

2128
        assert(path);
2,200✔
2129
        assert(argv);
2,200✔
2130
        assert(ret_pidref);
2,200✔
2131

2132
        assert_se(sigfillset(&mask) >= 0);
2,200✔
2133

2134
        r = posix_spawnattr_init(&attr);
2,200✔
2135
        if (r != 0)
2,200✔
2136
                return -r; /* These functions return a positive errno on failure */
2,200✔
2137

2138
        /* Initialization needs to succeed before we can set up a destructor. */
2139
        _unused_ _cleanup_(posix_spawnattr_destroyp) posix_spawnattr_t *attr_destructor = &attr;
4,400✔
2140

2141
#if HAVE_PIDFD_SPAWN
2142
        static bool have_clone_into_cgroup = true; /* kernel 5.7+ */
2,200✔
2143
        _cleanup_close_ int cgroup_fd = -EBADF;
2,200✔
2144

2145
        if (cgroup && have_clone_into_cgroup) {
2,200✔
2146
                _cleanup_free_ char *resolved_cgroup = NULL;
2,200✔
2147

2148
                r = cg_get_path_and_check(
2,200✔
2149
                                SYSTEMD_CGROUP_CONTROLLER,
2150
                                cgroup,
2151
                                /* suffix= */ NULL,
2152
                                &resolved_cgroup);
2153
                if (r < 0)
2,200✔
2154
                        return r;
2155

2156
                cgroup_fd = open(resolved_cgroup, O_PATH|O_DIRECTORY|O_CLOEXEC);
2,200✔
2157
                if (cgroup_fd < 0)
2,200✔
2158
                        return -errno;
×
2159

2160
                r = posix_spawnattr_setcgroup_np(&attr, cgroup_fd);
2,200✔
2161
                if (r != 0)
2,200✔
2162
                        return -r;
×
2163

2164
                flags |= POSIX_SPAWN_SETCGROUP;
2,200✔
2165
        }
2166
#endif
2167

2168
        r = posix_spawnattr_setflags(&attr, flags);
2,200✔
2169
        if (r != 0)
2,200✔
2170
                return -r;
×
2171
        r = posix_spawnattr_setsigmask(&attr, &mask);
2,200✔
2172
        if (r != 0)
2,200✔
2173
                return -r;
×
2174

2175
#if HAVE_PIDFD_SPAWN
2176
        _cleanup_close_ int pidfd = -EBADF;
2,200✔
2177

2178
        r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp);
2,200✔
2179
        if (ERRNO_IS_NOT_SUPPORTED(r) && FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP) && cg_is_threaded(cgroup) > 0)
2,200✔
2180
                return -EUCLEAN; /* clone3() could also return EOPNOTSUPP if the target cgroup is in threaded mode,
2181
                                    turn that into something recognizable */
2182
        if ((ERRNO_IS_NOT_SUPPORTED(r) || ERRNO_IS_PRIVILEGE(r) || r == E2BIG) &&
2,200✔
2183
            FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP)) {
2184
                /* Compiled on a newer host, or seccomp&friends blocking clone3()? Fallback, but
2185
                 * need to disable POSIX_SPAWN_SETCGROUP, which is what redirects to clone3().
2186
                 * Note that we might get E2BIG here since some kernels (e.g. 5.4) support clone3()
2187
                 * but not CLONE_INTO_CGROUP. */
2188

2189
                /* CLONE_INTO_CGROUP definitely won't work, hence remember the fact so that we don't
2190
                 * retry every time. */
2191
                have_clone_into_cgroup = false;
×
2192

2193
                flags &= ~POSIX_SPAWN_SETCGROUP;
×
2194
                r = posix_spawnattr_setflags(&attr, flags);
×
2195
                if (r != 0)
×
2196
                        return -r;
×
2197

2198
                r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp);
×
2199
        }
2200
        if (r != 0)
2,200✔
2201
                return -r;
×
2202

2203
        r = pidref_set_pidfd_consume(ret_pidref, TAKE_FD(pidfd));
2,200✔
2204
        if (r < 0)
2,200✔
2205
                return r;
2206

2207
        return FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP);
2,200✔
2208
#else
2209
        pid_t pid;
2210

2211
        r = posix_spawn(&pid, path, NULL, &attr, argv, envp);
2212
        if (r != 0)
2213
                return -r;
2214

2215
        r = pidref_set_pid(ret_pidref, pid);
2216
        if (r < 0)
2217
                return r;
2218

2219
        return 0; /* We did not use CLONE_INTO_CGROUP so return 0, the caller will have to move the child */
2220
#endif
2221
}
2222

2223
/* Opens the "/proc" directory for enumeration. On success returns 0 and stores the directory stream in
 * *ret; on failure returns the negated errno of opendir() and leaves *ret untouched. */
int proc_dir_open(DIR **ret) {
        assert(ret);

        DIR *dir = opendir("/proc");
        if (!dir)
                return -errno;

        *ret = dir;
        return 0;
}
2235

2236
int proc_dir_read(DIR *d, pid_t *ret) {
2237
        assert(d);
982✔
2238

2239
        for (;;) {
1,646✔
2240
                struct dirent *de;
1,646✔
2241

2242
                errno = 0;
1,646✔
2243
                de = readdir_no_dot(d);
1,646✔
2244
                if (!de) {
1,646✔
2245
                        if (errno != 0)
11✔
2246
                                return -errno;
×
2247

2248
                        break;
11✔
2249
                }
2250

2251
                if (!IN_SET(de->d_type, DT_DIR, DT_UNKNOWN))
1,635✔
2252
                        continue;
543✔
2253

2254
                if (parse_pid(de->d_name, ret) >= 0)
1,092✔
2255
                        return 1;
2256
        }
2257

2258
        if (ret)
11✔
2259
                *ret = 0;
11✔
2260
        return 0;
2261
}
2262

2263
/* Like proc_dir_read(), but initializes a PidRef for the PID found. PIDs for which pidref_set_pid()
 * returns -ESRCH are skipped. Returns 1 if a pidref was set up, 0 when the end of the directory is
 * reached (in which case *ret is set to PIDREF_NULL if 'ret' is non-NULL), and a negative errno-style
 * value on failure. */
int proc_dir_read_pidref(DIR *d, PidRef *ret) {
        pid_t pid;
        int r;

        assert(d);

        do {
                r = proc_dir_read(d, &pid);
                if (r < 0)
                        return r;
                if (r == 0) {
                        /* End of directory */
                        if (ret)
                                *ret = PIDREF_NULL;
                        return 0;
                }

                r = pidref_set_pid(ret, pid);
        } while (r == -ESRCH); /* gone by now? skip it */

        if (r < 0)
                return r;

        return 1;
}
2290

2291
static const char *const sigchld_code_table[] = {
2292
        [CLD_EXITED] = "exited",
2293
        [CLD_KILLED] = "killed",
2294
        [CLD_DUMPED] = "dumped",
2295
        [CLD_TRAPPED] = "trapped",
2296
        [CLD_STOPPED] = "stopped",
2297
        [CLD_CONTINUED] = "continued",
2298
};
2299

2300
DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int);
7,629✔
2301

2302
static const char* const sched_policy_table[] = {
2303
        [SCHED_OTHER] = "other",
2304
        [SCHED_BATCH] = "batch",
2305
        [SCHED_IDLE] = "idle",
2306
        [SCHED_FIFO] = "fifo",
2307
        [SCHED_RR] = "rr",
2308
};
2309

2310
DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX);
3✔
2311

2312
_noreturn_ void report_errno_and_exit(int errno_fd, int error) {
2313
        int r;
48✔
2314

2315
        if (error >= 0)
48✔
2316
                _exit(EXIT_SUCCESS);
47✔
2317

2318
        assert(errno_fd >= 0);
1✔
2319

2320
        r = loop_write(errno_fd, &error, sizeof(error));
1✔
2321
        if (r < 0)
1✔
2322
                log_debug_errno(r, "Failed to write errno to errno_fd=%d: %m", errno_fd);
×
2323

2324
        _exit(EXIT_FAILURE);
1✔
2325
}
2326

2327
/* Reads an error code, as written by report_errno_and_exit(), from 'errno_fd'. Returns 0 if nothing was
 * sent (assumed success) or if 0 was sent, the negative error reported by the child, or -EIO if reading
 * failed or the data received is malformed. */
int read_errno(int errno_fd) {
        int error;

        assert(errno_fd >= 0);

        /* It is impossible to distinguish between an error code returned by the child and an I/O error
         * that occurred while reading it. Hence this function logs errors and returns EIO in the latter
         * case. */

        ssize_t n = loop_read(errno_fd, &error, sizeof(error), /* do_poll = */ false);
        if (n < 0) {
                log_debug_errno(n, "Failed to read errno: %m");
                return -EIO;
        }

        /* The process exited without reporting an error, assume success */
        if (n == 0)
                return 0;

        if ((size_t) n != sizeof(error))
                return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Received unexpected amount of bytes while reading errno.");

        if (error == 0)
                return 0;

        if (error > 0)
                return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Received an errno, but it's a positive value.");

        /* The child process reported an error, return it */
        return log_debug_errno(error, "Child process failed with errno: %m");
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc