• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 15837872256

23 Jun 2025 09:28PM UTC coverage: 72.09% (-0.02%) from 72.105%
15837872256

push

github

bluca
test-cpu-set-util: fix check for CPUSet.allocated

The check was simply wrong and meaningless, as it always checked
CPUSet.allocated is greater than or equals to 1, as sizeof(__cpu_mask) is 8.

Let's make the test more strict.

300458 of 416781 relevant lines covered (72.09%)

709101.32 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.87
/src/basic/process-util.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <linux/oom.h>
4
#include <pthread.h>
5
#include <spawn.h>
6
#include <stdio.h>
7
#include <sys/mount.h>
8
#include <sys/personality.h>
9
#include <sys/prctl.h>
10
#include <sys/wait.h>
11
#include <syslog.h>
12
#include <threads.h>
13
#include <unistd.h>
14
#if HAVE_VALGRIND_VALGRIND_H
15
#include <valgrind/valgrind.h>
16
#endif
17

18
#include "sd-messages.h"
19

20
#include "alloc-util.h"
21
#include "architecture.h"
22
#include "argv-util.h"
23
#include "cgroup-util.h"
24
#include "dirent-util.h"
25
#include "env-file.h"
26
#include "errno-util.h"
27
#include "escape.h"
28
#include "fd-util.h"
29
#include "fileio.h"
30
#include "fs-util.h"
31
#include "hostname-util.h"
32
#include "io-util.h"
33
#include "iovec-util.h"
34
#include "locale-util.h"
35
#include "log.h"
36
#include "memory-util.h"
37
#include "missing_sched.h"
38
#include "missing_syscall.h"
39
#include "mountpoint-util.h"
40
#include "namespace-util.h"
41
#include "nulstr-util.h"
42
#include "parse-util.h"
43
#include "path-util.h"
44
#include "pidfd-util.h"
45
#include "pidref.h"
46
#include "process-util.h"
47
#include "raw-clone.h"
48
#include "rlimit-util.h"
49
#include "signal-util.h"
50
#include "socket-util.h"
51
#include "stat-util.h"
52
#include "stdio-util.h"
53
#include "string-table.h"
54
#include "string-util.h"
55
#include "time-util.h"
56
#include "user-util.h"
57

58
/* The kernel limits userspace processes to TASK_COMM_LEN (16 bytes), but allows higher values for its own
59
 * workers, e.g. "kworker/u9:3-kcryptd/253:0". Let's pick a fixed smallish limit that will work for the kernel.
60
 */
61
#define COMM_MAX_LEN 128
62

63
static int get_process_state(pid_t pid) {
11,202✔
64
        _cleanup_free_ char *line = NULL;
11,202✔
65
        const char *p;
11,202✔
66
        char state;
11,202✔
67
        int r;
11,202✔
68

69
        assert(pid >= 0);
11,202✔
70

71
        /* Shortcut: if we are asked about our own state, we are obviously running */
72
        if (pid == 0 || pid == getpid_cached())
11,202✔
73
                return (unsigned char) 'R';
×
74

75
        p = procfs_file_alloca(pid, "stat");
11,202✔
76

77
        r = read_one_line_file(p, &line);
11,202✔
78
        if (r == -ENOENT)
11,202✔
79
                return -ESRCH;
80
        if (r < 0)
8,806✔
81
                return r;
82

83
        p = strrchr(line, ')');
8,802✔
84
        if (!p)
8,802✔
85
                return -EIO;
86

87
        p++;
8,802✔
88

89
        if (sscanf(p, " %c", &state) != 1)
8,802✔
90
                return -EIO;
91

92
        return (unsigned char) state;
8,802✔
93
}
94

95
int pid_get_comm(pid_t pid, char **ret) {
96
        _cleanup_free_ char *escaped = NULL, *comm = NULL;
45,411✔
97
        int r;
45,411✔
98

99
        assert(pid >= 0);
45,411✔
100
        assert(ret);
45,411✔
101

102
        if (pid == 0 || pid == getpid_cached()) {
45,411✔
103
                comm = new0(char, TASK_COMM_LEN + 1); /* Must fit in 16 byte according to prctl(2) */
27,004✔
104
                if (!comm)
27,004✔
105
                        return -ENOMEM;
106

107
                if (prctl(PR_GET_NAME, comm) < 0)
27,004✔
108
                        return -errno;
×
109
        } else {
110
                const char *p;
18,407✔
111

112
                p = procfs_file_alloca(pid, "comm");
18,407✔
113

114
                /* Note that process names of kernel threads can be much longer than TASK_COMM_LEN */
115
                r = read_one_line_file(p, &comm);
18,407✔
116
                if (r == -ENOENT)
18,407✔
117
                        return -ESRCH;
118
                if (r < 0)
14,573✔
119
                        return r;
120
        }
121

122
        escaped = new(char, COMM_MAX_LEN);
41,574✔
123
        if (!escaped)
41,574✔
124
                return -ENOMEM;
125

126
        /* Escape unprintable characters, just in case, but don't grow the string beyond the underlying size */
127
        cellescape(escaped, COMM_MAX_LEN, comm);
41,574✔
128

129
        *ret = TAKE_PTR(escaped);
41,574✔
130
        return 0;
41,574✔
131
}
132

133
int pidref_get_comm(const PidRef *pid, char **ret) {
134
        _cleanup_free_ char *comm = NULL;
30✔
135
        int r;
30✔
136

137
        if (!pidref_is_set(pid))
30✔
138
                return -ESRCH;
139

140
        if (pidref_is_remote(pid))
60✔
141
                return -EREMOTE;
142

143
        r = pid_get_comm(pid->pid, &comm);
30✔
144
        if (r < 0)
30✔
145
                return r;
146

147
        r = pidref_verify(pid);
30✔
148
        if (r < 0)
30✔
149
                return r;
150

151
        if (ret)
30✔
152
                *ret = TAKE_PTR(comm);
30✔
153
        return 0;
154
}
155

156
static int pid_get_cmdline_nulstr(
18,317✔
157
                pid_t pid,
158
                size_t max_size,
159
                ProcessCmdlineFlags flags,
160
                char **ret,
161
                size_t *ret_size) {
162

163
        _cleanup_free_ char *t = NULL;
18,317✔
164
        const char *p;
18,317✔
165
        size_t k;
18,317✔
166
        int r;
18,317✔
167

168
        /* Retrieves a process' command line as a "sized nulstr", i.e. possibly without the last NUL, but
169
         * with a specified size.
170
         *
171
         * If PROCESS_CMDLINE_COMM_FALLBACK is specified in flags and the process has no command line set
172
         * (the case for kernel threads), or has a command line that resolves to the empty string, will
173
         * return the "comm" name of the process instead. This will use at most _SC_ARG_MAX bytes of input
174
         * data.
175
         *
176
         * Returns an error, 0 if output was read but is truncated, 1 otherwise.
177
         */
178

179
        p = procfs_file_alloca(pid, "cmdline");
18,533✔
180
        r = read_virtual_file(p, max_size, &t, &k); /* Let's assume that each input byte results in >= 1
18,317✔
181
                                                     * columns of output. We ignore zero-width codepoints. */
182
        if (r == -ENOENT)
18,317✔
183
                return -ESRCH;
184
        if (r < 0)
14,394✔
185
                return r;
186

187
        if (k == 0) {
14,393✔
188
                if (!(flags & PROCESS_CMDLINE_COMM_FALLBACK))
775✔
189
                        return -ENOENT;
740✔
190

191
                /* Kernel threads have no argv[] */
192
                _cleanup_free_ char *comm = NULL;
35✔
193

194
                r = pid_get_comm(pid, &comm);
35✔
195
                if (r < 0)
35✔
196
                        return r;
197

198
                free(t);
35✔
199
                t = strjoin("[", comm, "]");
35✔
200
                if (!t)
35✔
201
                        return -ENOMEM;
202

203
                k = strlen(t);
35✔
204
                r = k <= max_size;
35✔
205
                if (r == 0) /* truncation */
35✔
206
                        t[max_size] = '\0';
12✔
207
        }
208

209
        if (ret)
13,653✔
210
                *ret = TAKE_PTR(t);
13,653✔
211
        if (ret_size)
13,653✔
212
                *ret_size = k;
13,653✔
213

214
        return r;
215
}
216

217
/* Reads the command line of process pid via pid_get_cmdline_nulstr() and stores it in *ret as one
 * formatted string. max_columns is passed on both as the size limit for reading and as the width limit
 * of the unquoted output format. flags selects the output format, see the comment in the body.
 * Returns 0 on success and a negative errno-style value on failure. */
int pid_get_cmdline(pid_t pid, size_t max_columns, ProcessCmdlineFlags flags, char **ret) {
        _cleanup_free_ char *t = NULL;
        size_t k;
        char *ans;

        assert(pid >= 0);
        assert(ret);

        /* Retrieve and format a command line. See above for discussion of retrieval options.
         *
         * There are two main formatting modes:
         *
         * - when PROCESS_CMDLINE_QUOTE is specified, output is quoted in C/Python style. If no shell special
         *   characters are present, this output can be copy-pasted into the terminal to execute. UTF-8
         *   output is assumed.
         *
         * - otherwise, a compact non-roundtrippable form is returned. Non-UTF8 bytes are replaced by �. The
         *   returned string is of the specified console width at most, abbreviated with an ellipsis.
         *
         * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
         * PROCESS_CMDLINE_COMM_FALLBACK is not specified). Returns 0 and sets *ret otherwise. */

        /* "full" is 1 if the whole command line was read, 0 if it was truncated, negative on error. */
        int full = pid_get_cmdline_nulstr(pid, max_columns, flags, &t, &k);
        if (full < 0)
                return full;

        if (flags & (PROCESS_CMDLINE_QUOTE | PROCESS_CMDLINE_QUOTE_POSIX)) {
                ShellEscapeFlags shflags = SHELL_ESCAPE_EMPTY |
                        FLAGS_SET(flags, PROCESS_CMDLINE_QUOTE_POSIX) * SHELL_ESCAPE_POSIX;

                /* Quoted output and locale-dependent escaping are mutually exclusive. */
                assert(!(flags & PROCESS_CMDLINE_USE_LOCALE));

                _cleanup_strv_free_ char **args = NULL;

                /* Drop trailing NULs, otherwise strv_parse_nulstr() adds additional empty strings at the end.
                 * See also issue #21186. */
                args = strv_parse_nulstr_full(t, k, /* drop_trailing_nuls = */ true);
                if (!args)
                        return -ENOMEM;

                ans = quote_command_line(args, shflags);
                if (!ans)
                        return -ENOMEM;
        } else {
                /* Arguments are separated by NULs. Let's replace those with spaces. The last byte is left
                 * alone. The bound is spelled "i + 1 < k" rather than "i < k - 1" so that it cannot wrap
                 * around should k ever be zero. */
                for (size_t i = 0; i + 1 < k; i++)
                        if (t[i] == '\0')
                                t[i] = ' ';

                delete_trailing_chars(t, WHITESPACE);

                bool eight_bit = (flags & PROCESS_CMDLINE_USE_LOCALE) && !is_locale_utf8();

                /* Force an ellipsis if the input was already truncated while reading. */
                ans = escape_non_printable_full(t, max_columns,
                                                eight_bit * XESCAPE_8_BIT | !full * XESCAPE_FORCE_ELLIPSIS);
                if (!ans)
                        return -ENOMEM;

                ans = str_realloc(ans);
        }

        *ret = ans;
        return 0;
}
281

282
int pidref_get_cmdline(const PidRef *pid, size_t max_columns, ProcessCmdlineFlags flags, char **ret) {
283
        _cleanup_free_ char *s = NULL;
125✔
284
        int r;
125✔
285

286
        if (!pidref_is_set(pid))
125✔
287
                return -ESRCH;
288

289
        if (pidref_is_remote(pid))
250✔
290
                return -EREMOTE;
291

292
        r = pid_get_cmdline(pid->pid, max_columns, flags, &s);
125✔
293
        if (r < 0)
125✔
294
                return r;
295

296
        r = pidref_verify(pid);
125✔
297
        if (r < 0)
125✔
298
                return r;
299

300
        if (ret)
125✔
301
                *ret = TAKE_PTR(s);
125✔
302
        return 0;
303
}
304

305
int pid_get_cmdline_strv(pid_t pid, ProcessCmdlineFlags flags, char ***ret) {
306
        _cleanup_free_ char *t = NULL;
4,927✔
307
        char **args;
4,927✔
308
        size_t k;
4,927✔
309
        int r;
4,927✔
310

311
        assert(pid >= 0);
4,927✔
312
        assert((flags & ~PROCESS_CMDLINE_COMM_FALLBACK) == 0);
4,927✔
313
        assert(ret);
4,927✔
314

315
        r = pid_get_cmdline_nulstr(pid, SIZE_MAX, flags, &t, &k);
4,927✔
316
        if (r < 0)
4,927✔
317
                return r;
318

319
        args = strv_parse_nulstr_full(t, k, /* drop_trailing_nuls = */ true);
4,850✔
320
        if (!args)
4,850✔
321
                return -ENOMEM;
322

323
        *ret = args;
4,850✔
324
        return 0;
4,850✔
325
}
326

327
int pidref_get_cmdline_strv(const PidRef *pid, ProcessCmdlineFlags flags, char ***ret) {
328
        _cleanup_strv_free_ char **args = NULL;
×
329
        int r;
×
330

331
        if (!pidref_is_set(pid))
×
332
                return -ESRCH;
333

334
        if (pidref_is_remote(pid))
×
335
                return -EREMOTE;
336

337
        r = pid_get_cmdline_strv(pid->pid, flags, &args);
×
338
        if (r < 0)
×
339
                return r;
340

341
        r = pidref_verify(pid);
×
342
        if (r < 0)
×
343
                return r;
344

345
        if (ret)
×
346
                *ret = TAKE_PTR(args);
×
347

348
        return 0;
349
}
350

351
int container_get_leader(const char *machine, pid_t *pid) {
352
        _cleanup_free_ char *s = NULL, *class = NULL;
10✔
353
        const char *p;
10✔
354
        pid_t leader;
10✔
355
        int r;
10✔
356

357
        assert(machine);
10✔
358
        assert(pid);
10✔
359

360
        if (streq(machine, ".host")) {
10✔
361
                *pid = 1;
1✔
362
                return 0;
1✔
363
        }
364

365
        if (!hostname_is_valid(machine, 0))
9✔
366
                return -EINVAL;
367

368
        p = strjoina("/run/systemd/machines/", machine);
45✔
369
        r = parse_env_file(NULL, p,
9✔
370
                           "LEADER", &s,
371
                           "CLASS", &class);
372
        if (r == -ENOENT)
9✔
373
                return -EHOSTDOWN;
374
        if (r < 0)
9✔
375
                return r;
376
        if (!s)
9✔
377
                return -EIO;
378

379
        if (!streq_ptr(class, "container"))
9✔
380
                return -EIO;
381

382
        r = parse_pid(s, &leader);
9✔
383
        if (r < 0)
9✔
384
                return r;
385
        if (leader <= 1)
9✔
386
                return -EIO;
387

388
        *pid = leader;
9✔
389
        return 0;
9✔
390
}
391

392
int pid_is_kernel_thread(pid_t pid) {
393
        _cleanup_free_ char *line = NULL;
3,238✔
394
        unsigned long long flags;
3,238✔
395
        size_t l, i;
3,238✔
396
        const char *p;
3,238✔
397
        char *q;
3,238✔
398
        int r;
3,238✔
399

400
        if (IN_SET(pid, 0, 1) || pid == getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */
3,238✔
401
                return 0;
14✔
402
        if (!pid_is_valid(pid))
3,224✔
403
                return -EINVAL;
404

405
        p = procfs_file_alloca(pid, "stat");
3,224✔
406
        r = read_one_line_file(p, &line);
3,224✔
407
        if (r == -ENOENT)
3,224✔
408
                return -ESRCH;
409
        if (r < 0)
3,224✔
410
                return r;
411

412
        /* Skip past the comm field */
413
        q = strrchr(line, ')');
3,224✔
414
        if (!q)
3,224✔
415
                return -EINVAL;
416
        q++;
3,224✔
417

418
        /* Skip 6 fields to reach the flags field */
419
        for (i = 0; i < 6; i++) {
22,568✔
420
                l = strspn(q, WHITESPACE);
19,344✔
421
                if (l < 1)
19,344✔
422
                        return -EINVAL;
423
                q += l;
19,344✔
424

425
                l = strcspn(q, WHITESPACE);
19,344✔
426
                if (l < 1)
19,344✔
427
                        return -EINVAL;
428
                q += l;
19,344✔
429
        }
430

431
        /* Skip preceding whitespace */
432
        l = strspn(q, WHITESPACE);
3,224✔
433
        if (l < 1)
3,224✔
434
                return -EINVAL;
435
        q += l;
3,224✔
436

437
        /* Truncate the rest */
438
        l = strcspn(q, WHITESPACE);
3,224✔
439
        if (l < 1)
3,224✔
440
                return -EINVAL;
441
        q[l] = 0;
3,224✔
442

443
        r = safe_atollu(q, &flags);
3,224✔
444
        if (r < 0)
3,224✔
445
                return r;
446

447
        return !!(flags & PF_KTHREAD);
3,224✔
448
}
449

450
int pidref_is_kernel_thread(const PidRef *pid) {
451
        int result, r;
1,403✔
452

453
        if (!pidref_is_set(pid))
1,403✔
454
                return -ESRCH;
455

456
        if (pidref_is_remote(pid))
1,403✔
457
                return -EREMOTE;
458

459
        result = pid_is_kernel_thread(pid->pid);
1,403✔
460
        if (result < 0)
1,403✔
461
                return result;
462

463
        r = pidref_verify(pid); /* Verify that the PID wasn't reused since */
1,403✔
464
        if (r < 0)
1,403✔
465
                return r;
×
466

467
        return result;
468
}
469

470
static int get_process_link_contents(pid_t pid, const char *proc_file, char **ret) {
12,945✔
471
        const char *p;
12,945✔
472
        int r;
12,945✔
473

474
        assert(proc_file);
12,945✔
475

476
        p = procfs_file_alloca(pid, proc_file);
12,949✔
477

478
        r = readlink_malloc(p, ret);
12,945✔
479
        return (r == -ENOENT && proc_mounted() > 0) ? -ESRCH : r;
12,945✔
480
}
481

482
int get_process_exe(pid_t pid, char **ret) {
483
        char *d;
12,915✔
484
        int r;
12,915✔
485

486
        assert(pid >= 0);
12,915✔
487

488
        r = get_process_link_contents(pid, "exe", ret);
12,915✔
489
        if (r < 0)
12,915✔
490
                return r;
491

492
        if (ret) {
8,839✔
493
                d = endswith(*ret, " (deleted)");
8,839✔
494
                if (d)
8,839✔
495
                        *d = '\0';
×
496
        }
497

498
        return 0;
499
}
500

501
int pid_get_uid(pid_t pid, uid_t *ret) {
502
        int r;
3,669✔
503

504
        assert(pid >= 0);
3,669✔
505
        assert(ret);
3,669✔
506

507
        if (pid == 0 || pid == getpid_cached()) {
3,669✔
508
                *ret = getuid();
4✔
509
                return 0;
3,669✔
510
        }
511

512
        _cleanup_free_ char *v = NULL;
3,665✔
513
        r = procfs_file_get_field(pid, "status", "Uid", &v);
3,665✔
514
        if (r == -ENOENT)
3,665✔
515
                return -ESRCH;
516
        if (r < 0)
173✔
517
                return r;
518

519
        return parse_uid(v, ret);
173✔
520
}
521

522
int pidref_get_uid(const PidRef *pid, uid_t *ret) {
523
        int r;
54✔
524

525
        if (!pidref_is_set(pid))
54✔
526
                return -ESRCH;
54✔
527

528
        if (pidref_is_remote(pid))
54✔
529
                return -EREMOTE;
530

531
        if (pid->fd >= 0) {
54✔
532
                r = pidfd_get_uid(pid->fd, ret);
54✔
533
                if (!ERRNO_IS_NEG_NOT_SUPPORTED(r))
54✔
534
                        return r;
535
        }
536

537
        uid_t uid;
9✔
538
        r = pid_get_uid(pid->pid, &uid);
9✔
539
        if (r < 0)
9✔
540
                return r;
541

542
        r = pidref_verify(pid);
9✔
543
        if (r < 0)
9✔
544
                return r;
545

546
        if (ret)
9✔
547
                *ret = uid;
9✔
548
        return 0;
549
}
550

551
int get_process_gid(pid_t pid, gid_t *ret) {
552
        int r;
3,660✔
553

554
        assert(pid >= 0);
3,660✔
555
        assert(ret);
3,660✔
556

557
        if (pid == 0 || pid == getpid_cached()) {
3,660✔
558
                *ret = getgid();
1✔
559
                return 0;
3,660✔
560
        }
561

562
        _cleanup_free_ char *v = NULL;
3,659✔
563
        r = procfs_file_get_field(pid, "status", "Gid", &v);
3,659✔
564
        if (r == -ENOENT)
3,659✔
565
                return -ESRCH;
566
        if (r < 0)
167✔
567
                return r;
568

569
        return parse_gid(v, ret);
3,659✔
570
}
571

572
int get_process_cwd(pid_t pid, char **ret) {
573
        assert(pid >= 0);
15✔
574

575
        if (pid == 0 || pid == getpid_cached())
15✔
576
                return safe_getcwd(ret);
×
577

578
        return get_process_link_contents(pid, "cwd", ret);
15✔
579
}
580

581
int get_process_root(pid_t pid, char **ret) {
582
        assert(pid >= 0);
15✔
583
        return get_process_link_contents(pid, "root", ret);
15✔
584
}
585

586
#define ENVIRONMENT_BLOCK_MAX (5U*1024U*1024U)
587

588
int get_process_environ(pid_t pid, char **ret) {
589
        _cleanup_fclose_ FILE *f = NULL;
17✔
590
        _cleanup_free_ char *outcome = NULL;
17✔
591
        size_t sz = 0;
17✔
592
        const char *p;
17✔
593
        int r;
17✔
594

595
        assert(pid >= 0);
17✔
596
        assert(ret);
17✔
597

598
        p = procfs_file_alloca(pid, "environ");
17✔
599

600
        r = fopen_unlocked(p, "re", &f);
17✔
601
        if (r == -ENOENT)
17✔
602
                return -ESRCH;
603
        if (r < 0)
17✔
604
                return r;
605

606
        for (;;) {
7,005✔
607
                char c;
7,022✔
608

609
                if (sz >= ENVIRONMENT_BLOCK_MAX)
7,022✔
610
                        return -ENOBUFS;
×
611

612
                if (!GREEDY_REALLOC(outcome, sz + 5))
7,022✔
613
                        return -ENOMEM;
614

615
                r = safe_fgetc(f, &c);
7,022✔
616
                if (r < 0)
7,022✔
617
                        return r;
618
                if (r == 0)
7,022✔
619
                        break;
620

621
                if (c == '\0')
7,005✔
622
                        outcome[sz++] = '\n';
249✔
623
                else
624
                        sz += cescape_char(c, outcome + sz);
6,756✔
625
        }
626

627
        outcome[sz] = '\0';
17✔
628
        *ret = TAKE_PTR(outcome);
17✔
629

630
        return 0;
17✔
631
}
632

633
int pid_get_ppid(pid_t pid, pid_t *ret) {
634
        _cleanup_free_ char *line = NULL;
1,401✔
635
        unsigned long ppid;
1,401✔
636
        const char *p;
1,401✔
637
        int r;
1,401✔
638

639
        assert(pid >= 0);
1,401✔
640

641
        if (pid == 0)
1,401✔
642
                pid = getpid_cached();
1✔
643
        if (pid == 1) /* PID 1 has no parent, shortcut this case */
1,401✔
644
                return -EADDRNOTAVAIL;
645

646
        if (pid == getpid_cached()) {
1,397✔
647
                if (ret)
6✔
648
                        *ret = getppid();
6✔
649
                return 0;
6✔
650
        }
651

652
        p = procfs_file_alloca(pid, "stat");
1,391✔
653
        r = read_one_line_file(p, &line);
1,391✔
654
        if (r == -ENOENT)
1,391✔
655
                return -ESRCH;
656
        if (r < 0)
1,390✔
657
                return r;
658

659
        /* Let's skip the pid and comm fields. The latter is enclosed in () but does not escape any () in its
660
         * value, so let's skip over it manually */
661

662
        p = strrchr(line, ')');
1,390✔
663
        if (!p)
1,390✔
664
                return -EIO;
665
        p++;
1,390✔
666

667
        if (sscanf(p, " "
1,390✔
668
                   "%*c "  /* state */
669
                   "%lu ", /* ppid */
670
                   &ppid) != 1)
671
                return -EIO;
672

673
        /* If ppid is zero the process has no parent. Which might be the case for PID 1 (caught above)
674
         * but also for processes originating in other namespaces that are inserted into a pidns.
675
         * Return a recognizable error in this case. */
676
        if (ppid == 0)
1,390✔
677
                return -EADDRNOTAVAIL;
678

679
        if ((pid_t) ppid < 0 || (unsigned long) (pid_t) ppid != ppid)
1,390✔
680
                return -ERANGE;
681

682
        if (ret)
1,390✔
683
                *ret = (pid_t) ppid;
1,390✔
684

685
        return 0;
686
}
687

688
int pidref_get_ppid(const PidRef *pidref, pid_t *ret) {
689
        int r;
2,431✔
690

691
        if (!pidref_is_set(pidref))
2,431✔
692
                return -ESRCH;
2,431✔
693

694
        if (pidref_is_remote(pidref))
2,431✔
695
                return -EREMOTE;
696

697
        if (pidref->fd >= 0) {
2,431✔
698
                r = pidfd_get_ppid(pidref->fd, ret);
2,431✔
699
                if (!ERRNO_IS_NEG_NOT_SUPPORTED(r))
2,431✔
700
                        return r;
701
        }
702

703
        pid_t ppid;
1,395✔
704
        r = pid_get_ppid(pidref->pid, ret ? &ppid : NULL);
1,395✔
705
        if (r < 0)
1,395✔
706
                return r;
707

708
        r = pidref_verify(pidref);
1,394✔
709
        if (r < 0)
1,394✔
710
                return r;
711

712
        if (ret)
1,394✔
713
                *ret = ppid;
1,394✔
714
        return 0;
715
}
716

717
int pidref_get_ppid_as_pidref(const PidRef *pidref, PidRef *ret) {
718
        pid_t ppid;
17✔
719
        int r;
17✔
720

721
        assert(ret);
17✔
722

723
        r = pidref_get_ppid(pidref, &ppid);
17✔
724
        if (r < 0)
17✔
725
                return r;
17✔
726

727
        for (unsigned attempt = 0; attempt < 16; attempt++) {
16✔
728
                _cleanup_(pidref_done) PidRef parent = PIDREF_NULL;
16✔
729

730
                r = pidref_set_pid(&parent, ppid);
16✔
731
                if (r < 0)
16✔
732
                        return r;
733

734
                /* If we have a pidfd of the original PID, let's verify that the process we acquired really
735
                 * is the parent still */
736
                if (pidref->fd >= 0) {
16✔
737
                        r = pidref_get_ppid(pidref, &ppid);
16✔
738
                        if (r < 0)
16✔
739
                                return r;
740

741
                        /* Did the PPID change since we queried it? if so we might have pinned the wrong
742
                         * process, if its PID got reused by now. Let's try again */
743
                        if (parent.pid != ppid)
16✔
744
                                continue;
×
745
                }
746

747
                *ret = TAKE_PIDREF(parent);
16✔
748
                return 0;
16✔
749
        }
750

751
        /* Give up after 16 tries */
752
        return -ENOTRECOVERABLE;
753
}
754

755
int pid_get_start_time(pid_t pid, usec_t *ret) {
756
        _cleanup_free_ char *line = NULL;
658✔
757
        const char *p;
658✔
758
        int r;
658✔
759

760
        assert(pid >= 0);
658✔
761

762
        p = procfs_file_alloca(pid, "stat");
658✔
763
        r = read_one_line_file(p, &line);
658✔
764
        if (r == -ENOENT)
658✔
765
                return -ESRCH;
766
        if (r < 0)
658✔
767
                return r;
768

769
        /* Let's skip the pid and comm fields. The latter is enclosed in () but does not escape any () in its
770
         * value, so let's skip over it manually */
771

772
        p = strrchr(line, ')');
658✔
773
        if (!p)
658✔
774
                return -EIO;
775
        p++;
658✔
776

777
        unsigned long llu;
658✔
778

779
        if (sscanf(p, " "
658✔
780
                   "%*c " /* state */
781
                   "%*u " /* ppid */
782
                   "%*u " /* pgrp */
783
                   "%*u " /* session */
784
                   "%*u " /* tty_nr */
785
                   "%*u " /* tpgid */
786
                   "%*u " /* flags */
787
                   "%*u " /* minflt */
788
                   "%*u " /* cminflt */
789
                   "%*u " /* majflt */
790
                   "%*u " /* cmajflt */
791
                   "%*u " /* utime */
792
                   "%*u " /* stime */
793
                   "%*u " /* cutime */
794
                   "%*u " /* cstime */
795
                   "%*i " /* priority */
796
                   "%*i " /* nice */
797
                   "%*u " /* num_threads */
798
                   "%*u " /* itrealvalue */
799
                   "%lu ", /* starttime */
800
                   &llu) != 1)
801
                return -EIO;
802

803
        if (ret)
658✔
804
                *ret = jiffies_to_usec(llu); /* CLOCK_BOOTTIME */
658✔
805

806
        return 0;
807
}
808

809
int pidref_get_start_time(const PidRef *pid, usec_t *ret) {
810
        usec_t t;
658✔
811
        int r;
658✔
812

813
        if (!pidref_is_set(pid))
658✔
814
                return -ESRCH;
658✔
815

816
        if (pidref_is_remote(pid))
658✔
817
                return -EREMOTE;
818

819
        r = pid_get_start_time(pid->pid, ret ? &t : NULL);
658✔
820
        if (r < 0)
658✔
821
                return r;
822

823
        r = pidref_verify(pid);
658✔
824
        if (r < 0)
658✔
825
                return r;
826

827
        if (ret)
658✔
828
                *ret = t;
658✔
829

830
        return 0;
831
}
832

833
int get_process_umask(pid_t pid, mode_t *ret) {
834
        _cleanup_free_ char *m = NULL;
18,812✔
835
        int r;
18,812✔
836

837
        assert(pid >= 0);
18,812✔
838
        assert(ret);
18,812✔
839

840
        r = procfs_file_get_field(pid, "status", "Umask", &m);
18,812✔
841
        if (r == -ENOENT)
18,812✔
842
                return -ESRCH;
843
        if (r < 0)
18,812✔
844
                return r;
845

846
        return parse_mode(m, ret);
18,812✔
847
}
848

849
int wait_for_terminate(pid_t pid, siginfo_t *ret) {
850
        return pidref_wait_for_terminate(&PIDREF_MAKE_FROM_PID(pid), ret);
675✔
851
}
852

853
/*
854
 * Return values:
855
 * < 0 : wait_for_terminate() failed to get the state of the
856
 *       process, the process was terminated by a signal, or
857
 *       failed for an unknown reason.
858
 * >=0 : The process terminated normally, and its exit code is
859
 *       returned.
860
 *
861
 * That is, success is indicated by a return value of zero, and an
862
 * error is indicated by a non-zero value.
863
 *
864
 * A warning is emitted if the process terminates abnormally,
865
 * and also if it returns non-zero unless check_exit_code is true.
866
 */
867
int pidref_wait_for_terminate_and_check(const char *name, PidRef *pidref, WaitFlags flags) {
868
        int r;
9,199✔
869

870
        if (!pidref_is_set(pidref))
9,199✔
871
                return -ESRCH;
9,199✔
872
        if (pidref_is_remote(pidref))
18,398✔
873
                return -EREMOTE;
874
        if (pidref->pid == 1 || pidref_is_self(pidref))
9,199✔
875
                return -ECHILD;
×
876

877
        _cleanup_free_ char *buffer = NULL;
9,199✔
878
        if (!name) {
9,199✔
879
                r = pidref_get_comm(pidref, &buffer);
×
880
                if (r < 0)
×
881
                        log_debug_errno(r, "Failed to acquire process name of " PID_FMT ", ignoring: %m", pidref->pid);
×
882
                else
883
                        name = buffer;
×
884
        }
885

886
        int prio = flags & WAIT_LOG_ABNORMAL ? LOG_ERR : LOG_DEBUG;
9,199✔
887

888
        siginfo_t status;
9,199✔
889
        r = pidref_wait_for_terminate(pidref, &status);
9,199✔
890
        if (r < 0)
9,199✔
891
                return log_full_errno(prio, r, "Failed to wait for %s: %m", strna(name));
×
892

893
        if (status.si_code == CLD_EXITED) {
9,199✔
894
                if (status.si_status != EXIT_SUCCESS)
9,199✔
895
                        log_full(flags & WAIT_LOG_NON_ZERO_EXIT_STATUS ? LOG_ERR : LOG_DEBUG,
54✔
896
                                 "%s failed with exit status %i.", strna(name), status.si_status);
897
                else
898
                        log_debug("%s succeeded.", name);
9,145✔
899

900
                return status.si_status;
9,199✔
901

902
        } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) {
×
903

904
                log_full(prio, "%s terminated by signal %s.", strna(name), signal_to_string(status.si_status));
×
905
                return -EPROTO;
×
906
        }
907

908
        log_full(prio, "%s failed due to unknown reason.", strna(name));
×
909
        return -EPROTO;
910
}
911

912
int wait_for_terminate_and_check(const char *name, pid_t pid, WaitFlags flags) {
        /* Plain-PID flavour of pidref_wait_for_terminate_and_check(): wraps 'pid' in a temporary PidRef
         * and passes the result of that call through unmodified. */
        PidRef ref = PIDREF_MAKE_FROM_PID(pid);

        return pidref_wait_for_terminate_and_check(name, &ref, flags);
}
915

916
/*
917
 * Return values:
918
 *
919
 * < 0 : wait_for_terminate_with_timeout() failed to get the state of the process, the process timed out, the process
920
 *       was terminated by a signal, or failed for an unknown reason.
921
 *
922
 * >=0 : The process terminated normally with no failures.
923
 *
924
 * Success is indicated by a return value of zero, a timeout is indicated by ETIMEDOUT, and all other child failure
925
 * states are indicated by a non-zero error value.
926
 *
927
 * This call assumes SIGCHLD has been blocked already, in particular before the child to wait for has been forked off
928
 * to remain entirely race-free.
929
 */
930
int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
        sigset_t mask;
        int r;
        usec_t until;

        /* Waits up to 'timeout' for the child 'pid' to exit.
         *
         * Returns 0 if the child exited with status 0, -ETIMEDOUT if the deadline passed first, -EPROTO
         * if the child exited with a non-zero status or terminated in any other way, and any other
         * negative errno-style value if sigtimedwait() failed unexpectedly. */

        assert_se(sigemptyset(&mask) == 0);
        assert_se(sigaddset(&mask, SIGCHLD) == 0);

        /* Drop into a sigtimedwait()-based timeout. Waiting for the
         * pid to exit. */
        until = usec_add(now(CLOCK_MONOTONIC), timeout);
        for (;;) {
                usec_t n;
                siginfo_t status = {};

                n = now(CLOCK_MONOTONIC);
                if (n >= until)
                        break;

                /* Capture the result right away, before waitid() below gets a chance to change errno */
                r = RET_NERRNO(sigtimedwait(&mask, NULL, TIMESPEC_STORE(until - n)));
                /* Assuming we woke due to the child exiting. */
                if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
                        if (status.si_pid == pid) {
                                /* This is the correct child. */
                                if (status.si_code == CLD_EXITED)
                                        return status.si_status == 0 ? 0 : -EPROTO;
                                else
                                        return -EPROTO;
                        }
                }
                /* Not the child, check for errors and proceed appropriately */
                if (r < 0) {
                        switch (r) {
                        case -EAGAIN:
                                /* Timed out, child is likely hung. */
                                return -ETIMEDOUT;
                        case -EINTR:
                                /* Received a different signal and should retry */
                                continue;
                        default:
                                /* Return any unexpected errors */
                                return r;
                        }
                }
        }

        /* The loop is only ever left via the deadline check at its top, i.e. we ran out of time (for
         * example after being woken up by unrelated signals). Report that as a timeout, the same way as
         * when sigtimedwait() itself times out. */
        return -ETIMEDOUT;
}
978

979
void sigkill_wait(pid_t pid) {
980
        assert(pid > 1);
40✔
981

982
        (void) kill(pid, SIGKILL);
40✔
983
        (void) wait_for_terminate(pid, NULL);
40✔
984
}
40✔
985

986
void sigkill_waitp(pid_t *pid) {
987
        PROTECT_ERRNO;
11,928✔
988

989
        if (!pid)
11,928✔
990
                return;
991
        if (*pid <= 1)
11,928✔
992
                return;
993

994
        sigkill_wait(*pid);
39✔
995
}
996

997
void sigterm_wait(pid_t pid) {
998
        assert(pid > 1);
70✔
999

1000
        (void) kill_and_sigcont(pid, SIGTERM);
70✔
1001
        (void) wait_for_terminate(pid, NULL);
70✔
1002
}
70✔
1003

1004
void sigkill_nowait(pid_t pid) {
        /* Sends SIGKILL to the specified process without waiting for it afterwards. A failure to deliver
         * the signal is ignored. */

        assert(pid > 1);

        (void) kill(pid, SIGKILL);
}
×
1009

1010
void sigkill_nowaitp(pid_t *pid) {
1011
        PROTECT_ERRNO;
×
1012

1013
        if (!pid)
×
1014
                return;
1015
        if (*pid <= 1)
×
1016
                return;
1017

1018
        sigkill_nowait(*pid);
×
1019
}
1020

1021
int kill_and_sigcont(pid_t pid, int sig) {
        int r;

        /* Sends 'sig' to 'pid' and returns the result of that kill() call as produced by RET_NERRNO(). */

        r = RET_NERRNO(kill(pid, sig));
        if (r < 0)
                return r;

        /* The signal was delivered: follow up with SIGCONT so that a suspended process gets a chance to
         * act on it. Not needed if we just sent SIGCONT ourselves, nor for SIGKILL, which isn't affected
         * by a process being suspended anyway. The result of this second kill() is ignored. */
        if (sig != SIGCONT && sig != SIGKILL)
                (void) kill(pid, SIGCONT);

        return r;
}
1033

1034
int getenv_for_pid(pid_t pid, const char *field, char **ret) {
1035
        _cleanup_fclose_ FILE *f = NULL;
4,749✔
1036
        const char *path;
4,749✔
1037
        size_t sum = 0;
4,749✔
1038
        int r;
4,749✔
1039

1040
        assert(pid >= 0);
4,749✔
1041
        assert(field);
4,749✔
1042
        assert(ret);
4,749✔
1043

1044
        if (pid == 0 || pid == getpid_cached())
4,749✔
1045
                return strdup_to_full(ret, getenv(field));
13✔
1046

1047
        if (!pid_is_valid(pid))
4,736✔
1048
                return -EINVAL;
1049

1050
        path = procfs_file_alloca(pid, "environ");
4,736✔
1051

1052
        r = fopen_unlocked(path, "re", &f);
4,736✔
1053
        if (r == -ENOENT)
4,736✔
1054
                return -ESRCH;
1055
        if (r < 0)
4,255✔
1056
                return r;
1057

1058
        for (;;) {
49,616✔
1059
                _cleanup_free_ char *line = NULL;
23,285✔
1060
                const char *match;
26,334✔
1061

1062
                if (sum > ENVIRONMENT_BLOCK_MAX) /* Give up searching eventually */
26,334✔
1063
                        return -ENOBUFS;
1064

1065
                r = read_nul_string(f, LONG_LINE_MAX, &line);
26,334✔
1066
                if (r < 0)
26,334✔
1067
                        return r;
1068
                if (r == 0)  /* EOF */
26,334✔
1069
                        break;
1070

1071
                sum += r;
23,285✔
1072

1073
                match = startswith(line, field);
23,285✔
1074
                if (match && *match == '=')
23,285✔
1075
                        return strdup_to_full(ret, match + 1);
3✔
1076
        }
1077

1078
        *ret = NULL;
3,049✔
1079
        return 0;
3,049✔
1080
}
1081

1082
int pidref_is_my_child(PidRef *pid) {
        pid_t parent;
        int r;

        /* Returns > 0 if the referenced process is a direct child of ours, 0 if it is not, and a
         * negative errno-style value on failure (-ESRCH for an unset reference, -EREMOTE for a remote
         * one). */

        if (!pidref_is_set(pid))
                return -ESRCH;
        if (pidref_is_remote(pid))
                return -EREMOTE;

        /* Neither PID 1 nor we ourselves can be our own child */
        if (pid->pid == 1 || pidref_is_self(pid))
                return false;

        r = pidref_get_ppid(pid, &parent);
        if (r == -EADDRNOTAVAIL) /* a process outside of our pidns is definitely not our child */
                return false;
        if (r < 0)
                return r;

        return parent == getpid_cached();
}
1103

1104
int pid_is_my_child(pid_t pid) {
1105

1106
        if (pid == 0)
×
1107
                return false;
×
1108

1109
        return pidref_is_my_child(&PIDREF_MAKE_FROM_PID(pid));
×
1110
}
1111

1112
int pidref_is_unwaited(PidRef *pid) {
        int r;

        /* Checks whether a PID is still valid at all, including a zombie. Returns > 0 if so, 0 if the
         * process is gone, and a negative errno-style value on failure. */

        if (!pidref_is_set(pid))
                return -ESRCH;
        if (pidref_is_remote(pid))
                return -EREMOTE;

        /* PID 1 and we ourselves are always treated as still being around */
        if (pid->pid == 1 || pidref_is_self(pid))
                return true;

        /* Signal 0 delivers nothing, it only probes for the process */
        r = pidref_kill(pid, 0);
        if (r == -ESRCH)
                return false;

        return r < 0 ? r : true;
}
1134

1135
int pid_is_unwaited(pid_t pid) {
1136

1137
        if (pid == 0)
6,755✔
1138
                return true;
6,755✔
1139

1140
        return pidref_is_unwaited(&PIDREF_MAKE_FROM_PID(pid));
6,755✔
1141
}
1142

1143
int pid_is_alive(pid_t pid) {
        int state;

        /* Checks whether a PID is still valid and not a zombie. Returns > 0 if so, 0 if the process is
         * gone or a zombie, and a negative errno-style value on failure. */

        if (pid < 0)
                return -ESRCH;

        /* If we or PID 1 would be a zombie, this code would not be running */
        if (pid <= 1 || pid == getpid_cached())
                return true;

        state = get_process_state(pid);
        if (state == -ESRCH)
                return false;
        if (state < 0)
                return state;

        return state != 'Z';
}
1165

1166
int pidref_is_alive(const PidRef *pidref) {
1167
        int r, result;
11,200✔
1168

1169
        if (!pidref_is_set(pidref))
11,200✔
1170
                return -ESRCH;
1171

1172
        if (pidref_is_remote(pidref))
11,198✔
1173
                return -EREMOTE;
1174

1175
        result = pid_is_alive(pidref->pid);
11,198✔
1176
        if (result < 0) {
11,198✔
1177
                assert(result != -ESRCH);
×
1178
                return result;
1179
        }
1180

1181
        r = pidref_verify(pidref);
11,198✔
1182
        if (r == -ESRCH)
11,198✔
1183
                return false;
1184
        if (r < 0)
8,793✔
1185
                return r;
×
1186

1187
        return result;
1188
}
1189

1190
int pidref_from_same_root_fs(PidRef *a, PidRef *b) {
        _cleanup_(pidref_done) PidRef self = PIDREF_NULL;
        const char *path_a, *path_b;
        bool remote_a, remote_b;
        int same, r;

        /* Checks if the two specified processes have the same root fs. Either can be specified as NULL in
         * which case we'll check against ourselves. Returns > 0 if the root is the same, 0 if not, and a
         * negative errno-style value on failure. */

        if (!a || !b) {
                r = pidref_set_self(&self);
                if (r < 0)
                        return r;

                if (!a)
                        a = &self;
                if (!b)
                        b = &self;
        }

        if (!pidref_is_set(a) || !pidref_is_set(b))
                return -ESRCH;

        /* If exactly one of the two processes is remote they cannot have the same root fs, but if both
         * of them are we cannot tell. */
        remote_a = pidref_is_remote(a);
        remote_b = pidref_is_remote(b);
        if (remote_a && remote_b)
                return -EREMOTE;
        if (remote_a || remote_b)
                return false;

        if (pidref_equal(a, b))
                return true;

        path_a = procfs_file_alloca(a->pid, "root");
        path_b = procfs_file_alloca(b->pid, "root");

        same = inode_same(path_a, path_b, 0);
        if (same == -ENOENT) /* Tell a missing procfs apart from a missing process */
                return proc_mounted() == 0 ? -ENOSYS : -ESRCH;
        if (same < 0)
                return same;

        /* Only trust the comparison if both references still verify afterwards */
        r = pidref_verify(a);
        if (r < 0)
                return r;
        r = pidref_verify(b);
        if (r < 0)
                return r;

        return same;
}
1238

1239
bool is_main_thread(void) {
        /* Per-thread cache of the answer: negative means "not determined yet" */
        static thread_local int cached = -1;

        if (cached >= 0)
                return cached;

        /* We are the main thread if our thread ID matches the process ID */
        cached = getpid_cached() == gettid();
        return cached;
}
1247

1248
bool oom_score_adjust_is_valid(int oa) {
        /* Accepts everything within the inclusive range OOM_SCORE_ADJ_MIN…OOM_SCORE_ADJ_MAX */

        if (oa < OOM_SCORE_ADJ_MIN)
                return false;
        if (oa > OOM_SCORE_ADJ_MAX)
                return false;

        return true;
}
1251

1252
unsigned long personality_from_string(const char *p) {
1253
        Architecture architecture;
9✔
1254

1255
        if (!p)
9✔
1256
                return PERSONALITY_INVALID;
1257

1258
        /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just
1259
         * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for
1260
         * the same register size. */
1261

1262
        architecture = architecture_from_string(p);
8✔
1263
        if (architecture < 0)
8✔
1264
                return PERSONALITY_INVALID;
1265

1266
        if (architecture == native_architecture())
6✔
1267
                return PER_LINUX;
1268
#ifdef ARCHITECTURE_SECONDARY
1269
        if (architecture == ARCHITECTURE_SECONDARY)
3✔
1270
                return PER_LINUX32;
2✔
1271
#endif
1272

1273
        return PERSONALITY_INVALID;
1274
}
1275

1276
const char* personality_to_string(unsigned long p) {
1277
        Architecture architecture = _ARCHITECTURE_INVALID;
1,469✔
1278

1279
        if (p == PER_LINUX)
1,469✔
1280
                architecture = native_architecture();
1281
#ifdef ARCHITECTURE_SECONDARY
1282
        else if (p == PER_LINUX32)
1,464✔
1283
                architecture = ARCHITECTURE_SECONDARY;
1284
#endif
1285

1286
        if (architecture < 0)
1287
                return NULL;
1288

1289
        return architecture_to_string(architecture);
7✔
1290
}
1291

1292
int safe_personality(unsigned long p) {
        int ret;

        /* So here's the deal, personality() is weirdly defined by glibc. In some cases it returns a failure via errno,
         * and in others as negative return value containing an errno-like value. Let's work around this: this is a
         * wrapper that uses errno if it is set, and uses the return value otherwise. And then it sets both errno and
         * the return value indicating the same issue, so that we are definitely on the safe side.
         *
         * See https://github.com/systemd/systemd/issues/6737 */

        errno = 0;
        ret = personality(p);
        if (ret >= 0)
                return ret;

        /* Failure reported via errno: that's the authoritative value */
        if (errno != 0)
                return -errno;

        /* Failure reported via the return value only: mirror it into errno, too */
        errno = -ret;
        return ret;
}
1313

1314
int opinionated_personality(unsigned long *ret) {
1315
        int current;
1,521✔
1316

1317
        /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit
1318
         * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the
1319
         * two most relevant personalities: PER_LINUX and PER_LINUX32. */
1320

1321
        current = safe_personality(PERSONALITY_INVALID);
1,521✔
1322
        if (current < 0)
1,521✔
1323
                return current;
1324

1325
        if (((unsigned long) current & OPINIONATED_PERSONALITY_MASK) == PER_LINUX32)
1,521✔
1326
                *ret = PER_LINUX32;
×
1327
        else
1328
                *ret = PER_LINUX;
1,521✔
1329

1330
        return 0;
1331
}
1332

1333
void valgrind_summary_hack(void) {
#if HAVE_VALGRIND_VALGRIND_H
        pid_t helper;

        /* Only relevant when we are PID 1 and run under valgrind: fork off a helper child that exits
         * right away and wait for it. NOTE(review): presumably this is what makes valgrind emit its
         * summary for PID 1 — confirm against the caller. */

        if (getpid_cached() != 1 || !(RUNNING_ON_VALGRIND))
                return;

        helper = raw_clone(SIGCHLD);
        if (helper < 0) {
                log_struct_errno(
                        LOG_EMERG, errno,
                        LOG_MESSAGE_ID(SD_MESSAGE_VALGRIND_HELPER_FORK_STR),
                        LOG_MESSAGE("Failed to fork off valgrind helper: %m"));
                return;
        }

        if (helper == 0) /* In the child: nothing to do but exit */
                exit(EXIT_SUCCESS);

        log_info("Spawned valgrind helper as PID "PID_FMT".", helper);
        (void) wait_for_terminate(helper, NULL);
#endif
}
47✔
1352

1353
int pid_compare_func(const pid_t *a, const pid_t *b) {
        /* Three-way comparison of two PIDs via CMP(). Suitable for usage in qsort() */
        pid_t x = *a, y = *b;

        return CMP(x, y);
}
1357

1358
bool nice_is_valid(int n) {
        /* Valid nice levels are PRIO_MIN (inclusive) up to PRIO_MAX (exclusive) */

        if (n < PRIO_MIN)
                return false;

        return n < PRIO_MAX;
}
1361

1362
bool sched_policy_is_valid(int i) {
1363
        return IN_SET(i, SCHED_OTHER, SCHED_BATCH, SCHED_IDLE, SCHED_FIFO, SCHED_RR);
×
1364
}
1365

1366
bool sched_priority_is_valid(int i) {
        /* Valid priorities range from 0 up to whatever maximum the system reports for SCHED_RR */

        if (i < 0)
                return false;

        return i <= sched_get_priority_max(SCHED_RR);
}
1369

1370
/* The cached PID, possible values:
1371
 *
1372
 *     == UNSET [0]  → cache not initialized yet
1373
 *     == BUSY [-1]  → some thread is initializing it at the moment
1374
 *     any other     → the cached PID
1375
 */
1376

1377
#define CACHED_PID_UNSET ((pid_t) 0)
1378
#define CACHED_PID_BUSY ((pid_t) -1)
1379

1380
static pid_t cached_pid = CACHED_PID_UNSET;
1381

1382
void reset_cached_pid(void) {
1383
        /* Invoked in the child after a fork(), i.e. at the first moment the PID changed */
1384
        cached_pid = CACHED_PID_UNSET;
3,266✔
1385
}
3,266✔
1386

1387
pid_t getpid_cached(void) {
1388
        static bool installed = false;
92,389,690✔
1389
        pid_t current_value = CACHED_PID_UNSET;
92,389,690✔
1390

1391
        /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
1392
         * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
1393
         * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
1394
         * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
1395
         *
1396
         * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
1397
         * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
1398
         */
1399

1400
        (void) __atomic_compare_exchange_n(
92,389,690✔
1401
                        &cached_pid,
1402
                        &current_value,
1403
                        CACHED_PID_BUSY,
1404
                        false,
1405
                        __ATOMIC_SEQ_CST,
1406
                        __ATOMIC_SEQ_CST);
1407

1408
        switch (current_value) {
92,389,690✔
1409

1410
        case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */
124,294✔
1411
                pid_t new_pid;
124,294✔
1412

1413
                new_pid = getpid();
124,294✔
1414

1415
                if (!installed) {
124,294✔
1416
                        /* __register_atfork() either returns 0 or -ENOMEM, in its glibc implementation. Since it's
1417
                         * only half-documented (glibc doesn't document it but LSB does — though only superficially)
1418
                         * we'll check for errors only in the most generic fashion possible. */
1419

1420
                        if (pthread_atfork(NULL, NULL, reset_cached_pid) != 0) {
93,899✔
1421
                                /* OOM? Let's try again later */
1422
                                cached_pid = CACHED_PID_UNSET;
×
1423
                                return new_pid;
×
1424
                        }
1425

1426
                        installed = true;
93,899✔
1427
                }
1428

1429
                cached_pid = new_pid;
124,294✔
1430
                return new_pid;
124,294✔
1431
        }
1432

1433
        case CACHED_PID_BUSY: /* Somebody else is currently initializing */
×
1434
                return getpid();
×
1435

1436
        default: /* Properly initialized */
1437
                return current_value;
1438
        }
1439
}
1440

1441
int must_be_root(void) {
        /* Returns 0 if our effective UID is root. Otherwise logs an error and returns what
         * log_error_errno() hands back for a synthetic EPERM. */

        if (geteuid() != 0)
                return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Need to be root.");

        return 0;
}
1448

1449
pid_t clone_with_nested_stack(int (*fn)(void *), int flags, void *userdata) {
1450
        size_t ps;
2,964✔
1451
        pid_t pid;
2,964✔
1452
        void *mystack;
2,964✔
1453

1454
        /* A wrapper around glibc's clone() call that automatically sets up a "nested" stack. Only supports
1455
         * invocations without CLONE_VM, so that we can continue to use the parent's stack mapping.
1456
         *
1457
         * Note: glibc's clone() wrapper does not synchronize malloc() locks. This means that if the parent
1458
         * is threaded these locks will be in an undefined state in the child, and hence memory allocations
1459
         * are likely going to run into deadlocks. Hence: if you use this function make sure your parent is
1460
         * strictly single-threaded or your child never calls malloc(). */
1461

1462
        assert((flags & (CLONE_VM|CLONE_PARENT_SETTID|CLONE_CHILD_SETTID|
2,964✔
1463
                         CLONE_CHILD_CLEARTID|CLONE_SETTLS)) == 0);
1464

1465
        /* We allocate some space on the stack to use as the stack for the child (hence "nested"). Note that
1466
         * the net effect is that the child will have the start of its stack inside the stack of the parent,
1467
         * but since they are a CoW copy of each other that's fine. We allocate one page-aligned page. But
1468
         * since we don't want to deal with differences between systems where the stack grows backwards or
1469
         * forwards we'll allocate one more and place the stack address in the middle. Except that we also
1470
         * want it page aligned, hence we'll allocate one page more. Makes 3. */
1471

1472
        ps = page_size();
2,964✔
1473
        mystack = alloca(ps*3);
2,964✔
1474
        mystack = (uint8_t*) mystack + ps; /* move pointer one page ahead since stacks usually grow backwards */
2,964✔
1475
        mystack = (void*) ALIGN_TO((uintptr_t) mystack, ps); /* align to page size (moving things further ahead) */
2,964✔
1476

1477
#if HAVE_CLONE
1478
        pid = clone(fn, mystack, flags, userdata);
2,964✔
1479
#else
1480
        pid = __clone2(fn, mystack, ps, flags, userdata);
1481
#endif
1482
        if (pid < 0)
2,964✔
1483
                return -errno;
×
1484

1485
        return pid;
1486
}
1487

1488
static void restore_sigsetp(sigset_t **ssp) {
        /* Cleanup helper: if a saved signal mask was recorded, reinstall it. A NULL entry means there is
         * nothing to restore. Failures of sigprocmask() are ignored. */

        if (!*ssp)
                return;

        (void) sigprocmask(SIG_SETMASK, *ssp, NULL);
}
60,730✔
1492

1493
static int fork_flags_to_signal(ForkFlags flags) {
        /* Picks the signal matching the FORK_DEATHSIG_* flag that is set. SIGTERM takes precedence over
         * SIGINT, and SIGKILL is what we pick if neither of the two is requested. */

        if (flags & FORK_DEATHSIG_SIGTERM)
                return SIGTERM;
        if (flags & FORK_DEATHSIG_SIGINT)
                return SIGINT;

        return SIGKILL;
}
1498

1499
int pidref_safe_fork_full(
1500
                const char *name,
1501
                const int stdio_fds[3],
1502
                int except_fds[],
1503
                size_t n_except_fds,
1504
                ForkFlags flags,
1505
                PidRef *ret_pid) {
1506

1507
        pid_t original_pid, pid;
32,096✔
1508
        sigset_t saved_ss, ss;
32,096✔
1509
        _unused_ _cleanup_(restore_sigsetp) sigset_t *saved_ssp = NULL;
×
1510
        bool block_signals = false, block_all = false, intermediary = false;
32,096✔
1511
        _cleanup_close_pair_ int pidref_transport_fds[2] = EBADF_PAIR;
60,730✔
1512
        int prio, r;
32,096✔
1513

1514
        assert(!FLAGS_SET(flags, FORK_WAIT|FORK_FREEZE));
32,096✔
1515
        assert(!FLAGS_SET(flags, FORK_DETACH) ||
32,096✔
1516
               (flags & (FORK_WAIT|FORK_DEATHSIG_SIGTERM|FORK_DEATHSIG_SIGINT|FORK_DEATHSIG_SIGKILL)) == 0);
1517

1518
        /* A wrapper around fork(), that does a couple of important initializations in addition to mere
1519
         * forking. If provided, ret_pid is initialized in both the parent and the child process, both times
1520
         * referencing the child process. Returns == 0 in the child and > 0 in the parent. */
1521

1522
        prio = flags & FORK_LOG ? LOG_ERR : LOG_DEBUG;
32,096✔
1523

1524
        original_pid = getpid_cached();
32,096✔
1525

1526
        if (flags & FORK_FLUSH_STDIO) {
32,096✔
1527
                fflush(stdout);
5✔
1528
                fflush(stderr); /* This one shouldn't be necessary, stderr should be unbuffered anyway, but let's better be safe than sorry */
5✔
1529
        }
1530

1531
        if (flags & (FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGTERM|FORK_DEATHSIG_SIGINT)) {
32,096✔
1532
                /* We temporarily block all signals, so that the new child has them blocked initially. This
1533
                 * way, we can be sure that SIGTERMs are not lost we might send to the child. (Note that for
1534
                 * FORK_DEATHSIG_SIGKILL we don't bother, since it cannot be blocked anyway.) */
1535

1536
                assert_se(sigfillset(&ss) >= 0);
27,454✔
1537
                block_signals = block_all = true;
1538

1539
        } else if (flags & FORK_WAIT) {
4,642✔
1540
                /* Let's block SIGCHLD at least, so that we can safely watch for the child process */
1541

1542
                assert_se(sigemptyset(&ss) >= 0);
106✔
1543
                assert_se(sigaddset(&ss, SIGCHLD) >= 0);
106✔
1544
                block_signals = true;
1545
        }
1546

1547
        if (block_signals) {
1548
                if (sigprocmask(SIG_BLOCK, &ss, &saved_ss) < 0)
27,560✔
1549
                        return log_full_errno(prio, errno, "Failed to block signal mask: %m");
×
1550
                saved_ssp = &saved_ss;
27,560✔
1551
        }
1552

1553
        if (FLAGS_SET(flags, FORK_DETACH)) {
32,096✔
1554
                /* Fork off intermediary child if needed */
1555

1556
                r = is_reaper_process();
99✔
1557
                if (r < 0)
99✔
1558
                        return log_full_errno(prio, r, "Failed to determine if we are a reaper process: %m");
×
1559

1560
                if (!r) {
99✔
1561
                        /* Not a reaper process, hence do a double fork() so we are reparented to one */
1562

1563
                        if (ret_pid && socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, pidref_transport_fds) < 0)
11✔
1564
                                return log_full_errno(prio, errno, "Failed to allocate pidref socket: %m");
×
1565

1566
                        pid = fork();
11✔
1567
                        if (pid < 0)
28✔
1568
                                return log_full_errno(prio, errno, "Failed to fork off '%s': %m", strna(name));
×
1569
                        if (pid > 0) {
28✔
1570
                                log_debug("Successfully forked off intermediary '%s' as PID " PID_FMT ".", strna(name), pid);
11✔
1571

1572
                                pidref_transport_fds[1] = safe_close(pidref_transport_fds[1]);
11✔
1573

1574
                                if (pidref_transport_fds[0] >= 0) {
11✔
1575
                                        /* Wait for the intermediary child to exit so the caller can be certain the actual child
1576
                                         * process has been reparented by the time this function returns. */
1577
                                        r = wait_for_terminate_and_check(name, pid, FLAGS_SET(flags, FORK_LOG) ? WAIT_LOG : 0);
20✔
1578
                                        if (r < 0)
10✔
1579
                                                return log_full_errno(prio, r, "Failed to wait for intermediary process: %m");
×
1580
                                        if (r != EXIT_SUCCESS) /* exit status > 0 should be treated as failure, too */
10✔
1581
                                                return -EPROTO;
1582

1583
                                        int pidfd;
10✔
1584
                                        ssize_t n = receive_one_fd_iov(
20✔
1585
                                                        pidref_transport_fds[0],
1586
                                                        &IOVEC_MAKE(&pid, sizeof(pid)),
10✔
1587
                                                        /* iovlen= */ 1,
1588
                                                        /* flags= */ 0,
1589
                                                        &pidfd);
1590
                                        if (n < 0)
10✔
1591
                                                return log_full_errno(prio, n, "Failed to receive child pidref: %m");
×
1592

1593
                                        *ret_pid = (PidRef) { .pid = pid, .fd = pidfd };
10✔
1594
                                }
1595

1596
                                return 1; /* return in the parent */
11✔
1597
                        }
1598

1599
                        pidref_transport_fds[0] = safe_close(pidref_transport_fds[0]);
17✔
1600
                        intermediary = true;
17✔
1601
                }
1602
        }
1603

1604
        if ((flags & (FORK_NEW_MOUNTNS|FORK_NEW_USERNS|FORK_NEW_NETNS|FORK_NEW_PIDNS)) != 0)
32,102✔
1605
                pid = raw_clone(SIGCHLD|
10,085✔
1606
                                (FLAGS_SET(flags, FORK_NEW_MOUNTNS) ? CLONE_NEWNS : 0) |
10,085✔
1607
                                (FLAGS_SET(flags, FORK_NEW_USERNS) ? CLONE_NEWUSER : 0) |
10,085✔
1608
                                (FLAGS_SET(flags, FORK_NEW_NETNS) ? CLONE_NEWNET : 0) |
10,085✔
1609
                                (FLAGS_SET(flags, FORK_NEW_PIDNS) ? CLONE_NEWPID : 0));
10,085✔
1610
        else
1611
                pid = fork();
22,017✔
1612
        if (pid < 0)
60,730✔
1613
                return log_full_errno(prio, errno, "Failed to fork off '%s': %m", strna(name));
2✔
1614
        if (pid > 0) {
60,729✔
1615

1616
                /* If we are in the intermediary process, exit now */
1617
                if (intermediary) {
29,831✔
1618
                        if (pidref_transport_fds[1] >= 0) {
11✔
1619
                                _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
10✔
1620

1621
                                r = pidref_set_pid(&pidref, pid);
10✔
1622
                                if (r < 0) {
10✔
1623
                                        log_full_errno(prio, r, "Failed to open reference to PID "PID_FMT": %m", pid);
×
1624
                                        _exit(EXIT_FAILURE);
×
1625
                                }
1626

1627
                                r = send_one_fd_iov(
10✔
1628
                                                pidref_transport_fds[1],
1629
                                                pidref.fd,
1630
                                                &IOVEC_MAKE(&pidref.pid, sizeof(pidref.pid)),
1631
                                                /* iovlen= */ 1,
1632
                                                /* flags= */ 0);
1633
                                if (r < 0) {
10✔
1634
                                        log_full_errno(prio, r, "Failed to send child pidref: %m");
×
1635
                                        _exit(EXIT_FAILURE);
×
1636
                                }
1637
                        }
1638

1639
                        _exit(EXIT_SUCCESS);
11✔
1640
                }
1641

1642
                /* We are in the parent process */
1643
                log_debug("Successfully forked off '%s' as PID " PID_FMT ".", strna(name), pid);
29,820✔
1644

1645
                if (flags & FORK_WAIT) {
29,820✔
1646
                        if (block_all) {
3,262✔
1647
                                /* undo everything except SIGCHLD */
1648
                                ss = saved_ss;
3,156✔
1649
                                assert_se(sigaddset(&ss, SIGCHLD) >= 0);
3,156✔
1650
                                (void) sigprocmask(SIG_SETMASK, &ss, NULL);
3,156✔
1651
                        }
1652

1653
                        r = wait_for_terminate_and_check(name, pid, (flags & FORK_LOG ? WAIT_LOG : 0));
6,056✔
1654
                        if (r < 0)
3,262✔
1655
                                return r;
1656
                        if (r != EXIT_SUCCESS) /* exit status > 0 should be treated as failure, too */
3,262✔
1657
                                return -EPROTO;
1658

1659
                        /* If we are in the parent and successfully waited, then the process doesn't exist anymore. */
1660
                        if (ret_pid)
3,262✔
1661
                                *ret_pid = PIDREF_NULL;
14✔
1662

1663
                        return 1;
3,262✔
1664
                }
1665

1666
                if (ret_pid) {
26,558✔
1667
                        if (FLAGS_SET(flags, FORK_PID_ONLY))
26,064✔
1668
                                *ret_pid = PIDREF_MAKE_FROM_PID(pid);
7,408✔
1669
                        else {
1670
                                r = pidref_set_pid(ret_pid, pid);
18,656✔
1671
                                if (r < 0) /* Let's not fail for this, no matter what, the process exists after all, and that's key */
18,656✔
1672
                                        *ret_pid = PIDREF_MAKE_FROM_PID(pid);
×
1673
                        }
1674
                }
1675

1676
                return 1;
26,558✔
1677
        }
1678

1679
        /* We are in the child process */
1680

1681
        pidref_transport_fds[1] = safe_close(pidref_transport_fds[1]);
30,898✔
1682

1683
        /* Restore signal mask manually */
1684
        saved_ssp = NULL;
30,898✔
1685

1686
        if (flags & FORK_REOPEN_LOG) {
30,898✔
1687
                /* Close the logs if requested, before we log anything. And make sure we reopen it if needed. */
1688
                log_close();
2,899✔
1689
                log_set_open_when_needed(true);
2,899✔
1690
                log_settle_target();
2,899✔
1691
        }
1692

1693
        if (name) {
30,898✔
1694
                r = rename_process(name);
30,898✔
1695
                if (r < 0)
30,898✔
1696
                        log_full_errno(flags & FORK_LOG ? LOG_WARNING : LOG_DEBUG,
×
1697
                                       r, "Failed to rename process, ignoring: %m");
1698
        }
1699

1700
        if (flags & (FORK_DEATHSIG_SIGTERM|FORK_DEATHSIG_SIGINT|FORK_DEATHSIG_SIGKILL))
30,898✔
1701
                if (prctl(PR_SET_PDEATHSIG, fork_flags_to_signal(flags)) < 0) {
29,569✔
1702
                        log_full_errno(prio, errno, "Failed to set death signal: %m");
×
1703
                        _exit(EXIT_FAILURE);
×
1704
                }
1705

1706
        if (flags & FORK_RESET_SIGNALS) {
30,898✔
1707
                r = reset_all_signal_handlers();
23,828✔
1708
                if (r < 0) {
23,828✔
1709
                        log_full_errno(prio, r, "Failed to reset signal handlers: %m");
×
1710
                        _exit(EXIT_FAILURE);
×
1711
                }
1712

1713
                /* This implicitly undoes the signal mask stuff we did before the fork()ing above */
1714
                r = reset_signal_mask();
23,828✔
1715
                if (r < 0) {
23,828✔
1716
                        log_full_errno(prio, r, "Failed to reset signal mask: %m");
×
1717
                        _exit(EXIT_FAILURE);
×
1718
                }
1719
        } else if (block_signals) { /* undo what we did above */
7,070✔
1720
                if (sigprocmask(SIG_SETMASK, &saved_ss, NULL) < 0) {
6,671✔
1721
                        log_full_errno(prio, errno, "Failed to restore signal mask: %m");
×
1722
                        _exit(EXIT_FAILURE);
×
1723
                }
1724
        }
1725

1726
        if (flags & (FORK_DEATHSIG_SIGTERM|FORK_DEATHSIG_SIGKILL|FORK_DEATHSIG_SIGINT)) {
30,898✔
1727
                pid_t ppid;
29,569✔
1728
                /* Let's see if the parent PID is still the one we started from? If not, then the parent
1729
                 * already died by the time we set PR_SET_PDEATHSIG, hence let's emulate the effect */
1730

1731
                ppid = getppid();
29,569✔
1732
                if (ppid == 0)
29,569✔
1733
                        /* Parent is in a different PID namespace. */;
1734
                else if (ppid != original_pid) {
29,530✔
1735
                        int sig = fork_flags_to_signal(flags);
×
1736
                        log_debug("Parent died early, raising %s.", signal_to_string(sig));
×
1737
                        (void) raise(sig);
×
1738
                        _exit(EXIT_FAILURE);
×
1739
                }
1740
        }
1741

1742
        if (FLAGS_SET(flags, FORK_NEW_MOUNTNS | FORK_MOUNTNS_SLAVE)) {
30,898✔
1743
                /* Optionally, make sure we never propagate mounts to the host. */
1744
                if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
135✔
1745
                        log_full_errno(prio, errno, "Failed to remount root directory as MS_SLAVE: %m");
×
1746
                        _exit(EXIT_FAILURE);
×
1747
                }
1748
        }
1749

1750
        if (FLAGS_SET(flags, FORK_PRIVATE_TMP)) {
30,898✔
1751
                assert(FLAGS_SET(flags, FORK_NEW_MOUNTNS));
×
1752

1753
                /* Optionally, overmount new tmpfs instance on /tmp/. */
1754
                r = mount_nofollow("tmpfs", "/tmp", "tmpfs",
×
1755
                                   MS_NOSUID|MS_NODEV,
1756
                                   "mode=01777" TMPFS_LIMITS_RUN);
1757
                if (r < 0) {
×
1758
                        log_full_errno(prio, r, "Failed to overmount /tmp/: %m");
×
1759
                        _exit(EXIT_FAILURE);
×
1760
                }
1761
        }
1762

1763
        if (flags & FORK_REARRANGE_STDIO) {
30,898✔
1764
                if (stdio_fds) {
14,546✔
1765
                        r = rearrange_stdio(stdio_fds[0], stdio_fds[1], stdio_fds[2]);
14,536✔
1766
                        if (r < 0) {
14,536✔
1767
                                log_full_errno(prio, r, "Failed to rearrange stdio fds: %m");
×
1768
                                _exit(EXIT_FAILURE);
×
1769
                        }
1770

1771
                        /* Turn off O_NONBLOCK on the fdio fds, in case it was left on */
1772
                        stdio_disable_nonblock();
14,536✔
1773
                } else {
1774
                        r = make_null_stdio();
10✔
1775
                        if (r < 0) {
10✔
1776
                                log_full_errno(prio, r, "Failed to connect stdin/stdout to /dev/null: %m");
×
1777
                                _exit(EXIT_FAILURE);
×
1778
                        }
1779
                }
1780
        } else if (flags & FORK_STDOUT_TO_STDERR) {
16,352✔
1781
                if (dup2(STDERR_FILENO, STDOUT_FILENO) < 0) {
2✔
1782
                        log_full_errno(prio, errno, "Failed to connect stdout to stderr: %m");
×
1783
                        _exit(EXIT_FAILURE);
×
1784
                }
1785
        }
1786

1787
        if (flags & FORK_CLOSE_ALL_FDS) {
30,898✔
1788
                /* Close the logs here in case it got reopened above, as close_all_fds() would close them for us */
1789
                log_close();
22,850✔
1790

1791
                r = close_all_fds(except_fds, n_except_fds);
22,850✔
1792
                if (r < 0) {
22,850✔
1793
                        log_full_errno(prio, r, "Failed to close all file descriptors: %m");
×
1794
                        _exit(EXIT_FAILURE);
×
1795
                }
1796
        }
1797

1798
        if (flags & FORK_PACK_FDS) {
30,898✔
1799
                /* FORK_CLOSE_ALL_FDS ensures that except_fds are the only FDs >= 3 that are
1800
                 * open, this is including the log. This is required by pack_fds, which will
1801
                 * get stuck in an infinite loop of any FDs other than except_fds are open. */
1802
                assert(FLAGS_SET(flags, FORK_CLOSE_ALL_FDS));
87✔
1803

1804
                r = pack_fds(except_fds, n_except_fds);
87✔
1805
                if (r < 0) {
87✔
1806
                        log_full_errno(prio, r, "Failed to pack file descriptors: %m");
×
1807
                        _exit(EXIT_FAILURE);
×
1808
                }
1809
        }
1810

1811
        if (flags & FORK_CLOEXEC_OFF) {
30,898✔
1812
                r = fd_cloexec_many(except_fds, n_except_fds, false);
96✔
1813
                if (r < 0) {
96✔
1814
                        log_full_errno(prio, r, "Failed to turn off O_CLOEXEC on file descriptors: %m");
×
1815
                        _exit(EXIT_FAILURE);
×
1816
                }
1817
        }
1818

1819
        /* When we were asked to reopen the logs, do so again now */
1820
        if (flags & FORK_REOPEN_LOG) {
30,898✔
1821
                log_open();
2,899✔
1822
                log_set_open_when_needed(false);
2,899✔
1823
        }
1824

1825
        if (flags & FORK_RLIMIT_NOFILE_SAFE) {
30,898✔
1826
                r = rlimit_nofile_safe();
18,744✔
1827
                if (r < 0) {
18,744✔
1828
                        log_full_errno(prio, r, "Failed to lower RLIMIT_NOFILE's soft limit to 1K: %m");
×
1829
                        _exit(EXIT_FAILURE);
×
1830
                }
1831
        }
1832

1833
        if (!FLAGS_SET(flags, FORK_KEEP_NOTIFY_SOCKET)) {
30,898✔
1834
                r = RET_NERRNO(unsetenv("NOTIFY_SOCKET"));
30,898✔
1835
                if (r < 0) {
×
1836
                        log_full_errno(prio, r, "Failed to unset $NOTIFY_SOCKET: %m");
×
1837
                        _exit(EXIT_FAILURE);
×
1838
                }
1839
        }
1840

1841
        if (FLAGS_SET(flags, FORK_FREEZE))
30,898✔
1842
                freeze();
×
1843

1844
        if (ret_pid) {
30,898✔
1845
                if (FLAGS_SET(flags, FORK_PID_ONLY))
26,808✔
1846
                        *ret_pid = PIDREF_MAKE_FROM_PID(getpid_cached());
7,418✔
1847
                else {
1848
                        r = pidref_set_self(ret_pid);
19,390✔
1849
                        if (r < 0) {
19,390✔
1850
                                log_full_errno(prio, r, "Failed to acquire PID reference on ourselves: %m");
×
1851
                                _exit(EXIT_FAILURE);
×
1852
                        }
1853
                }
1854
        }
1855

1856
        return 0;
1857
}
1858

1859
int safe_fork_full(
1860
                const char *name,
1861
                const int stdio_fds[3],
1862
                int except_fds[],
1863
                size_t n_except_fds,
1864
                ForkFlags flags,
1865
                pid_t *ret_pid) {
1866

1867
        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
22,609✔
1868
        int r;
13,424✔
1869

1870
        /* Getting the detached child process pid without pidfd is racy, so don't allow it if not returning
1871
         * a pidref to the caller. */
1872
        assert(!FLAGS_SET(flags, FORK_DETACH) || !ret_pid);
13,424✔
1873

1874
        r = pidref_safe_fork_full(name, stdio_fds, except_fds, n_except_fds, flags|FORK_PID_ONLY, ret_pid ? &pidref : NULL);
19,191✔
1875
        if (r < 0 || !ret_pid)
22,609✔
1876
                return r;
1877

1878
        *ret_pid = pidref.pid;
14,838✔
1879

1880
        return r;
14,838✔
1881
}
1882

1883
int namespace_fork(
1884
                const char *outer_name,
1885
                const char *inner_name,
1886
                int except_fds[],
1887
                size_t n_except_fds,
1888
                ForkFlags flags,
1889
                int pidns_fd,
1890
                int mntns_fd,
1891
                int netns_fd,
1892
                int userns_fd,
1893
                int root_fd,
1894
                pid_t *ret_pid) {
1895

1896
        int r;
165✔
1897

1898
        /* This is much like safe_fork(), but forks twice, and joins the specified namespaces in the middle
1899
         * process. This ensures that we are fully a member of the destination namespace, with pidns an all, so that
1900
         * /proc/self/fd works correctly. */
1901

1902
        r = safe_fork_full(outer_name,
483✔
1903
                           NULL,
1904
                           except_fds, n_except_fds,
1905
                           (flags|FORK_DEATHSIG_SIGINT|FORK_DEATHSIG_SIGTERM|FORK_DEATHSIG_SIGKILL) & ~(FORK_REOPEN_LOG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE), ret_pid);
165✔
1906
        if (r < 0)
318✔
1907
                return r;
1908
        if (r == 0) {
318✔
1909
                pid_t pid;
153✔
1910

1911
                /* Child */
1912

1913
                r = namespace_enter(pidns_fd, mntns_fd, netns_fd, userns_fd, root_fd);
153✔
1914
                if (r < 0) {
153✔
1915
                        log_full_errno(FLAGS_SET(flags, FORK_LOG) ? LOG_ERR : LOG_DEBUG, r, "Failed to join namespace: %m");
×
1916
                        _exit(EXIT_FAILURE);
×
1917
                }
1918

1919
                /* We mask a few flags here that either make no sense for the grandchild, or that we don't have to do again */
1920
                r = safe_fork_full(inner_name,
460✔
1921
                                   NULL,
1922
                                   except_fds, n_except_fds,
1923
                                   flags & ~(FORK_WAIT|FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_REARRANGE_STDIO), &pid);
153✔
1924
                if (r < 0)
307✔
1925
                        _exit(EXIT_FAILURE);
×
1926
                if (r == 0) {
307✔
1927
                        /* Child */
1928
                        if (ret_pid)
154✔
1929
                                *ret_pid = pid;
154✔
1930
                        return 0;
154✔
1931
                }
1932

1933
                r = wait_for_terminate_and_check(inner_name, pid, FLAGS_SET(flags, FORK_LOG) ? WAIT_LOG : 0);
306✔
1934
                if (r < 0)
153✔
1935
                        _exit(EXIT_FAILURE);
×
1936

1937
                _exit(r);
153✔
1938
        }
1939

1940
        return 1;
1941
}
1942

1943
int set_oom_score_adjust(int value) {
1944
        char t[DECIMAL_STR_MAX(int)];
4,165✔
1945

1946
        if (!oom_score_adjust_is_valid(value))
4,165✔
1947
                return -EINVAL;
4,165✔
1948

1949
        xsprintf(t, "%i", value);
4,165✔
1950

1951
        return write_string_file("/proc/self/oom_score_adj", t,
4,165✔
1952
                                 WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_DISABLE_BUFFER);
1953
}
1954

1955
int get_oom_score_adjust(int *ret) {
1956
        _cleanup_free_ char *t = NULL;
1,188✔
1957
        int r, a;
1,188✔
1958

1959
        r = read_virtual_file("/proc/self/oom_score_adj", SIZE_MAX, &t, NULL);
1,188✔
1960
        if (r < 0)
1,188✔
1961
                return r;
1962

1963
        delete_trailing_chars(t, WHITESPACE);
1,188✔
1964

1965
        r = safe_atoi(t, &a);
1,188✔
1966
        if (r < 0)
1,188✔
1967
                return r;
1968

1969
        if (!oom_score_adjust_is_valid(a))
1,188✔
1970
                return -ENODATA;
1971

1972
        if (ret)
1,188✔
1973
                *ret = a;
1,188✔
1974

1975
        return 0;
1976
}
1977

1978
/* Converts a resource limit value (the caller passes an RLIMIT_NICE soft limit) into a nice level, computed
 * as PRIO_MAX - limit and clamped to the range PRIO_MIN…PRIO_MAX-1. */
static int rlimit_to_nice(rlim_t limit) {
        /* Anything at or above this maps to the lowest nice level */
        const rlim_t ceiling = -PRIO_MIN + PRIO_MAX;

        if (limit >= ceiling)
                return PRIO_MIN; /* i.e. -20 */

        if (limit <= 1)
                return PRIO_MAX-1; /* i.e. 19 */

        return PRIO_MAX - (int) limit;
}
1987

1988
/* Tries to set the nice level of the calling process to 'priority'. If that is refused for lack of
 * privileges, moves as close to the requested level as the RLIMIT_NICE soft limit permits.
 *
 * Returns 1 if the requested level is in effect, 0 if only the next best level could be applied, and a
 * negative errno-style value on failure. */
int setpriority_closest(int priority) {
        struct rlimit nice_rlimit;
        int r, current, limit, best;

        /* First, simply try to set the requested nice level */
        r = RET_NERRNO(setpriority(PRIO_PROCESS, 0, priority));
        if (r >= 0)
                return 1;
        if (!ERRNO_IS_NEG_PRIVILEGE(r))
                return r;

        /* getpriority() may legitimately return negative values, hence detect errors via errno */
        errno = 0;
        current = getpriority(PRIO_PROCESS, 0);
        if (errno != 0)
                return -errno;

        if (priority == current)
                return 1;

        /* We'd expect that raising the nice level from the status quo always works. If it doesn't, then
         * the whole setpriority() system call is blocked for us, hence propagate the original error
         * right away. */
        if (priority > current)
                return r;

        if (getrlimit(RLIMIT_NICE, &nice_rlimit) < 0)
                return -errno;

        limit = rlimit_to_nice(nice_rlimit.rlim_cur);

        /* Push to the allowed limit if we are currently nicer than that. Note that we might also be less
         * nice than the limit allows, yet still nicer than requested; in that case the current value is
         * the best choice and nothing needs to be changed. */
        best = current;
        if (current > limit) {
                if (setpriority(PRIO_PROCESS, 0, limit) < 0)
                        return -errno;

                best = limit;
        }

        log_debug("Cannot set requested nice level (%i), using next best (%i).", priority, best);
        return 0;
}
2028

2029
_noreturn_ void freeze(void) {
2030
        log_close();
×
2031

2032
        /* Make sure nobody waits for us (i.e. on one of our sockets) anymore. Note that we use
2033
         * close_all_fds_without_malloc() instead of plain close_all_fds() here, since we want this function
2034
         * to be compatible with being called from signal handlers. */
2035
        (void) close_all_fds_without_malloc(NULL, 0);
×
2036

2037
        /* Let's not freeze right away, but keep reaping zombies. */
2038
        for (;;) {
×
2039
                siginfo_t si = {};
×
2040

2041
                if (waitid(P_ALL, 0, &si, WEXITED) < 0 && errno != EINTR)
×
2042
                        break;
2043
        }
2044

2045
        /* waitid() failed with an ECHLD error (because there are no left-over child processes) or any other
2046
         * (unexpected) error. Freeze for good now! */
2047
        for (;;)
×
2048
                pause();
×
2049
}
2050

2051
int get_process_threads(pid_t pid) {
2052
        _cleanup_free_ char *t = NULL;
7✔
2053
        int n, r;
7✔
2054

2055
        if (pid < 0)
7✔
2056
                return -EINVAL;
2057

2058
        r = procfs_file_get_field(pid, "status", "Threads", &t);
7✔
2059
        if (r == -ENOENT)
7✔
2060
                return -ESRCH;
2061
        if (r < 0)
7✔
2062
                return r;
2063

2064
        r = safe_atoi(t, &n);
7✔
2065
        if (r < 0)
7✔
2066
                return r;
2067
        if (n < 0)
7✔
2068
                return -EINVAL;
×
2069

2070
        return n;
2071
}
2072

2073
/* Checks whether we are running in a reaper process, i.e. whether we are expected to deal with processes
 * reparented to us: that is the case if we are PID 1, or if PR_SET_CHILD_SUBREAPER is in effect for us.
 * Returns > 0 if so, 0 if not, negative errno if the prctl() query fails. */
int is_reaper_process(void) {
        int subreaper = 0;

        if (getpid_cached() == 1)
                return true;

        /* The kernel writes the current setting to the int the second argument points to */
        if (prctl(PR_GET_CHILD_SUBREAPER, (unsigned long) &subreaper, 0UL, 0UL, 0UL) < 0)
                return -errno;

        return subreaper != 0;
}
2087

2088
/* Turns the child subreaper setting of the calling process on or off, according to 'b'. For PID 1 no
 * prctl() is issued: enabling is a no-op and disabling is refused with -EINVAL. Returns 0 on success,
 * negative errno on failure. */
int make_reaper_process(bool b) {

        if (getpid_cached() == 1)
                return b ? 0 : -EINVAL;

        /* Some prctl()s insist that all 5 arguments are specified, others do not. Always specify all of
         * them, to avoid any ambiguities. */
        if (prctl(PR_SET_CHILD_SUBREAPER, (unsigned long) b, 0UL, 0UL, 0UL) < 0)
                return -errno;

        return 0;
}
2105

2106
/* Defines posix_spawnattr_destroyp(), the handler used with _cleanup_() on posix_spawnattr_t* variables.
 * NOTE(review): presumably it calls posix_spawnattr_destroy() unless the pointer equals the third macro
 * argument (NULL) — confirm against the definition of DEFINE_TRIVIAL_CLEANUP_FUNC_FULL. */
DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(posix_spawnattr_t*, posix_spawnattr_destroy, NULL);
×
2107

2108
int posix_spawn_wrapper(
2109
                const char *path,
2110
                char * const *argv,
2111
                char * const *envp,
2112
                const char *cgroup,
2113
                PidRef *ret_pidref) {
2114

2115
        short flags = POSIX_SPAWN_SETSIGMASK;
2,232✔
2116
        posix_spawnattr_t attr;
2,232✔
2117
        sigset_t mask;
2,232✔
2118
        int r;
2,232✔
2119

2120
        /* Forks and invokes 'path' with 'argv' and 'envp' using CLONE_VM and CLONE_VFORK, which means the
2121
         * caller will be blocked until the child either exits or exec's. The memory of the child will be
2122
         * fully shared with the memory of the parent, so that there are no copy-on-write or memory.max
2123
         * issues.
2124
         *
2125
         * Also, move the newly-created process into 'cgroup' through POSIX_SPAWN_SETCGROUP (clone3())
2126
         * if available.
2127
         * returns 1: We're already in the right cgroup
2128
         *         0: 'cgroup' not specified or POSIX_SPAWN_SETCGROUP is not supported. The caller
2129
         *            needs to call 'cg_attach' on their own */
2130

2131
        assert(path);
2,232✔
2132
        assert(argv);
2,232✔
2133
        assert(ret_pidref);
2,232✔
2134

2135
        assert_se(sigfillset(&mask) >= 0);
2,232✔
2136

2137
        r = posix_spawnattr_init(&attr);
2,232✔
2138
        if (r != 0)
2,232✔
2139
                return -r; /* These functions return a positive errno on failure */
2,232✔
2140

2141
        /* Initialization needs to succeed before we can set up a destructor. */
2142
        _unused_ _cleanup_(posix_spawnattr_destroyp) posix_spawnattr_t *attr_destructor = &attr;
4,464✔
2143

2144
#if HAVE_PIDFD_SPAWN
2145
        static bool have_clone_into_cgroup = true; /* kernel 5.7+ */
2,232✔
2146
        _cleanup_close_ int cgroup_fd = -EBADF;
2,232✔
2147

2148
        if (cgroup && have_clone_into_cgroup) {
2,232✔
2149
                _cleanup_free_ char *resolved_cgroup = NULL;
2,232✔
2150

2151
                r = cg_get_path_and_check(
2,232✔
2152
                                SYSTEMD_CGROUP_CONTROLLER,
2153
                                cgroup,
2154
                                /* suffix= */ NULL,
2155
                                &resolved_cgroup);
2156
                if (r < 0)
2,232✔
2157
                        return r;
2158

2159
                cgroup_fd = open(resolved_cgroup, O_PATH|O_DIRECTORY|O_CLOEXEC);
2,232✔
2160
                if (cgroup_fd < 0)
2,232✔
2161
                        return -errno;
×
2162

2163
                r = posix_spawnattr_setcgroup_np(&attr, cgroup_fd);
2,232✔
2164
                if (r != 0)
2,232✔
2165
                        return -r;
×
2166

2167
                flags |= POSIX_SPAWN_SETCGROUP;
2,232✔
2168
        }
2169
#endif
2170

2171
        r = posix_spawnattr_setflags(&attr, flags);
2,232✔
2172
        if (r != 0)
2,232✔
2173
                return -r;
×
2174
        r = posix_spawnattr_setsigmask(&attr, &mask);
2,232✔
2175
        if (r != 0)
2,232✔
2176
                return -r;
×
2177

2178
#if HAVE_PIDFD_SPAWN
2179
        _cleanup_close_ int pidfd = -EBADF;
2,232✔
2180

2181
        r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp);
2,232✔
2182
        if (ERRNO_IS_NOT_SUPPORTED(r) && FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP) && cg_is_threaded(cgroup) > 0)
2,232✔
2183
                return -EUCLEAN; /* clone3() could also return EOPNOTSUPP if the target cgroup is in threaded mode,
2184
                                    turn that into something recognizable */
2185
        if ((ERRNO_IS_NOT_SUPPORTED(r) || ERRNO_IS_PRIVILEGE(r) || r == E2BIG) &&
2,232✔
2186
            FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP)) {
2187
                /* Compiled on a newer host, or seccomp&friends blocking clone3()? Fallback, but
2188
                 * need to disable POSIX_SPAWN_SETCGROUP, which is what redirects to clone3().
2189
                 * Note that we might get E2BIG here since some kernels (e.g. 5.4) support clone3()
2190
                 * but not CLONE_INTO_CGROUP. */
2191

2192
                /* CLONE_INTO_CGROUP definitely won't work, hence remember the fact so that we don't
2193
                 * retry every time. */
2194
                have_clone_into_cgroup = false;
×
2195

2196
                flags &= ~POSIX_SPAWN_SETCGROUP;
×
2197
                r = posix_spawnattr_setflags(&attr, flags);
×
2198
                if (r != 0)
×
2199
                        return -r;
×
2200

2201
                r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp);
×
2202
        }
2203
        if (r != 0)
2,232✔
2204
                return -r;
×
2205

2206
        r = pidref_set_pidfd_consume(ret_pidref, TAKE_FD(pidfd));
2,232✔
2207
        if (r < 0)
2,232✔
2208
                return r;
2209

2210
        return FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP);
2,232✔
2211
#else
2212
        pid_t pid;
2213

2214
        r = posix_spawn(&pid, path, NULL, &attr, argv, envp);
2215
        if (r != 0)
2216
                return -r;
2217

2218
        r = pidref_set_pid(ret_pidref, pid);
2219
        if (r < 0)
2220
                return r;
2221

2222
        return 0; /* We did not use CLONE_INTO_CGROUP so return 0, the caller will have to move the child */
2223
#endif
2224
}
2225

2226
/* Opens "/proc" as a directory stream.
 *
 * On success the stream is stored in *ret and 0 is returned. If opendir() fails, the negative errno it
 * left behind is returned and *ret is not written. */
int proc_dir_open(DIR **ret) {
        assert(ret);

        DIR *proc = opendir("/proc");
        if (proc) {
                *ret = proc;
                return 0;
        }

        return -errno;
}
2238

2239
int proc_dir_read(DIR *d, pid_t *ret) {
2240
        assert(d);
978✔
2241

2242
        for (;;) {
1,642✔
2243
                struct dirent *de;
1,642✔
2244

2245
                errno = 0;
1,642✔
2246
                de = readdir_no_dot(d);
1,642✔
2247
                if (!de) {
1,642✔
2248
                        if (errno != 0)
11✔
2249
                                return -errno;
×
2250

2251
                        break;
11✔
2252
                }
2253

2254
                if (!IN_SET(de->d_type, DT_DIR, DT_UNKNOWN))
1,631✔
2255
                        continue;
543✔
2256

2257
                if (parse_pid(de->d_name, ret) >= 0)
1,088✔
2258
                        return 1;
2259
        }
2260

2261
        if (ret)
11✔
2262
                *ret = 0;
11✔
2263
        return 0;
2264
}
2265

2266
/* Like proc_dir_read(), but hands the PID to pidref_set_pid() to fill in 'ret' instead of returning
 * the bare number.
 *
 * PIDs for which pidref_set_pid() reports -ESRCH are skipped and the enumeration continues with the
 * next entry.
 *
 * Returns 1 when 'ret' was set up, 0 at the end of the enumeration (then *ret is reset to PIDREF_NULL
 * if 'ret' is non-NULL), or a negative error from either proc_dir_read() or pidref_set_pid(). */
int proc_dir_read_pidref(DIR *d, PidRef *ret) {
        pid_t pid;
        int r;

        assert(d);

        while ((r = proc_dir_read(d, &pid)) > 0) {
                r = pidref_set_pid(ret, pid);
                if (r == -ESRCH) /* the process vanished in the meantime, try the next one */
                        continue;

                return r < 0 ? r : 1;
        }
        if (r < 0)
                return r;

        /* Nothing left to enumerate. */
        if (ret)
                *ret = PIDREF_NULL;
        return 0;
}
2293

2294
static const char *const sigchld_code_table[] = {
2295
        [CLD_EXITED] = "exited",
2296
        [CLD_KILLED] = "killed",
2297
        [CLD_DUMPED] = "dumped",
2298
        [CLD_TRAPPED] = "trapped",
2299
        [CLD_STOPPED] = "stopped",
2300
        [CLD_CONTINUED] = "continued",
2301
};
2302

2303
DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int);
7,679✔
2304

2305
static const char* const sched_policy_table[] = {
2306
        [SCHED_OTHER] = "other",
2307
        [SCHED_BATCH] = "batch",
2308
        [SCHED_IDLE] = "idle",
2309
        [SCHED_FIFO] = "fifo",
2310
        [SCHED_RR] = "rr",
2311
};
2312

2313
DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX);
3✔
2314

2315
_noreturn_ void report_errno_and_exit(int errno_fd, int error) {
2316
        int r;
48✔
2317

2318
        if (error >= 0)
48✔
2319
                _exit(EXIT_SUCCESS);
47✔
2320

2321
        assert(errno_fd >= 0);
1✔
2322

2323
        r = loop_write(errno_fd, &error, sizeof(error));
1✔
2324
        if (r < 0)
1✔
2325
                log_debug_errno(r, "Failed to write errno to errno_fd=%d: %m", errno_fd);
×
2326

2327
        _exit(EXIT_FAILURE);
1✔
2328
}
2329

2330
/* Reads an int-sized error code from 'errno_fd'.
 *
 * An error code sent by the peer cannot be told apart from an I/O error hit while reading it. Hence
 * every local problem (read failure, short read, nonsensical positive value) is logged at debug level
 * and reported as EIO, while a negative value received in full is passed through as the result.
 *
 * Returns 0 if nothing at all was received (treated as success) or if a zero was received. */
int read_errno(int errno_fd) {
        int code;

        assert(errno_fd >= 0);

        ssize_t got = loop_read(errno_fd, &code, sizeof(code), /* do_poll = */ false);
        if (got < 0) {
                log_debug_errno(got, "Failed to read errno: %m");
                return -EIO;
        }

        /* Nothing was written before the other end went away: assume success. */
        if (got == 0)
                return 0;

        if (got != sizeof(code))
                return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Received unexpected amount of bytes while reading errno.");

        /* A complete value arrived; only zero or a negative errno is meaningful. */
        if (code > 0)
                return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Received an errno, but it's a positive value.");
        if (code < 0)
                return log_debug_errno(code, "Child process failed with errno: %m");

        return 0;
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc