• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemd / systemd / 22810165351

07 Mar 2026 03:22PM UTC coverage: 72.6% (-0.03%) from 72.63%
22810165351

push

github

web-flow
user-record: fix segfault when processing matchHostname field (#40979)

Fix a typo which causes a segfault when processing a user record
with `matchHostname` when it's an array instead of a simple string:

```
$ echo '{"userName":"crashhostarray","perMachine":[{"matchHostname":["host1","host2"],"locked":false}]}' | userdbctl -F -
Segmentation fault         (core dumped)

$ coredumpctl info
...
       Message: Process 1172301 (userdbctl) of user 1000 dumped core.

                Module libz.so.1 from rpm zlib-ng-2.3.3-1.fc43.x86_64
                Module libcrypto.so.3 from rpm openssl-3.5.4-2.fc43.x86_64
                Stack trace of thread 1172301:
                #0  0x00007fded7b3a656 __strcmp_evex (libc.so.6 + 0x159656)
                #1  0x00007fded7e95397 per_machine_hostname_match (libsystemd-shared-260.so + 0x295397)
                #2  0x00007fded7e955b5 per_machine_match (libsystemd-shared-260.so + 0x2955b5)
                #3  0x00007fded7e957c6 dispatch_per_machine (libsystemd-shared-260.so + 0x2957c6)
                #4  0x00007fded7e96c97 user_record_load (libsystemd-shared-260.so + 0x296c97)
                #5  0x000000000040572d display_user (/home/fsumsal/repos/@systemd/systemd/build/userdbctl + 0x572d)
                #6  0x00007fded7ea9727 dispatch_verb (libsystemd-shared-260.so + 0x2a9727)
                #7  0x000000000041077c run (/home/fsumsal/repos/@systemd/systemd/build/userdbctl + 0x1077c)
                #8  0x00000000004107ce main (/home/fsumsal/repos/@systemd/systemd/build/userdbctl + 0x107ce)
                #9  0x00007fded79e45b5 __libc_start_call_main (libc.so.6 + 0x35b5)
                #10 0x00007fded79e4668 __libc_start_main@@GLIBC_2.34 (libc.so.6 + 0x3668)
                #11 0x00000000004038d5 _start (/home/fsumsal/repos/@systemd/systemd/build/userdbctl + 0x38d5)
                ELF object binary architecture: AMD x86-64
```

5 of 35 new or added lines in 2 files covered. (14.29%)

340 existing lines in 46 files now uncovered.

316214 of 435555 relevant lines covered (72.6%)

1144732.27 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

58.07
/src/core/main.c
1
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2

3
#include <fcntl.h>
4
#include <getopt.h>
5
#include <linux/oom.h>
6
#include <linux/vt.h>
7
#include <stdlib.h>
8
#include <sys/mount.h>
9
#include <sys/prctl.h>
10
#include <sys/utsname.h>
11
#include <unistd.h>
12

13
#if HAVE_VALGRIND_VALGRIND_H
14
#  include <valgrind/valgrind.h>
15
#endif
16

17
#include "sd-bus.h"
18
#include "sd-daemon.h"
19
#include "sd-messages.h"
20

21
#include "alloc-util.h"
22
#include "apparmor-setup.h"
23
#include "architecture.h"
24
#include "argv-util.h"
25
#include "build.h"
26
#include "bus-error.h"
27
#include "capability-util.h"
28
#include "cgroup-setup.h"
29
#include "chase.h"
30
#include "clock-util.h"
31
#include "clock-warp.h"
32
#include "conf-parser.h"
33
#include "confidential-virt.h"
34
#include "constants.h"
35
#include "copy.h"
36
#include "coredump-util.h"
37
#include "cpu-set-util.h"
38
#include "crash-handler.h"
39
#include "dbus.h"
40
#include "dbus-manager.h"
41
#include "dev-setup.h"
42
#include "efi-random.h"
43
#include "emergency-action.h"
44
#include "env-util.h"
45
#include "escape.h"
46
#include "fd-util.h"
47
#include "fdset.h"
48
#include "fileio.h"
49
#include "format-util.h"
50
#include "getopt-defs.h"
51
#include "hexdecoct.h"
52
#include "hostname-setup.h"
53
#include "id128-util.h"
54
#include "ima-setup.h"
55
#include "import-creds.h"
56
#include "initrd-util.h"
57
#include "io-util.h"
58
#include "ipe-setup.h"
59
#include "killall.h"
60
#include "kmod-setup.h"
61
#include "label-util.h"
62
#include "libmount-util.h"
63
#include "limits-util.h"
64
#include "load-fragment.h"
65
#include "log.h"
66
#include "loopback-setup.h"
67
#include "machine-id-setup.h"
68
#include "main.h"
69
#include "manager.h"
70
#include "manager-dump.h"
71
#include "manager-serialize.h"
72
#include "mkdir-label.h"
73
#include "mount-setup.h"
74
#include "mount-util.h"
75
#include "os-util.h"
76
#include "osc-context.h"
77
#include "pager.h"
78
#include "parse-argument.h"
79
#include "parse-util.h"
80
#include "path-util.h"
81
#include "pretty-print.h"
82
#include "proc-cmdline.h"
83
#include "process-util.h"
84
#include "random-util.h"
85
#include "rlimit-util.h"
86
#include "rm-rf.h"
87
#include "seccomp-util.h"
88
#include "selinux-setup.h"
89
#include "selinux-util.h"
90
#include "serialize.h"
91
#include "set.h"
92
#include "signal-util.h"
93
#include "smack-setup.h"
94
#include "special.h"
95
#include "stat-util.h"
96
#include "stdio-util.h"
97
#include "strv.h"
98
#include "switch-root.h"
99
#include "sysctl-util.h"
100
#include "terminal-util.h"
101
#include "time-util.h"
102
#include "umask-util.h"
103
#include "unit-name.h"
104
#include "user-util.h"
105
#include "utf8.h"
106
#include "version.h"
107
#include "virt.h"
108
#include "watchdog.h"
109

110
#if HAS_FEATURE_ADDRESS_SANITIZER
111
#include <sanitizer/lsan_interface.h>
112
#endif
113

114
/* The top-level operation selected for this invocation. The variable starts out as ACTION_RUN; the code
 * that switches it to one of the other values is not part of this excerpt. */
static enum {
115
        ACTION_RUN,
116
        ACTION_HELP,
117
        ACTION_VERSION,
118
        ACTION_TEST,
119
        ACTION_DUMP_CONFIGURATION_ITEMS,
120
        ACTION_DUMP_BUS_PROPERTIES,
121
        ACTION_BUS_INTROSPECT,
122
} arg_action = ACTION_RUN;
123

124
static const char *arg_bus_introspect = NULL;
125

126
/* Those variables are initialized to 0 automatically, so we avoid uninitialized memory access.  Real
127
 * defaults are assigned in reset_arguments() below. */
128
static char *arg_default_unit;
129
static RuntimeScope arg_runtime_scope;
130
bool arg_dump_core;
131
int arg_crash_chvt;
132
bool arg_crash_shell;
133
CrashAction arg_crash_action;
134
static char *arg_confirm_spawn;
135
static ShowStatus arg_show_status;
136
static StatusUnitFormat arg_status_unit_format;
137
static bool arg_switched_root;
138
static PagerFlags arg_pager_flags;
139
static bool arg_service_watchdogs;
140
static UnitDefaults arg_defaults;
141
static usec_t arg_runtime_watchdog;
142
static usec_t arg_reboot_watchdog;
143
static usec_t arg_kexec_watchdog;
144
static usec_t arg_pretimeout_watchdog;
145
static char *arg_early_core_pattern;
146
static char *arg_watchdog_pretimeout_governor;
147
static char *arg_watchdog_device;
148
static char **arg_default_environment;
149
static char **arg_manager_environment;
150
static uint64_t arg_capability_bounding_set;
151
static bool arg_no_new_privs;
152
static int arg_protect_system;
153
static nsec_t arg_timer_slack_nsec;
154
static Set* arg_syscall_archs;
155
static FILE* arg_serialization;
156
static sd_id128_t arg_machine_id;
157
static bool arg_machine_id_from_firmware = false;
158
static EmergencyAction arg_cad_burst_action;
159
static CPUSet arg_cpu_affinity;
160
static NUMAPolicy arg_numa_policy;
161
static usec_t arg_clock_usec;
162
static void *arg_random_seed;
163
static size_t arg_random_seed_size;
164
static usec_t arg_reload_limit_interval_sec;
165
static unsigned arg_reload_limit_burst;
166

167
/* A copy of the original environment block */
168
static char **saved_env = NULL;
169

170
static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
171
                               const struct rlimit *saved_rlimit_memlock);
172

173
static DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_crash_action, crash_action, CrashAction, CRASH_FREEZE);
×
174

175
static int manager_find_user_config_paths(char ***ret_files, char ***ret_dirs) {
223✔
176
        _cleanup_free_ char *base = NULL;
223✔
177
        _cleanup_strv_free_ char **files = NULL, **dirs = NULL;
223✔
178
        int r;
223✔
179

180
        r = xdg_user_config_dir("/systemd", &base);
223✔
181
        if (r < 0)
223✔
182
                return r;
183

184
        r = strv_extendf(&files, "%s/user.conf", base);
223✔
185
        if (r < 0)
223✔
186
                return r;
187

188
        r = strv_extend(&files, PKGSYSCONFDIR "/user.conf");
223✔
189
        if (r < 0)
223✔
190
                return r;
191

192
        r = strv_consume(&dirs, TAKE_PTR(base));
223✔
193
        if (r < 0)
223✔
194
                return r;
195

196
        r = strv_extend_strv(&dirs, CONF_PATHS_STRV("systemd"), false);
223✔
197
        if (r < 0)
223✔
198
                return r;
199

200
        *ret_files = TAKE_PTR(files);
223✔
201
        *ret_dirs = TAKE_PTR(dirs);
223✔
202
        return 0;
223✔
203
}
204

205
/* Queries the window size of the terminal referenced by tty_fd via TIOCGWINSZ and exports it to the
 * process environment as $COLUMNS and $LINES (overwriting existing values).
 *
 * tty_fd must be a valid (non-negative) file descriptor.
 *
 * Returns 1 if both variables were set. Returns 0 if the size could not be determined, if both dimensions
 * are reported as zero, or if setting either variable failed; in all of these cases both variables are
 * removed from the environment so that no stale or half-updated pair is left behind. Failures are only
 * logged at debug level, never propagated. */
static int save_console_winsize_in_environment(int tty_fd) {
        int r;

        assert(tty_fd >= 0);

        struct winsize ws = {};
        if (ioctl(tty_fd, TIOCGWINSZ, &ws) < 0) {
                /* Include %m so that the errno we pass in actually shows up in the message. */
                log_debug_errno(errno, "Failed to acquire console window size, ignoring: %m");
                goto unset;
        }

        /* Only bail out if neither dimension is known. */
        if (ws.ws_col <= 0 && ws.ws_row <= 0) {
                log_debug("No console window size set, ignoring.");
                goto unset;
        }

        r = setenvf("COLUMNS", /* overwrite= */ true, "%u", ws.ws_col);
        if (r < 0) {
                log_debug_errno(r, "Failed to set $COLUMNS, ignoring: %m");
                goto unset;
        }

        r = setenvf("LINES", /* overwrite= */ true, "%u", ws.ws_row);
        if (r < 0) {
                log_debug_errno(r, "Failed to set $LINES, ignoring: %m");
                goto unset;
        }

        log_debug("Recorded console dimensions in environment: $COLUMNS=%u $LINES=%u.", ws.ws_col, ws.ws_row);
        return 1;

unset:
        /* Drop both variables together, so that we never export only one of the two. */
        (void) unsetenv("COLUMNS");
        (void) unsetenv("LINES");
        return 0;
}
241

242
static int console_setup(void) {
36✔
243

244
        if (getpid_cached() != 1)
36✔
245
                return 0;
36✔
246

247
        _cleanup_close_ int tty_fd = -EBADF;
36✔
248

249
        tty_fd = open_terminal("/dev/console", O_RDWR|O_NOCTTY|O_CLOEXEC);
36✔
250
        if (tty_fd < 0)
36✔
251
                return log_error_errno(tty_fd, "Failed to open %s: %m", "/dev/console");
×
252

253
        /* We don't want to force text mode. Plymouth may be showing pictures already from initrd. */
254
        reset_dev_console_fd(tty_fd, /* switch_to_text= */ false);
36✔
255

256
        save_console_winsize_in_environment(tty_fd);
36✔
257

258
        return 0;
259
}
260

261
/* Parses a timeout specification into *ret.
 *
 * Two keywords are recognized: "default" yields USEC_INFINITY and "off" yields 0. Any other string is
 * handed to parse_sec(), whose return value is passed through unmodified. For the two keywords the
 * function returns 0. */
static int parse_timeout(const char *value, usec_t *ret) {
        assert(value);
        assert(ret);

        if (streq(value, "default")) {
                *ret = USEC_INFINITY;
                return 0;
        }

        if (streq(value, "off")) {
                *ret = 0;
                return 0;
        }

        return parse_sec(value, ret);
}
276

277
static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
2,591✔
278
        int r;
2,591✔
279

280
        assert(key);
2,591✔
281

282
        if (STR_IN_SET(key, "systemd.unit", "rd.systemd.unit")) {
2,591✔
283

284
                if (proc_cmdline_value_missing(key, value))
83✔
285
                        return 0;
72✔
286

287
                if (!unit_name_is_valid(value, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
83✔
288
                        log_warning("Unit name specified on %s= is not valid, ignoring: %s", key, value);
×
289
                else if (in_initrd() == !!startswith(key, "rd."))
83✔
290
                        return free_and_strdup_warn(&arg_default_unit, value);
72✔
291

292
        } else if (proc_cmdline_key_streq(key, "systemd.dump_core")) {
2,508✔
293

294
                r = value ? parse_boolean(value) : true;
×
295
                if (r < 0)
×
296
                        log_warning_errno(r, "Failed to parse dump core switch %s, ignoring: %m", value);
×
297
                else
298
                        arg_dump_core = r;
×
299

300
        } else if (proc_cmdline_key_streq(key, "systemd.early_core_pattern")) {
2,508✔
301

302
                if (proc_cmdline_value_missing(key, value))
83✔
303
                        return 0;
304

305
                if (path_is_absolute(value))
83✔
306
                        (void) parse_path_argument(value, false, &arg_early_core_pattern);
83✔
307
                else
308
                        log_warning("Specified core pattern '%s' is not an absolute path, ignoring.", value);
×
309

310
        } else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) {
2,425✔
311

312
                if (!value)
×
313
                        arg_crash_chvt = 0; /* turn on */
×
314
                else {
315
                        r = parse_crash_chvt(value, &arg_crash_chvt);
×
316
                        if (r < 0)
×
317
                                log_warning_errno(r, "Failed to parse crash chvt switch %s, ignoring: %m", value);
×
318
                }
319

320
        } else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) {
2,425✔
321

322
                r = value ? parse_boolean(value) : true;
166✔
323
                if (r < 0)
83✔
324
                        log_warning_errno(r, "Failed to parse crash shell switch %s, ignoring: %m", value);
×
325
                else
326
                        arg_crash_shell = r;
166✔
327

328
        } else if (proc_cmdline_key_streq(key, "systemd.crash_reboot")) {
2,259✔
329

330
                r = value ? parse_boolean(value) : true;
×
331
                if (r < 0)
×
332
                        log_warning_errno(r, "Failed to parse crash reboot switch %s, ignoring: %m", value);
×
333
                else
334
                        arg_crash_action = r ? CRASH_REBOOT : CRASH_FREEZE;
×
335

336
        } else if (proc_cmdline_key_streq(key, "systemd.crash_action")) {
2,259✔
337

338
                if (proc_cmdline_value_missing(key, value))
83✔
339
                        return 0;
340

341
                r = crash_action_from_string(value);
83✔
342
                if (r < 0)
83✔
343
                        log_warning_errno(r, "Failed to parse crash action switch %s, ignoring: %m", value);
×
344
                else
345
                        arg_crash_action = r;
83✔
346

347
        } else if (proc_cmdline_key_streq(key, "systemd.confirm_spawn")) {
2,176✔
348
                char *s;
×
349

350
                r = parse_confirm_spawn(value, &s);
×
351
                if (r < 0)
×
352
                        log_warning_errno(r, "Failed to parse confirm_spawn switch %s, ignoring: %m", value);
×
353
                else
354
                        free_and_replace(arg_confirm_spawn, s);
×
355

356
        } else if (proc_cmdline_key_streq(key, "systemd.service_watchdogs")) {
2,176✔
357

358
                r = value ? parse_boolean(value) : true;
×
359
                if (r < 0)
×
360
                        log_warning_errno(r, "Failed to parse service watchdog switch %s, ignoring: %m", value);
×
361
                else
362
                        arg_service_watchdogs = r;
×
363

364
        } else if (proc_cmdline_key_streq(key, "systemd.show_status")) {
2,176✔
365

366
                if (value) {
83✔
367
                        r = parse_show_status(value, &arg_show_status);
83✔
368
                        if (r < 0)
83✔
369
                                log_warning_errno(r, "Failed to parse show status switch %s, ignoring: %m", value);
×
370
                } else
371
                        arg_show_status = SHOW_STATUS_YES;
×
372

373
        } else if (proc_cmdline_key_streq(key, "systemd.status_unit_format")) {
2,093✔
374

375
                if (proc_cmdline_value_missing(key, value))
×
376
                        return 0;
377

378
                r = status_unit_format_from_string(value);
×
379
                if (r < 0)
×
380
                        log_warning_errno(r, "Failed to parse %s=%s, ignoring: %m", key, value);
×
381
                else
382
                        arg_status_unit_format = r;
×
383

384
        } else if (proc_cmdline_key_streq(key, "systemd.default_standard_output")) {
2,093✔
385

386
                if (proc_cmdline_value_missing(key, value))
×
387
                        return 0;
388

389
                r = exec_output_from_string(value);
×
390
                if (r < 0)
×
391
                        log_warning_errno(r, "Failed to parse default standard output switch %s, ignoring: %m", value);
×
392
                else
393
                        arg_defaults.std_output = r;
×
394

395
        } else if (proc_cmdline_key_streq(key, "systemd.default_standard_error")) {
2,093✔
396

397
                if (proc_cmdline_value_missing(key, value))
×
398
                        return 0;
399

400
                r = exec_output_from_string(value);
×
401
                if (r < 0)
×
402
                        log_warning_errno(r, "Failed to parse default standard error switch %s, ignoring: %m", value);
×
403
                else
404
                        arg_defaults.std_error = r;
×
405

406
        } else if (streq(key, "systemd.setenv")) {
2,093✔
407

408
                if (proc_cmdline_value_missing(key, value))
83✔
409
                        return 0;
410

411
                if (!env_assignment_is_valid(value))
83✔
412
                        log_warning("Environment variable assignment '%s' is not valid. Ignoring.", value);
×
413
                else {
414
                        r = strv_env_replace_strdup(&arg_default_environment, value);
83✔
415
                        if (r < 0)
83✔
416
                                return log_oom();
×
417
                }
418

419
        } else if (proc_cmdline_key_streq(key, "systemd.machine_id")) {
2,010✔
420

421
                if (proc_cmdline_value_missing(key, value))
×
422
                        return 0;
423

424
                if (streq(value, "firmware"))
×
425
                        arg_machine_id_from_firmware = true;
×
426
                else {
427
                        r = id128_from_string_nonzero(value, &arg_machine_id);
×
428
                        if (r < 0)
×
429
                                log_warning_errno(r, "MachineID '%s' is not valid, ignoring: %m", value);
×
430
                        else
431
                                arg_machine_id_from_firmware = false;
×
432
                }
433
        } else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) {
2,010✔
434

435
                if (proc_cmdline_value_missing(key, value))
×
436
                        return 0;
437

438
                r = parse_sec(value, &arg_defaults.timeout_start_usec);
×
439
                if (r < 0)
×
440
                        log_warning_errno(r, "Failed to parse default start timeout '%s', ignoring: %m", value);
×
441

442
                if (arg_defaults.timeout_start_usec <= 0)
×
443
                        arg_defaults.timeout_start_usec = USEC_INFINITY;
×
444

445
        } else if (proc_cmdline_key_streq(key, "systemd.default_device_timeout_sec")) {
2,010✔
446

447
                if (proc_cmdline_value_missing(key, value))
×
448
                        return 0;
449

450
                r = parse_sec(value, &arg_defaults.device_timeout_usec);
×
451
                if (r < 0)
×
452
                        log_warning_errno(r, "Failed to parse default device timeout '%s', ignoring: %m", value);
×
453

454
                if (arg_defaults.device_timeout_usec <= 0)
×
455
                        arg_defaults.device_timeout_usec = USEC_INFINITY;
×
456

457
        } else if (proc_cmdline_key_streq(key, "systemd.cpu_affinity")) {
2,010✔
458

459
                if (proc_cmdline_value_missing(key, value))
×
460
                        return 0;
461

462
                r = parse_cpu_set(value, &arg_cpu_affinity);
×
463
                if (r < 0)
×
464
                        log_warning_errno(r, "Failed to parse CPU affinity mask '%s', ignoring: %m", value);
×
465

466
        } else if (proc_cmdline_key_streq(key, "systemd.watchdog_device")) {
2,010✔
467

468
                if (proc_cmdline_value_missing(key, value))
×
469
                        return 0;
470

471
                (void) parse_path_argument(value, false, &arg_watchdog_device);
×
472

473
        } else if (proc_cmdline_key_streq(key, "systemd.watchdog_sec")) {
2,010✔
474

475
                if (proc_cmdline_value_missing(key, value))
×
476
                        return 0;
477

478
                r = parse_timeout(value, &arg_runtime_watchdog);
×
479
                if (r < 0) {
×
480
                        log_warning_errno(r, "Failed to parse systemd.watchdog_sec= argument '%s', ignoring: %m", value);
×
481
                        return 0;
×
482
                }
483

484
                arg_kexec_watchdog = arg_reboot_watchdog = arg_runtime_watchdog;
×
485

486
        } else if (proc_cmdline_key_streq(key, "systemd.watchdog_pre_sec")) {
2,010✔
487

488
                if (proc_cmdline_value_missing(key, value))
×
489
                        return 0;
490

491
                r = parse_timeout(value, &arg_pretimeout_watchdog);
×
492
                if (r < 0) {
×
493
                        log_warning_errno(r, "Failed to parse systemd.watchdog_pre_sec= argument '%s', ignoring: %m", value);
×
494
                        return 0;
×
495
                }
496

497
        } else if (proc_cmdline_key_streq(key, "systemd.watchdog_pretimeout_governor")) {
2,010✔
498

499
                if (proc_cmdline_value_missing(key, value) || isempty(value)) {
×
500
                        arg_watchdog_pretimeout_governor = mfree(arg_watchdog_pretimeout_governor);
×
501
                        return 0;
×
502
                }
503

504
                if (!string_is_safe(value)) {
×
505
                        log_warning("Watchdog pretimeout governor '%s' is not valid, ignoring.", value);
×
506
                        return 0;
×
507
                }
508

509
                return free_and_strdup_warn(&arg_watchdog_pretimeout_governor, value);
×
510

511
        } else if (proc_cmdline_key_streq(key, "systemd.clock_usec")) {
2,010✔
512

513
                if (proc_cmdline_value_missing(key, value))
×
514
                        return 0;
515

516
                r = safe_atou64(value, &arg_clock_usec);
×
517
                if (r < 0)
×
518
                        log_warning_errno(r, "Failed to parse systemd.clock_usec= argument, ignoring: %s", value);
×
519

520
        } else if (proc_cmdline_key_streq(key, "systemd.random_seed")) {
2,010✔
521
                void *p;
×
522
                size_t sz;
×
523

524
                if (proc_cmdline_value_missing(key, value))
×
525
                        return 0;
×
526

527
                r = unbase64mem(value, &p, &sz);
×
528
                if (r < 0)
×
529
                        log_warning_errno(r, "Failed to parse systemd.random_seed= argument, ignoring: %s", value);
×
530

531
                free(arg_random_seed);
×
532
                arg_random_seed = sz > 0 ? p : mfree(p);
×
533
                arg_random_seed_size = sz;
×
534

535
        } else if (proc_cmdline_key_streq(key, "systemd.reload_limit_interval_sec")) {
2,010✔
536

537
                if (proc_cmdline_value_missing(key, value))
×
538
                        return 0;
539

540
                r = parse_sec(value, &arg_reload_limit_interval_sec);
×
541
                if (r < 0) {
×
542
                        log_warning_errno(r, "Failed to parse systemd.reload_limit_interval_sec= argument '%s', ignoring: %m", value);
×
543
                        return 0;
×
544
                }
545

546
        } else if (proc_cmdline_key_streq(key, "systemd.reload_limit_burst")) {
2,010✔
547

548
                if (proc_cmdline_value_missing(key, value))
×
549
                        return 0;
550

551
                r = safe_atou(value, &arg_reload_limit_burst);
×
552
                if (r < 0) {
×
553
                        log_warning_errno(r, "Failed to parse systemd.reload_limit_burst= argument '%s', ignoring: %m", value);
×
554
                        return 0;
×
555
                }
556

557
        } else if (streq(key, "quiet") && !value) {
2,010✔
558

559
                if (arg_show_status == _SHOW_STATUS_INVALID)
×
560
                        arg_show_status = SHOW_STATUS_ERROR;
×
561

562
        } else if (streq(key, "debug") && !value) {
2,010✔
563

564
                /* Note that log_parse_environment() handles 'debug'
565
                 * too, and sets the log level to LOG_DEBUG. */
566

567
                if (detect_container() > 0)
×
568
                        log_set_target(LOG_TARGET_CONSOLE);
×
569

570
        } else if (!value) {
2,010✔
571
                const char *target;
178✔
572

573
                /* Compatible with SysV, but supported independently even if SysV compatibility is disabled. */
574
                target = runlevel_to_target(key);
178✔
575
                if (target)
178✔
576
                        return free_and_strdup_warn(&arg_default_unit, target);
×
577
        }
578

579
        return 0;
2,519✔
580
}
581

582
#define DEFINE_SETTER(name, func, descr)                              \
583
        static int name(const char *unit,                             \
584
                        const char *filename,                         \
585
                        unsigned line,                                \
586
                        const char *section,                          \
587
                        unsigned section_line,                        \
588
                        const char *lvalue,                           \
589
                        int ltype,                                    \
590
                        const char *rvalue,                           \
591
                        void *data,                                   \
592
                        void *userdata) {                             \
593
                                                                      \
594
                int r;                                                \
595
                                                                      \
596
                assert(filename);                                     \
597
                assert(lvalue);                                       \
598
                assert(rvalue);                                       \
599
                                                                      \
600
                r = func(rvalue);                                     \
601
                if (r < 0)                                            \
602
                        log_syntax(unit, LOG_ERR, filename, line, r,  \
603
                                   "Invalid " descr "'%s': %m",       \
604
                                   rvalue);                           \
605
                                                                      \
606
                return 0;                                             \
607
        }
608

609
DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level");
×
610
DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target");
×
611
DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color");
×
612
DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location");
×
613
DEFINE_SETTER(config_parse_time, log_show_time_from_string, "time");
×
614

615
static int config_parse_default_timeout_abort(
×
616
                const char *unit,
617
                const char *filename,
618
                unsigned line,
619
                const char *section,
620
                unsigned section_line,
621
                const char *lvalue,
622
                int ltype,
623
                const char *rvalue,
624
                void *data,
625
                void *userdata) {
626
        int r;
×
627

628
        r = config_parse_timeout_abort(
×
629
                        unit,
630
                        filename,
631
                        line,
632
                        section,
633
                        section_line,
634
                        lvalue,
635
                        ltype,
636
                        rvalue,
637
                        &arg_defaults.timeout_abort_usec,
638
                        userdata);
639
        if (r >= 0)
×
640
                arg_defaults.timeout_abort_set = r;
×
641
        return 0;
×
642
}
643

644
static int config_parse_oom_score_adjust(
×
645
                const char *unit,
646
                const char *filename,
647
                unsigned line,
648
                const char *section,
649
                unsigned section_line,
650
                const char *lvalue,
651
                int ltype,
652
                const char *rvalue,
653
                void *data,
654
                void *userdata) {
655

656
        int oa, r;
×
657

658
        if (isempty(rvalue)) {
×
659
                arg_defaults.oom_score_adjust_set = false;
×
660
                return 0;
×
661
        }
662

663
        r = parse_oom_score_adjust(rvalue, &oa);
×
664
        if (r < 0)
×
665
                return log_syntax_parse_error(unit, filename, line, r, lvalue, rvalue);
×
666

667
        arg_defaults.oom_score_adjust = oa;
×
668
        arg_defaults.oom_score_adjust_set = true;
×
669

670
        return 0;
×
671
}
672

673
/* Configuration parser callback storing a tri-state in the int that `data` points to (which must not be
 * NULL): -1 for an empty value or "auto", otherwise the result of parse_boolean(). If the value is not a
 * valid boolean the result of log_syntax_parse_error() is returned and the target is left unmodified. */
static int config_parse_protect_system_pid1(
                const char *unit,
                const char *filename,
                unsigned line,
                const char *section,
                unsigned section_line,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        int *target = ASSERT_PTR(data);
        int state;

        /* This follows the per-service ProtectSystem= setting, but is more restricted in one way and more
         * automatic in another: for now only yes/no are understood (neither "strict" nor "full"), and
         * unless configured otherwise the protection is turned on automatically in the initrd.
         *
         * Matching the per-service ProtectSystem= more closely might be done later, but is not trivial
         * because of ordering constraints: at the time this logic is enabled not much besides /usr/ is
         * mounted yet. */

        if (isempty(rvalue) || streq(rvalue, "auto"))
                state = -1;
        else {
                state = parse_boolean(rvalue);
                if (state < 0)
                        return log_syntax_parse_error(unit, filename, line, state, lvalue, rvalue);
        }

        *target = state;
        return 0;
}
707

708
static int config_parse_crash_reboot(
×
709
                const char *unit,
710
                const char *filename,
711
                unsigned line,
712
                const char *section,
713
                unsigned section_line,
714
                const char *lvalue,
715
                int ltype,
716
                const char *rvalue,
717
                void *data,
718
                void *userdata) {
719

720
        CrashAction *v = ASSERT_PTR(data);
×
721
        int r;
×
722

723
        if (isempty(rvalue)) {
×
724
                *v = CRASH_REBOOT;
×
725
                return 0;
×
726
        }
727

728
        r = parse_boolean(rvalue);
×
729
        if (r < 0)
×
730
                return log_syntax_parse_error(unit, filename, line, r, lvalue, rvalue);
×
731

732
        *v = r > 0 ? CRASH_REBOOT : CRASH_FREEZE;
×
733
        return 0;
×
734
}
735

736
static int parse_config_file(void) {
306✔
737
        const ConfigTableItem items[] = {
306✔
738
                { "Manager", "LogLevel",                     config_parse_level2,                0,                        NULL                              },
739
                { "Manager", "LogTarget",                    config_parse_target,                0,                        NULL                              },
740
                { "Manager", "LogColor",                     config_parse_color,                 0,                        NULL                              },
741
                { "Manager", "LogLocation",                  config_parse_location,              0,                        NULL                              },
742
                { "Manager", "LogTime",                      config_parse_time,                  0,                        NULL                              },
743
                { "Manager", "DumpCore",                     config_parse_bool,                  0,                        &arg_dump_core                    },
744
                { "Manager", "CrashChVT", /* legacy */       config_parse_crash_chvt,            0,                        &arg_crash_chvt                   },
745
                { "Manager", "CrashChangeVT",                config_parse_crash_chvt,            0,                        &arg_crash_chvt                   },
746
                { "Manager", "CrashShell",                   config_parse_bool,                  0,                        &arg_crash_shell                  },
747
                { "Manager", "CrashReboot",                  config_parse_crash_reboot,          0,                        &arg_crash_action                 },
748
                { "Manager", "CrashAction",                  config_parse_crash_action,          0,                        &arg_crash_action                 },
749
                { "Manager", "ShowStatus",                   config_parse_show_status,           0,                        &arg_show_status                  },
750
                { "Manager", "StatusUnitFormat",             config_parse_status_unit_format,    0,                        &arg_status_unit_format           },
751
                { "Manager", "CPUAffinity",                  config_parse_cpu_set,               0,                        &arg_cpu_affinity                 },
752
                { "Manager", "NUMAPolicy",                   config_parse_numa_policy,           0,                        &arg_numa_policy.type             },
753
                { "Manager", "NUMAMask",                     config_parse_numa_mask,             0,                        &arg_numa_policy.nodes            },
754
                { "Manager", "JoinControllers",              config_parse_warn_compat,           DISABLED_LEGACY,          NULL                              },
755
                { "Manager", "RuntimeWatchdogSec",           config_parse_watchdog_sec,          0,                        &arg_runtime_watchdog             },
756
                { "Manager", "RuntimeWatchdogPreSec",        config_parse_watchdog_sec,          0,                        &arg_pretimeout_watchdog          },
757
                { "Manager", "RebootWatchdogSec",            config_parse_watchdog_sec,          0,                        &arg_reboot_watchdog              },
758
                { "Manager", "ShutdownWatchdogSec",          config_parse_watchdog_sec,          0,                        &arg_reboot_watchdog              }, /* obsolete alias */
759
                { "Manager", "KExecWatchdogSec",             config_parse_watchdog_sec,          0,                        &arg_kexec_watchdog               },
760
                { "Manager", "WatchdogDevice",               config_parse_path,                  0,                        &arg_watchdog_device              },
761
                { "Manager", "RuntimeWatchdogPreGovernor",   config_parse_string,                CONFIG_PARSE_STRING_SAFE, &arg_watchdog_pretimeout_governor },
762
                { "Manager", "CapabilityBoundingSet",        config_parse_capability_set,        0,                        &arg_capability_bounding_set      },
763
                { "Manager", "NoNewPrivileges",              config_parse_bool,                  0,                        &arg_no_new_privs                 },
764
                { "Manager", "ProtectSystem",                config_parse_protect_system_pid1,   0,                        &arg_protect_system               },
765
#if HAVE_SECCOMP
766
                { "Manager", "SystemCallArchitectures",      config_parse_syscall_archs,         0,                        &arg_syscall_archs                },
767
#else
768
                { "Manager", "SystemCallArchitectures",      config_parse_warn_compat,           DISABLED_CONFIGURATION,   NULL                              },
769

770
#endif
771
                { "Manager", "TimerSlackNSec",               config_parse_nsec,                  0,                        &arg_timer_slack_nsec             },
772
                { "Manager", "DefaultTimerAccuracySec",      config_parse_sec,                   0,                        &arg_defaults.timer_accuracy_usec },
773
                { "Manager", "DefaultStandardOutput",        config_parse_output_restricted,     0,                        &arg_defaults.std_output          },
774
                { "Manager", "DefaultStandardError",         config_parse_output_restricted,     0,                        &arg_defaults.std_error           },
775
                { "Manager", "DefaultTimeoutStartSec",       config_parse_sec,                   0,                        &arg_defaults.timeout_start_usec  },
776
                { "Manager", "DefaultTimeoutStopSec",        config_parse_sec,                   0,                        &arg_defaults.timeout_stop_usec   },
777
                { "Manager", "DefaultTimeoutAbortSec",       config_parse_default_timeout_abort, 0,                        NULL                              },
778
                { "Manager", "DefaultDeviceTimeoutSec",      config_parse_sec,                   0,                        &arg_defaults.device_timeout_usec },
779
                { "Manager", "DefaultRestartSec",            config_parse_sec,                   0,                        &arg_defaults.restart_usec        },
780
                { "Manager", "DefaultStartLimitInterval",    config_parse_sec,                   0,                        &arg_defaults.start_limit.interval}, /* obsolete alias */
781
                { "Manager", "DefaultStartLimitIntervalSec", config_parse_sec,                   0,                        &arg_defaults.start_limit.interval},
782
                { "Manager", "DefaultStartLimitBurst",       config_parse_unsigned,              0,                        &arg_defaults.start_limit.burst   },
783
                { "Manager", "DefaultRestrictSUIDSGID",      config_parse_bool,                  0,                        &arg_defaults.restrict_suid_sgid  },
784
                { "Manager", "DefaultEnvironment",           config_parse_environ,               arg_runtime_scope,        &arg_default_environment          },
785
                { "Manager", "ManagerEnvironment",           config_parse_environ,               arg_runtime_scope,        &arg_manager_environment          },
786
                { "Manager", "DefaultLimitCPU",              config_parse_rlimit,                RLIMIT_CPU,               arg_defaults.rlimit               },
787
                { "Manager", "DefaultLimitFSIZE",            config_parse_rlimit,                RLIMIT_FSIZE,             arg_defaults.rlimit               },
788
                { "Manager", "DefaultLimitDATA",             config_parse_rlimit,                RLIMIT_DATA,              arg_defaults.rlimit               },
789
                { "Manager", "DefaultLimitSTACK",            config_parse_rlimit,                RLIMIT_STACK,             arg_defaults.rlimit               },
790
                { "Manager", "DefaultLimitCORE",             config_parse_rlimit,                RLIMIT_CORE,              arg_defaults.rlimit               },
791
                { "Manager", "DefaultLimitRSS",              config_parse_rlimit,                RLIMIT_RSS,               arg_defaults.rlimit               },
792
                { "Manager", "DefaultLimitNOFILE",           config_parse_rlimit,                RLIMIT_NOFILE,            arg_defaults.rlimit               },
793
                { "Manager", "DefaultLimitAS",               config_parse_rlimit,                RLIMIT_AS,                arg_defaults.rlimit               },
794
                { "Manager", "DefaultLimitNPROC",            config_parse_rlimit,                RLIMIT_NPROC,             arg_defaults.rlimit               },
795
                { "Manager", "DefaultLimitMEMLOCK",          config_parse_rlimit,                RLIMIT_MEMLOCK,           arg_defaults.rlimit               },
796
                { "Manager", "DefaultLimitLOCKS",            config_parse_rlimit,                RLIMIT_LOCKS,             arg_defaults.rlimit               },
797
                { "Manager", "DefaultLimitSIGPENDING",       config_parse_rlimit,                RLIMIT_SIGPENDING,        arg_defaults.rlimit               },
798
                { "Manager", "DefaultLimitMSGQUEUE",         config_parse_rlimit,                RLIMIT_MSGQUEUE,          arg_defaults.rlimit               },
799
                { "Manager", "DefaultLimitNICE",             config_parse_rlimit,                RLIMIT_NICE,              arg_defaults.rlimit               },
800
                { "Manager", "DefaultLimitRTPRIO",           config_parse_rlimit,                RLIMIT_RTPRIO,            arg_defaults.rlimit               },
801
                { "Manager", "DefaultLimitRTTIME",           config_parse_rlimit,                RLIMIT_RTTIME,            arg_defaults.rlimit               },
802
                { "Manager", "DefaultCPUAccounting",         config_parse_warn_compat,           DISABLED_LEGACY,          NULL                              },
803
                { "Manager", "DefaultIOAccounting",          config_parse_bool,                  0,                        &arg_defaults.io_accounting       },
804
                { "Manager", "DefaultIPAccounting",          config_parse_bool,                  0,                        &arg_defaults.ip_accounting       },
805
                { "Manager", "DefaultBlockIOAccounting",     config_parse_warn_compat,           DISABLED_LEGACY,          NULL                              },
806
                { "Manager", "DefaultMemoryAccounting",      config_parse_bool,                  0,                        &arg_defaults.memory_accounting   },
807
                { "Manager", "DefaultTasksAccounting",       config_parse_bool,                  0,                        &arg_defaults.tasks_accounting    },
808
                { "Manager", "DefaultTasksMax",              config_parse_tasks_max,             0,                        &arg_defaults.tasks_max           },
809
                { "Manager", "DefaultMemoryPressureThresholdSec", config_parse_sec,              0,                        &arg_defaults.memory_pressure_threshold_usec },
810
                { "Manager", "DefaultMemoryPressureWatch",   config_parse_memory_pressure_watch, 0,                        &arg_defaults.memory_pressure_watch },
811
                { "Manager", "CtrlAltDelBurstAction",        config_parse_emergency_action,      arg_runtime_scope,        &arg_cad_burst_action             },
812
                { "Manager", "DefaultOOMPolicy",             config_parse_oom_policy,            0,                        &arg_defaults.oom_policy          },
813
                { "Manager", "DefaultOOMScoreAdjust",        config_parse_oom_score_adjust,      0,                        NULL                              },
814
                { "Manager", "ReloadLimitIntervalSec",       config_parse_sec,                   0,                        &arg_reload_limit_interval_sec    },
815
                { "Manager", "ReloadLimitBurst",             config_parse_unsigned,              0,                        &arg_reload_limit_burst           },
816
#if ENABLE_SMACK
817
                { "Manager", "DefaultSmackProcessLabel",     config_parse_string,                0,                        &arg_defaults.smack_process_label },
818
#else
819
                { "Manager", "DefaultSmackProcessLabel",     config_parse_warn_compat,           DISABLED_CONFIGURATION,   NULL                              },
820
#endif
821
                {}
822
        };
823

824
        if (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM)
306✔
825
                (void) config_parse_standard_file_with_dropins(
83✔
826
                                "systemd/system.conf",
827
                                "Manager\0",
828
                                config_item_table_lookup, items,
829
                                CONFIG_PARSE_WARN,
830
                                /* userdata= */ NULL);
831
        else {
832
                _cleanup_strv_free_ char **files = NULL, **dirs = NULL;
223✔
833
                int r;
223✔
834

835
                assert(arg_runtime_scope == RUNTIME_SCOPE_USER);
223✔
836

837
                r = manager_find_user_config_paths(&files, &dirs);
223✔
838
                if (r < 0)
223✔
839
                        return log_error_errno(r, "Failed to determine config file paths: %m");
×
840

841
                (void) config_parse_many(
223✔
842
                                (const char* const*) files,
843
                                (const char* const*) dirs,
844
                                "user.conf.d",
845
                                "Manager\0",
846
                                config_item_table_lookup, items,
847
                                CONFIG_PARSE_WARN,
848
                                /* userdata= */ NULL);
849
        }
850

851
        /* Traditionally "0" was used to turn off the default unit timeouts. Fix this up so that we use
852
         * USEC_INFINITY like everywhere else. */
853
        if (arg_defaults.timeout_start_usec <= 0)
306✔
854
                arg_defaults.timeout_start_usec = USEC_INFINITY;
×
855
        if (arg_defaults.timeout_stop_usec <= 0)
306✔
856
                arg_defaults.timeout_stop_usec = USEC_INFINITY;
×
857

858
        return 0;
859
}
860

861
/* Copies the configured unit defaults into the manager object. These are the properties newly allocated
 * units start out with unless they configure something else; they do not change how the manager itself
 * behaves (set_manager_settings() covers that part). Each step is best-effort: a failure is logged as a
 * warning and otherwise ignored. */
static void set_manager_defaults(Manager *m) {
        int ret;

        assert(m);

        ret = manager_set_unit_defaults(m, &arg_defaults);
        if (ret < 0)
                log_warning_errno(ret, "Failed to set manager defaults, ignoring: %m");

        ret = manager_default_environment(m);
        if (ret < 0)
                log_warning_errno(ret, "Failed to set manager default environment, ignoring: %m");

        ret = manager_transient_environment_add(m, arg_default_environment);
        if (ret < 0)
                log_warning_errno(ret, "Failed to add to transient environment, ignoring: %m");
}
306✔
883

884
/* Copies the configured manager settings into the manager object. These are the properties that steer
 * the manager's own behaviour, in contrast to the per-unit defaults that set_manager_defaults() hands
 * over for inheritance by newly allocated units. */
static void set_manager_settings(Manager *m) {
        int ret;

        assert(m);

        m->confirm_spawn = arg_confirm_spawn;
        m->service_watchdogs = arg_service_watchdogs;
        m->cad_burst_action = arg_cad_burst_action;

        /* Assign the two rate limit fields individually. A structured initialization of the whole
         * object would also reset the rate limit counter on every daemon-reload. */
        m->reload_reexec_ratelimit.interval = arg_reload_limit_interval_sec;
        m->reload_reexec_ratelimit.burst = arg_reload_limit_burst;

        manager_set_watchdog(m, WATCHDOG_RUNTIME, arg_runtime_watchdog);
        manager_set_watchdog(m, WATCHDOG_REBOOT, arg_reboot_watchdog);
        manager_set_watchdog(m, WATCHDOG_KEXEC, arg_kexec_watchdog);
        manager_set_watchdog(m, WATCHDOG_PRETIMEOUT, arg_pretimeout_watchdog);

        /* A governor that cannot be applied is only worth a warning. */
        ret = manager_set_watchdog_pretimeout_governor(m, arg_watchdog_pretimeout_governor);
        if (ret < 0)
                log_warning_errno(ret, "Failed to set watchdog pretimeout governor to '%s', ignoring: %m", arg_watchdog_pretimeout_governor);

        manager_set_show_status(m, arg_show_status, "command line");
        m->status_unit_format = arg_status_unit_format;
}
306✔
912

913
static int parse_argv(int argc, char *argv[]) {
243✔
914
        enum {
243✔
915
                COMMON_GETOPT_ARGS,
916
                SYSTEMD_GETOPT_ARGS,
917
        };
918

919
        static const struct option options[] = {
243✔
920
                COMMON_GETOPT_OPTIONS,
921
                SYSTEMD_GETOPT_OPTIONS,
922
                {}
923
        };
924

925
        int c, r;
243✔
926
        bool user_arg_seen = false;
243✔
927

928
        assert(argc >= 1);
243✔
929
        assert(argv);
243✔
930

931
        if (getpid_cached() == 1)
243✔
932
                opterr = 0;
52✔
933

934
        while ((c = getopt_long(argc, argv, SYSTEMD_GETOPT_SHORT_OPTIONS, options, NULL)) >= 0)
477✔
935

936
                switch (c) {
234✔
937

938
                case ARG_LOG_LEVEL:
×
939
                        r = log_set_max_level_from_string(optarg);
×
940
                        if (r < 0)
×
941
                                return log_error_errno(r, "Failed to parse log level \"%s\": %m", optarg);
×
942

943
                        break;
944

945
                case ARG_LOG_TARGET:
×
946
                        r = log_set_target_from_string(optarg);
×
947
                        if (r < 0)
×
948
                                return log_error_errno(r, "Failed to parse log target \"%s\": %m", optarg);
×
949

950
                        break;
951

952
                case ARG_LOG_COLOR:
×
953

954
                        if (optarg) {
×
955
                                r = log_show_color_from_string(optarg);
×
956
                                if (r < 0)
×
957
                                        return log_error_errno(r, "Failed to parse log color setting \"%s\": %m",
×
958
                                                               optarg);
959
                        } else
960
                                log_show_color(true);
×
961

962
                        break;
963

964
                case ARG_LOG_LOCATION:
×
965
                        if (optarg) {
×
966
                                r = log_show_location_from_string(optarg);
×
967
                                if (r < 0)
×
968
                                        return log_error_errno(r, "Failed to parse log location setting \"%s\": %m",
×
969
                                                               optarg);
970
                        } else
971
                                log_show_location(true);
×
972

973
                        break;
974

975
                case ARG_LOG_TIME:
×
976

977
                        if (optarg) {
×
978
                                r = log_show_time_from_string(optarg);
×
979
                                if (r < 0)
×
980
                                        return log_error_errno(r, "Failed to parse log time setting \"%s\": %m",
×
981
                                                               optarg);
982
                        } else
983
                                log_show_time(true);
×
984

985
                        break;
986

987
                case ARG_DEFAULT_STD_OUTPUT:
×
988
                        r = exec_output_from_string(optarg);
×
989
                        if (r < 0)
×
990
                                return log_error_errno(r, "Failed to parse default standard output setting \"%s\": %m",
×
991
                                                       optarg);
992
                        arg_defaults.std_output = r;
×
993
                        break;
×
994

995
                case ARG_DEFAULT_STD_ERROR:
×
996
                        r = exec_output_from_string(optarg);
×
997
                        if (r < 0)
×
998
                                return log_error_errno(r, "Failed to parse default standard error output setting \"%s\": %m",
×
999
                                                       optarg);
1000
                        arg_defaults.std_error = r;
×
1001
                        break;
×
1002

1003
                case ARG_UNIT:
1✔
1004
                        r = free_and_strdup(&arg_default_unit, optarg);
1✔
1005
                        if (r < 0)
1✔
1006
                                return log_error_errno(r, "Failed to set default unit \"%s\": %m", optarg);
×
1007

1008
                        break;
1009

1010
                case ARG_SYSTEM:
19✔
1011
                        arg_runtime_scope = RUNTIME_SCOPE_SYSTEM;
19✔
1012
                        break;
19✔
1013

1014
                case ARG_USER:
191✔
1015
                        arg_runtime_scope = RUNTIME_SCOPE_USER;
191✔
1016
                        user_arg_seen = true;
191✔
1017
                        break;
191✔
1018

1019
                case ARG_TEST:
×
1020
                        arg_action = ACTION_TEST;
×
1021
                        break;
×
1022

1023
                case ARG_NO_PAGER:
×
1024
                        arg_pager_flags |= PAGER_DISABLE;
×
1025
                        break;
×
1026

1027
                case ARG_VERSION:
×
1028
                        arg_action = ACTION_VERSION;
×
1029
                        break;
×
1030

1031
                case ARG_DUMP_CONFIGURATION_ITEMS:
×
1032
                        arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
×
1033
                        break;
×
1034

1035
                case ARG_DUMP_BUS_PROPERTIES:
×
1036
                        arg_action = ACTION_DUMP_BUS_PROPERTIES;
×
1037
                        break;
×
1038

1039
                case ARG_BUS_INTROSPECT:
×
1040
                        arg_bus_introspect = optarg;
×
1041
                        arg_action = ACTION_BUS_INTROSPECT;
×
1042
                        break;
×
1043

1044
                case ARG_DUMP_CORE:
×
1045
                        r = parse_boolean_argument("--dump-core", optarg, &arg_dump_core);
×
1046
                        if (r < 0)
×
1047
                                return r;
1048
                        break;
1049

1050
                case ARG_CRASH_CHVT:
×
1051
                        r = parse_crash_chvt(optarg, &arg_crash_chvt);
×
1052
                        if (r < 0)
×
1053
                                return log_error_errno(r, "Failed to parse crash virtual terminal index: \"%s\": %m",
×
1054
                                                       optarg);
1055
                        break;
1056

1057
                case ARG_CRASH_SHELL:
×
1058
                        r = parse_boolean_argument("--crash-shell", optarg, &arg_crash_shell);
×
1059
                        if (r < 0)
×
1060
                                return r;
1061
                        break;
1062

1063
                case ARG_CRASH_REBOOT:
×
1064
                        r = parse_boolean_argument("--crash-reboot", optarg, NULL);
×
1065
                        if (r < 0)
×
1066
                                return r;
1067
                        arg_crash_action = r > 0 ? CRASH_REBOOT : CRASH_FREEZE;
×
1068
                        break;
×
1069

1070
                case ARG_CRASH_ACTION:
×
1071
                        r = crash_action_from_string(optarg);
×
1072
                        if (r < 0)
×
1073
                                return log_error_errno(r, "Failed to parse crash action \"%s\": %m", optarg);
×
1074
                        arg_crash_action = r;
×
1075
                        break;
×
1076

1077
                case ARG_CONFIRM_SPAWN:
×
1078
                        arg_confirm_spawn = mfree(arg_confirm_spawn);
×
1079

1080
                        r = parse_confirm_spawn(optarg, &arg_confirm_spawn);
×
1081
                        if (r < 0)
×
1082
                                return log_error_errno(r, "Failed to parse confirm spawn option: \"%s\": %m",
×
1083
                                                       optarg);
1084
                        break;
1085

1086
                case ARG_SERVICE_WATCHDOGS:
×
1087
                        r = parse_boolean_argument("--service-watchdogs=", optarg, &arg_service_watchdogs);
×
1088
                        if (r < 0)
×
1089
                                return r;
1090
                        break;
1091

1092
                case ARG_SHOW_STATUS:
×
1093
                        if (optarg) {
×
1094
                                r = parse_show_status(optarg, &arg_show_status);
×
1095
                                if (r < 0)
×
1096
                                        return log_error_errno(r, "Failed to parse show status boolean: \"%s\": %m",
×
1097
                                                               optarg);
1098
                        } else
1099
                                arg_show_status = SHOW_STATUS_YES;
×
1100
                        break;
1101

1102
                case ARG_DESERIALIZE: {
20✔
1103
                        int fd;
20✔
1104
                        FILE *f;
20✔
1105

1106
                        fd = parse_fd(optarg);
20✔
1107
                        if (fd < 0)
20✔
1108
                                return log_error_errno(fd, "Failed to parse serialization fd \"%s\": %m", optarg);
×
1109

1110
                        (void) fd_cloexec(fd, true);
20✔
1111

1112
                        f = fdopen(fd, "r");
20✔
1113
                        if (!f)
20✔
1114
                                return log_error_errno(errno, "Failed to open serialization fd %d: %m", fd);
×
1115

1116
                        safe_fclose(arg_serialization);
20✔
1117
                        arg_serialization = f;
20✔
1118

1119
                        break;
20✔
1120
                }
1121

1122
                case ARG_SWITCHED_ROOT:
3✔
1123
                        arg_switched_root = true;
3✔
1124
                        break;
3✔
1125

1126
                case ARG_MACHINE_ID:
×
1127
                        r = id128_from_string_nonzero(optarg, &arg_machine_id);
×
1128
                        if (r < 0)
×
1129
                                return log_error_errno(r, "MachineID '%s' is not valid: %m", optarg);
×
1130
                        break;
1131

1132
                case 'h':
×
1133
                        arg_action = ACTION_HELP;
×
1134
                        break;
×
1135

1136
                case 'D':
×
1137
                        log_set_max_level(LOG_DEBUG);
×
1138
                        break;
×
1139

1140
                case 'b':
×
1141
                case 's':
1142
                case 'z':
1143
                        /* Just to eat away the sysvinit kernel cmdline args that we'll parse in
1144
                         * parse_proc_cmdline_item() or ignore, without any getopt() error messages.
1145
                         */
1146
                case '?':
1147
                        if (getpid_cached() != 1)
×
1148
                                return -EINVAL;
1149
                        else
1150
                                return 0;
×
1151

1152
                default:
×
1153
                        assert_not_reached();
×
1154
                }
1155

1156
        if (optind < argc && getpid_cached() != 1)
243✔
1157
                /* Hmm, when we aren't run as init system let's complain about excess arguments */
1158
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Excess arguments.");
×
1159

1160
        if (arg_action == ACTION_RUN && arg_runtime_scope == RUNTIME_SCOPE_USER && !user_arg_seen)
243✔
1161
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
×
1162
                                       "Explicit --user argument required to run as user manager.");
1163

1164
        return 0;
1165
}
1166

1167
static int help(void) {
×
1168
        _cleanup_free_ char *link = NULL;
×
1169
        int r;
×
1170

1171
        r = terminal_urlify_man("systemd", "1", &link);
×
1172
        if (r < 0)
×
1173
                return log_oom();
×
1174

1175
        printf("%s [OPTIONS...]\n\n"
×
1176
               "%sStarts and monitors system and user services.%s\n\n"
1177
               "This program takes no positional arguments.\n\n"
1178
               "%sOptions%s:\n"
1179
               "  -h --help                      Show this help\n"
1180
               "     --version                   Show version\n"
1181
               "     --test                      Determine initial transaction, dump it and exit\n"
1182
               "     --system                    Combined with --test: operate in system mode\n"
1183
               "     --user                      Combined with --test: operate in user mode\n"
1184
               "     --dump-configuration-items  Dump understood unit configuration items\n"
1185
               "     --dump-bus-properties       Dump exposed bus properties\n"
1186
               "     --bus-introspect=PATH       Write XML introspection data\n"
1187
               "     --unit=UNIT                 Set default unit\n"
1188
               "     --dump-core[=BOOL]          Dump core on crash\n"
1189
               "     --crash-vt=NR               Change to specified VT on crash\n"
1190
               "     --crash-action=ACTION       Specify what to do on crash\n"
1191
               "     --crash-shell[=BOOL]        Run shell on crash\n"
1192
               "     --confirm-spawn[=BOOL]      Ask for confirmation when spawning processes\n"
1193
               "     --show-status[=BOOL]        Show status updates on the console during boot\n"
1194
               "     --log-target=TARGET         Set log target (console, journal, kmsg,\n"
1195
               "                                                 journal-or-kmsg, null)\n"
1196
               "     --log-level=LEVEL           Set log level (debug, info, notice, warning,\n"
1197
               "                                                err, crit, alert, emerg)\n"
1198
               "     --log-color[=BOOL]          Highlight important log messages\n"
1199
               "     --log-location[=BOOL]       Include code location in log messages\n"
1200
               "     --log-time[=BOOL]           Prefix log messages with current time\n"
1201
               "     --default-standard-output=  Set default standard output for services\n"
1202
               "     --default-standard-error=   Set default standard error output for services\n"
1203
               "     --no-pager                  Do not pipe output into a pager\n"
1204
               "\nSee the %s for details.\n",
1205
               program_invocation_short_name,
1206
               ansi_highlight(),
1207
               ansi_normal(),
1208
               ansi_underline(),
1209
               ansi_normal(),
1210
               link);
1211

1212
        return 0;
1213
}
1214

1215
static int prepare_reexecute(
39✔
1216
                Manager *m,
1217
                FILE **ret_f,
1218
                FDSet **ret_fds,
1219
                bool switching_root) {
1220

1221
        _cleanup_fdset_free_ FDSet *fds = NULL;
×
1222
        _cleanup_fclose_ FILE *f = NULL;
39✔
1223
        int r;
39✔
1224

1225
        assert(m);
39✔
1226
        assert(ret_f);
39✔
1227
        assert(ret_fds);
39✔
1228

1229
        /* Make sure nothing is really destructed when we shut down */
1230
        m->n_reloading++;
39✔
1231
        bus_manager_send_reloading(m, true);
39✔
1232

1233
        r = manager_open_serialization(m, &f);
39✔
1234
        if (r < 0)
39✔
1235
                return log_error_errno(r, "Failed to create serialization file: %m");
×
1236

1237
        fds = fdset_new();
39✔
1238
        if (!fds)
39✔
1239
                return log_oom();
×
1240

1241
        r = manager_serialize(m, f, fds, switching_root);
39✔
1242
        if (r < 0)
39✔
1243
                return r;
1244

1245
        r = finish_serialization_file(f);
39✔
1246
        if (r < 0)
39✔
1247
                return log_error_errno(r, "Failed to finish serialization file: %m");
×
1248

1249
        r = fd_cloexec(fileno(f), false);
39✔
1250
        if (r < 0)
39✔
1251
                return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization: %m");
×
1252

1253
        r = fdset_cloexec(fds, false);
39✔
1254
        if (r < 0)
39✔
1255
                return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
×
1256

1257
        *ret_f = TAKE_PTR(f);
39✔
1258
        *ret_fds = TAKE_PTR(fds);
39✔
1259

1260
        return 0;
39✔
1261
}
1262

1263
/* Raises the fs.file-max and fs.nr_open sysctls as far as possible. Failures are logged and
 * otherwise ignored. Each half is compiled in only if the matching BUMP_PROC_SYS_FS_* build option
 * is enabled. */
static void bump_file_max_and_nr_open(void) {

        /* On current kernels large numbers of file descriptors are no longer a performance problem,
         * and their memory is properly tracked by memcg. Counting and limiting them in two further
         * layers is hence unnecessary and only complicates things. We thus turn off 2 of the 4 levels
         * of limits on file descriptors, so that RLIMIT_NOFILE (soft + hard) are the only ones that
         * really matter. */

#if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN
        int r;
#endif

#if BUMP_PROC_SYS_FS_FILE_MAX
        /* Since kernel 5.2 the maximum accepted here is LONG_MAX, hence use that. (Older kernels
         * behaved differently, but the write would fail silently there.) */
        r = sysctl_write("fs/file-max", LONG_MAX_STR);
        if (r < 0)
                log_full_errno(ERRNO_IS_NEG_FS_WRITE_REFUSED(r) ? LOG_DEBUG : LOG_WARNING, r,
                               "Failed to bump fs.file-max, ignoring: %m");
#endif

#if BUMP_PROC_SYS_FS_NR_OPEN
        /* The kernel enforces a minimum and a maximum for fs.nr_open, but does not expose them, they
         * are hardcoded in fs/file.c. We start with the hardcoded maximum, and keep halving the value
         * for as long as the kernel rejects it with EINVAL. */

        for (unsigned candidate = NR_OPEN_MAXIMUM; candidate >= NR_OPEN_MINIMUM; candidate /= 2) {
                unsigned current = read_nr_open();

                if (current >= candidate) { /* Already larger */
                        log_debug("Skipping bump, value is already larger.");
                        break;
                }

                r = sysctl_writef("fs/nr_open", "%u", candidate);
                if (r == -EINVAL) {
                        log_debug("Couldn't write fs.nr_open as %u, halving it.", candidate);
                        continue;
                }

                /* Any other outcome, good or bad, ends the search. */
                if (r < 0) {
                        log_full_errno(ERRNO_IS_NEG_FS_WRITE_REFUSED(r) ? LOG_DEBUG : LOG_WARNING, r,
                                       "Failed to bump fs.nr_open, ignoring: %m");
                } else {
                        log_debug("Successfully bumped fs.nr_open to %u", candidate);
                }

                break;
        }
#endif
}
36✔
1312

1313
/* Raises RLIMIT_NOFILE (soft and hard) for ourselves to the value read_nr_open() reports, without
 * ever going below the limits passed in saved_rlimit.
 *
 * Returns 0 if nothing had to be done or the limit was set, and the (negative) result of
 * log_warning_errno() if setting the limit failed. */
static int bump_rlimit_nofile(const struct rlimit *saved_rlimit) {
        int r;

        /* The absolute limit the kernel enforces underneath */
        unsigned kernel_max = read_nr_open();

        /* The limits we want for ourselves. We never go lower than what we inherited. */
        struct rlimit target = {
                .rlim_cur = MAX((rlim_t) kernel_max, saved_rlimit->rlim_cur),
                .rlim_max = MAX((rlim_t) kernel_max, saved_rlimit->rlim_max),
        };

        /* Nothing to do if the inherited limits already cover what we computed. */
        bool already_sufficient =
                saved_rlimit->rlim_max >= target.rlim_max &&
                saved_rlimit->rlim_cur >= target.rlim_cur;
        if (already_sufficient) {
                log_debug("RLIMIT_NOFILE is already as high or higher than we need it, not bumping.");
                return 0;
        }

        /* Raise both the hard and the soft limit substantially, all the way to the kernel maximum. */
        r = setrlimit_closest(RLIMIT_NOFILE, &target);
        if (r < 0)
                return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");

        return 0;
}
1341

1342
static int bump_rlimit_memlock(const struct rlimit *saved_rlimit) {
243✔
1343
        struct rlimit new_rlimit;
243✔
1344
        uint64_t mm;
243✔
1345
        int r;
243✔
1346

1347
        /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even if we have CAP_IPC_LOCK
1348
         * which should normally disable such checks. We need them to implement IPAddressAllow= and
1349
         * IPAddressDeny=, hence let's bump the value high enough for our user. */
1350

1351
        /* Using MAX() on resource limits only is safe if RLIM_INFINITY is > 0. POSIX declares that rlim_t
1352
         * must be unsigned, hence this is a given, but let's make this clear here. */
1353
        assert_cc(RLIM_INFINITY > 0);
243✔
1354

1355
        mm = physical_memory_scale(1, 8); /* Let's scale how much we allow to be locked by the amount of
243✔
1356
                                           * physical RAM. We allow an eighth to be locked by us, just to
1357
                                           * pick a value. */
1358

1359
        new_rlimit = (struct rlimit) {
486✔
1360
                .rlim_cur = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_cur, mm),
243✔
1361
                .rlim_max = MAX3(HIGH_RLIMIT_MEMLOCK, saved_rlimit->rlim_max, mm),
243✔
1362
        };
1363

1364
        if (saved_rlimit->rlim_max >= new_rlimit.rlim_cur &&
243✔
1365
            saved_rlimit->rlim_cur >= new_rlimit.rlim_max) {
×
1366
                log_debug("RLIMIT_MEMLOCK is already as high or higher than we need it, not bumping.");
×
1367
                return 0;
×
1368
        }
1369

1370
        r = setrlimit_closest(RLIMIT_MEMLOCK, &new_rlimit);
243✔
1371
        if (r < 0)
243✔
1372
                return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
×
1373

1374
        return 0;
1375
}
1376

1377
/* Restricts the system call architectures to the set passed in archs.
 *
 * This is a NOP returning 0 if built without seccomp support, or if is_seccomp_available() says
 * seccomp cannot be used. Returns 0 on success, and a negative error value (already logged) if
 * applying the restriction failed. */
static int enforce_syscall_archs(Set *archs) {
#if HAVE_SECCOMP
        int r;

        if (!is_seccomp_available())
                return 0;

        /* Honour the set the caller handed us, instead of silently reaching for the global
         * arg_syscall_archs and ignoring the parameter. */
        r = seccomp_restrict_archs(archs);
        if (r < 0)
                return log_error_errno(r, "Failed to enforce system call architecture restriction: %m");
#endif
        return 0;
}
1390

1391
static int os_release_status(void) {
36✔
1392
        _cleanup_free_ char *pretty_name = NULL, *fancy_name = NULL,
×
1393
                *name = NULL, *version = NULL, *ansi_color = NULL, *support_end = NULL, *codename = NULL;
36✔
1394
        int r;
36✔
1395

1396
        r = parse_os_release(NULL,
36✔
1397
                             "PRETTY_NAME",      &pretty_name,
1398
                             "FANCY_NAME",       &fancy_name,
1399
                             "NAME",             &name,
1400
                             "VERSION",          &version,
1401
                             "VERSION_CODENAME", &codename,
1402
                             "ANSI_COLOR",       &ansi_color,
1403
                             "SUPPORT_END",      &support_end);
1404
        if (r < 0)
36✔
1405
                return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
×
1406
                                      "Failed to read os-release file, ignoring: %m");
1407

1408
        const char *label = os_release_pretty_name(pretty_name, name);
36✔
1409

1410
        if (show_status_on(arg_show_status)) {
36✔
1411
                const char *color = empty_to_null(ansi_color) ?: "1";
×
1412

1413
                /* The fancy name may contain emoji characters and ANSI sequences. Don't use it if our locale
1414
                 * doesn't allow that, or ANSI sequences are off, or if it is empty. */
1415

1416
                if (!isempty(fancy_name)) {
×
1417
                        _cleanup_free_ char *unescaped = NULL;
×
1418

1419
                        /* Undo one level of C-style unescaping for this one */
1420
                        ssize_t l = cunescape(fancy_name, /* flags= */ 0, &unescaped);
×
1421
                        if (l < 0) {
×
1422
                                log_debug_errno(l, "Failed to unescape FANCY_NAME=, ignoring: %m");
×
1423
                                fancy_name = mfree(fancy_name);
×
1424
                        } else if (!utf8_is_valid(fancy_name)) {
×
1425
                                log_debug("Unescaped FANCY_NAME= contains invalid UTF-8, ignoring.");
×
1426
                                fancy_name = mfree(fancy_name);
×
1427
                        } else {
1428
                                free_and_replace(fancy_name, unescaped);
×
1429

1430
                                /* FANCY_NAME= does not contain version/codename info (unlike PRETTY_NAME=),
1431
                                 * but in this context it makes sense to show them if defined, hence append
1432
                                 * them here. */
1433
                                if (version && !strextend(&fancy_name, " ", version))
×
1434
                                        return log_oom();
×
1435

1436
                                if (codename && !strextend(&fancy_name, " (", codename, ")"))
×
1437
                                        return log_oom();
×
1438
                        }
1439
                }
1440

1441
                if (isempty(fancy_name) ||
×
1442
                    (!emoji_enabled() && !ascii_is_valid(fancy_name)) ||
×
1443
                    !log_get_show_color())
×
1444
                        fancy_name = mfree(fancy_name);
×
1445

1446
                if (!fancy_name && log_get_show_color()) {
×
1447
                        fancy_name = strjoin("\x1B[", color, "m", label);
×
1448
                        if (!fancy_name)
×
1449
                                return log_oom();
×
1450
                }
1451

1452
                if (in_initrd()) {
×
1453
                        if (log_get_show_color())
×
1454
                                status_printf(NULL, 0,
×
1455
                                              ANSI_HIGHLIGHT "Booting initrd of " ANSI_NORMAL "%s" ANSI_NORMAL ANSI_HIGHLIGHT "." ANSI_NORMAL,
1456
                                              fancy_name);
1457
                        else
1458
                                status_printf(NULL, 0,
×
1459
                                              "Booting initrd of %s...", label);
1460
                } else {
1461
                        if (log_get_show_color())
×
1462
                                status_printf(NULL, 0,
×
1463
                                              "\n" ANSI_HIGHLIGHT "Welcome to " ANSI_NORMAL "%s" ANSI_NORMAL ANSI_HIGHLIGHT "!" ANSI_NORMAL "\n",
1464
                                              fancy_name);
1465
                        else
1466
                                status_printf(NULL, 0,
×
1467
                                              "\nWelcome to %s!\n",
1468
                                              label);
1469
                }
1470
        }
1471

1472
        if (support_end && os_release_support_ended(support_end, /* quiet= */ false, /* ret_eol= */ NULL) > 0)
36✔
1473
                /* pretty_name may include the version already, so we'll print the version only if we
1474
                 * have it and we're not using pretty_name. */
1475
                status_printf(ANSI_HIGHLIGHT_RED "  !!  " ANSI_NORMAL, 0,
×
1476
                              "This OS version (%s%s%s) is past its end-of-support date (%s)",
1477
                              label,
1478
                              (pretty_name || !version) ? "" : " version ",
×
1479
                              (pretty_name || !version) ? "" : version,
×
1480
                              support_end);
1481

1482
        return 0;
1483
}
1484

1485
/* Copies the os-release file into the ".os-release-stage" directory below the propagate directory
 * of the given scope: /run/systemd/ for the system scope, /run/user/<euid>/systemd/ for the user
 * scope. The source is /etc/os-release, or /usr/lib/os-release if the former cannot be accessed.
 *
 * Returns 0 on success, and the (negative) result of log_debug_errno() if creating the parent
 * directory or copying the file failed. */
static int setup_os_release(RuntimeScope scope) {
        /* Sized for the longer, per-user variant of the path; initialized to the system one. */
        char staged_path[STRLEN("/run/user//systemd/propagate/.os-release-stage/os-release") + DECIMAL_STR_MAX(uid_t)] =
                "/run/systemd/propagate/.os-release-stage/os-release";
        const char *source_path = "/etc/os-release";
        int r;

        assert(IN_SET(scope, RUNTIME_SCOPE_SYSTEM, RUNTIME_SCOPE_USER));

        if (access(source_path, F_OK) < 0) {
                /* ENOENT is expected and not worth a message, anything else is at least logged. In
                 * either case we fall back to the copy in /usr/lib/. */
                if (errno != ENOENT)
                        log_debug_errno(errno, "Failed to check if /etc/os-release exists, ignoring: %m");

                source_path = "/usr/lib/os-release";
        }

        if (scope == RUNTIME_SCOPE_USER)
                xsprintf(staged_path, "/run/user/" UID_FMT "/systemd/propagate/.os-release-stage/os-release", geteuid());

        r = mkdir_parents_label(staged_path, 0755);
        if (r < 0)
                return log_debug_errno(r, "Failed to create parent directory of '%s', ignoring: %m", staged_path);

        r = copy_file_atomic(source_path, staged_path, 0644, COPY_MAC_CREATE|COPY_REPLACE);
        if (r < 0)
                return log_debug_errno(r, "Failed to copy '%s' to '%s', ignoring: %m",
                                       source_path, staged_path);

        return 0;
}
1514

1515
static int write_container_id(void) {
36✔
1516
        const char *c;
36✔
1517
        int r = 0;  /* avoid false maybe-uninitialized warning */
36✔
1518

1519
        c = getenv("container");
36✔
1520
        if (isempty(c))
55✔
1521
                return 0;
1522

1523
        WITH_UMASK(0022)
38✔
1524
                r = write_string_file("/run/systemd/container", c, WRITE_STRING_FILE_CREATE);
19✔
1525
        if (r < 0)
19✔
1526
                return log_warning_errno(r, "Failed to write /run/systemd/container, ignoring: %m");
×
1527

1528
        return 1;
1529
}
1530

1531
static int write_boot_or_shutdown_osc(const char *type) {
50✔
1532
        int r;
50✔
1533

1534
        assert(STRPTR_IN_SET(type, "boot", "shutdown"));
50✔
1535

1536
        if (getenv_terminal_is_dumb())
50✔
1537
                return 0;
50✔
1538

1539
        _cleanup_close_ int fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
50✔
1540
        if (fd < 0)
×
1541
                return log_debug_errno(fd, "Failed to open /dev/console to print %s OSC, ignoring: %m", type);
×
1542

1543
        _cleanup_free_ char *seq = NULL;
×
1544
        if (streq(type, "boot"))
×
1545
                r = osc_context_open_boot(&seq);
×
1546
        else
1547
                r = osc_context_close(SD_ID128_ALLF, &seq);
×
1548
        if (r < 0)
×
1549
                return log_debug_errno(r, "Failed to acquire %s OSC sequence, ignoring: %m", type);
×
1550

1551
        r = loop_write(fd, seq, SIZE_MAX);
×
1552
        if (r < 0)
×
1553
                return log_debug_errno(r, "Failed to write %s OSC sequence, ignoring: %m", type);
×
1554

1555
        if (DEBUG_LOGGING) {
×
1556
                _cleanup_free_ char *h = cescape(seq);
×
1557
                log_debug("OSC sequence for %s successfully written: %s", type, strna(h));
×
1558
        }
1559

1560
        return 0;
1561
}
1562

1563
static int bump_unix_max_dgram_qlen(void) {
36✔
1564
        _cleanup_free_ char *qlen = NULL;
36✔
1565
        unsigned long v;
36✔
1566
        int r;
36✔
1567

1568
        /* Let's bump the net.unix.max_dgram_qlen sysctl. The kernel default of 16 is simply too low. We set
1569
         * the value really really early during boot, so that it is actually applied to all our sockets,
1570
         * including the $NOTIFY_SOCKET one. */
1571

1572
        r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen);
36✔
1573
        if (r < 0)
36✔
1574
                return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
×
1575
                                      "Failed to read AF_UNIX datagram queue length, ignoring: %m");
1576

1577
        r = safe_atolu(qlen, &v);
36✔
1578
        if (r < 0)
36✔
1579
                return log_warning_errno(r, "Failed to parse AF_UNIX datagram queue length '%s', ignoring: %m", qlen);
×
1580

1581
        if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN)
36✔
1582
                return 0;
1583

1584
        r = sysctl_write("net/unix/max_dgram_qlen", STRINGIFY(DEFAULT_UNIX_MAX_DGRAM_QLEN));
33✔
1585
        if (r < 0)
33✔
1586
                return log_full_errno(ERRNO_IS_NEG_FS_WRITE_REFUSED(r) ? LOG_DEBUG : LOG_WARNING, r,
×
1587
                                      "Failed to bump AF_UNIX datagram queue length, ignoring: %m");
1588

1589
        return 1;
1590
}
1591

1592
static int fixup_environment(void) {
52✔
1593
        int r;
52✔
1594

1595
        /* Only fix up the environment when we are started as PID 1 */
1596
        if (getpid_cached() != 1)
52✔
1597
                return 0;
52✔
1598

1599
        /* We expect the environment to be set correctly if run inside a container. */
1600
        if (detect_container() > 0)
52✔
1601
                return 0;
1602

1603
        /* When started as PID1, the kernel uses /dev/console for our stdios and uses TERM=linux whatever the
1604
         * backend device used by the console. We try to make a better guess here since some consoles might
1605
         * not have support for color mode for example.
1606
         *
1607
         * However if TERM was configured through the kernel command line then leave it alone. */
1608
        _cleanup_free_ char *term = NULL;
22✔
1609
        r = proc_cmdline_get_key("TERM", 0, &term);
22✔
1610
        if (r < 0)
22✔
1611
                return r;
1612
        if (r > 0) {
22✔
1613
                /* If we pick up $TERM, then also pick up $COLORTERM, $NO_COLOR */
1614
                FOREACH_STRING(v, "COLORTERM", "NO_COLOR") {
66✔
1615
                        _cleanup_free_ char *vv = NULL;
44✔
1616
                        r = proc_cmdline_get_key(v, 0, &vv);
44✔
1617
                        if (r < 0)
44✔
1618
                                return r;
1619
                        if (r > 0 && setenv(v, vv, /* overwrite= */ true) < 0)
44✔
1620
                                return -errno;
×
1621
                }
1622
        } else {
1623
                /* If no $TERM is set then look for the per-tty variable instead */
1624
                r = proc_cmdline_get_key("systemd.tty.term.console", 0, &term);
×
1625
                if (r < 0)
×
1626
                        return r;
1627
        }
1628

1629
        if (!term)
22✔
1630
                (void) query_term_for_tty("/dev/console", &term);
×
1631

1632
        if (setenv("TERM", term ?: FALLBACK_TERM, /* overwrite= */ true) < 0)
22✔
1633
                return -errno;
×
1634

1635
        /* The kernels sets HOME=/ for init. Let's undo this. */
1636
        if (path_equal(getenv("HOME"), "/"))
22✔
1637
                assert_se(unsetenv("HOME") == 0);
14✔
1638

1639
        return 0;
1640
}
1641

1642
static int become_shutdown(int objective, int retval) {
14✔
1643
        static const char* const table[_MANAGER_OBJECTIVE_MAX] = {
14✔
1644
                [MANAGER_EXIT]     = "exit",
1645
                [MANAGER_REBOOT]   = "reboot",
1646
                [MANAGER_POWEROFF] = "poweroff",
1647
                [MANAGER_HALT]     = "halt",
1648
                [MANAGER_KEXEC]    = "kexec",
1649
        };
1650

1651
        char timeout[STRLEN("--timeout=") + DECIMAL_STR_MAX(usec_t) + STRLEN("us")],
14✔
1652
             exit_code[STRLEN("--exit-code=") + DECIMAL_STR_MAX(uint8_t)];
1653

1654
        _cleanup_strv_free_ char **env_block = NULL;
×
1655
        _cleanup_free_ char *max_log_levels = NULL;
14✔
1656
        usec_t watchdog_timer = 0;
14✔
1657
        int r;
14✔
1658

1659
        assert(objective >= 0 && objective < _MANAGER_OBJECTIVE_MAX);
14✔
1660
        assert(table[objective]);
14✔
1661

1662
        xsprintf(timeout, "--timeout=%" PRI_USEC "us", arg_defaults.timeout_stop_usec);
14✔
1663

1664
        const char* command_line[11] = {
14✔
1665
                SYSTEMD_SHUTDOWN_BINARY_PATH,
1666
                table[objective],
1667
                timeout,
1668
                /* Note that the last position is a terminator and must contain NULL. */
1669
        };
1670
        size_t pos = 3;
14✔
1671

1672
        assert(command_line[pos-1]);
14✔
1673
        assert(!command_line[pos]);
14✔
1674

1675
        (void) log_max_levels_to_string(log_get_max_level(), &max_log_levels);
14✔
1676

1677
        if (max_log_levels) {
14✔
1678
                command_line[pos++] = "--log-level";
14✔
1679
                command_line[pos++] = max_log_levels;
14✔
1680
        }
1681

1682
        switch (log_get_target()) {
14✔
1683

1684
        case LOG_TARGET_KMSG:
3✔
1685
        case LOG_TARGET_JOURNAL_OR_KMSG:
1686
        case LOG_TARGET_SYSLOG_OR_KMSG:
1687
                command_line[pos++] = "--log-target=kmsg";
3✔
1688
                break;
3✔
1689

1690
        case LOG_TARGET_NULL:
×
1691
                command_line[pos++] = "--log-target=null";
×
1692
                break;
×
1693

1694
        case LOG_TARGET_CONSOLE:
11✔
1695
        default:
1696
                command_line[pos++] = "--log-target=console";
11✔
1697
        };
14✔
1698

1699
        if (log_get_show_color())
14✔
1700
                command_line[pos++] = "--log-color";
×
1701

1702
        if (log_get_show_location())
14✔
1703
                command_line[pos++] = "--log-location";
×
1704

1705
        if (log_get_show_time())
14✔
1706
                command_line[pos++] = "--log-time";
×
1707

1708
        xsprintf(exit_code, "--exit-code=%d", retval);
14✔
1709
        command_line[pos++] = exit_code;
14✔
1710

1711
        assert(pos < ELEMENTSOF(command_line));
14✔
1712

1713
        /* The watchdog: */
1714

1715
        if (objective == MANAGER_REBOOT)
14✔
1716
                watchdog_timer = arg_reboot_watchdog;
14✔
UNCOV
1717
        else if (objective == MANAGER_KEXEC)
×
1718
                watchdog_timer = arg_kexec_watchdog;
×
1719

1720
        /* If we reboot or kexec let's set the shutdown watchdog and tell the
1721
         * shutdown binary to repeatedly ping it.
1722
         * Disable the pretimeout watchdog, as we do not support it from the shutdown binary. */
1723
        (void) watchdog_setup_pretimeout(0);
14✔
1724
        (void) watchdog_setup_pretimeout_governor(NULL);
14✔
1725
        r = watchdog_setup(watchdog_timer);
14✔
1726
        watchdog_close(/* disarm= */ r < 0);
14✔
1727

1728
        /* The environment block: */
1729

1730
        env_block = strv_copy(environ);
14✔
1731

1732
        /* Tell the binary how often to ping, ignore failure */
1733
        (void) strv_extendf(&env_block, "WATCHDOG_USEC="USEC_FMT, watchdog_timer);
14✔
1734

1735
        /* Make sure that tools that look for $WATCHDOG_USEC (and might get started by the exitrd) don't get
1736
         * confused by the variable, because the sd_watchdog_enabled() protocol uses the same variable for
1737
         * the same purposes. */
1738
        (void) strv_extendf(&env_block, "WATCHDOG_PID=" PID_FMT, getpid_cached());
14✔
1739

1740
        if (arg_watchdog_device)
14✔
1741
                (void) strv_extendf(&env_block, "WATCHDOG_DEVICE=%s", arg_watchdog_device);
×
1742

1743
        (void) write_boot_or_shutdown_osc("shutdown");
14✔
1744

1745
        execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
14✔
1746
        return -errno;
14✔
1747
}
1748

1749
static void initialize_clock_timewarp(void) {
17✔
1750
        int r;
17✔
1751

1752
        /* This is called very early on, before we parse the kernel command line or otherwise figure out why
1753
         * we are running, but only once. */
1754

1755
        if (clock_is_localtime(NULL) > 0) {
17✔
1756
                int min;
×
1757

1758
                /* The very first call of settimeofday() also does a time warp in the kernel.
1759
                 *
1760
                 * In the rtc-in-local time mode, we set the kernel's timezone, and rely on external tools to
1761
                 * take care of maintaining the RTC and do all adjustments. This matches the behavior of
1762
                 * Windows, which leaves the RTC alone if the registry tells that the RTC runs in UTC.
1763
                 */
1764
                r = clock_set_timezone(&min);
×
1765
                if (r < 0)
×
1766
                        log_error_errno(r, "Failed to apply local time delta, ignoring: %m");
×
1767
                else
1768
                        log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
×
1769

1770
        } else if (!in_initrd())
17✔
1771
                /*
1772
                 * Do a dummy very first call to seal the kernel's time warp magic.
1773
                 *
1774
                 * Do not call this from inside the initrd. The initrd might not carry /etc/adjtime with
1775
                 * LOCAL, but the real system could be set up that way. In such case, we need to delay the
1776
                 * time-warp or the sealing until we reach the real system.
1777
                 *
1778
                 * Do no set the kernel's timezone. The concept of local time cannot be supported reliably,
1779
                 * the time will jump or be incorrect at every daylight saving time change. All kernel local
1780
                 * time concepts will be treated as UTC that way.
1781
                 */
1782
                (void) clock_reset_timewarp();
6✔
1783
}
17✔
1784

1785
static void apply_clock_update(void) {
226✔
1786
        /* This is called later than clock_apply_epoch(), i.e. after we have parsed
1787
         * configuration files/kernel command line and such. */
1788

1789
        if (arg_clock_usec == 0)
226✔
1790
                return;
1791

1792
        if (getpid_cached() != 1)
×
1793
                return;
1794

1795
        if (clock_settime(CLOCK_REALTIME, TIMESPEC_STORE(arg_clock_usec)) < 0)
×
1796
                log_error_errno(errno, "Failed to set system clock to time specified on kernel command line: %m");
×
1797
        else
1798
                log_info("Set system clock to %s, as specified on the kernel command line.",
×
1799
                         FORMAT_TIMESTAMP(arg_clock_usec));
1800
}
1801

1802
static void cmdline_take_random_seed(void) {
226✔
1803
        size_t suggested;
226✔
1804
        int r;
226✔
1805

1806
        if (arg_random_seed_size == 0)
226✔
1807
                return;
1808

1809
        if (getpid_cached() != 1)
×
1810
                return;
1811

1812
        assert(arg_random_seed);
×
1813
        suggested = random_pool_size();
×
1814

1815
        if (arg_random_seed_size < suggested)
×
1816
                log_warning("Random seed specified on kernel command line has size %zu, but %zu bytes required to fill entropy pool.",
×
1817
                            arg_random_seed_size, suggested);
1818

1819
        r = random_write_entropy(-1, arg_random_seed, arg_random_seed_size, true);
×
1820
        if (r < 0) {
×
1821
                log_warning_errno(r, "Failed to credit entropy specified on kernel command line, ignoring: %m");
×
1822
                return;
×
1823
        }
1824

1825
        log_notice("Successfully credited entropy passed on kernel command line.\n"
×
1826
                   "Note that the seed provided this way is accessible to unprivileged programs. "
1827
                   "This functionality should not be used outside of testing environments.");
1828
}
1829

1830
static void initialize_coredump(bool skip_setup) {
52✔
1831
        if (getpid_cached() != 1)
52✔
1832
                return;
1833

1834
        /* Don't limit the core dump size, so that coredump handlers such as systemd-coredump (which honour
1835
         * the limit) will process core dumps for system services by default. */
1836
        if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
52✔
1837
                log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");
×
1838

1839
        /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored
1840
         * until the systemd-coredump tool is enabled via sysctl. However it can be changed via the kernel
1841
         * command line later so core dumps can still be generated during early startup and in initrd. */
1842
        if (!skip_setup)
52✔
1843
                disable_coredumps();
36✔
1844
}
1845

1846
static void initialize_core_pattern(bool skip_setup) {
243✔
1847
        int r;
243✔
1848

1849
        if (skip_setup || !arg_early_core_pattern)
243✔
1850
                return;
1851

1852
        if (getpid_cached() != 1)
36✔
1853
                return;
1854

1855
        r = write_string_file("/proc/sys/kernel/core_pattern", arg_early_core_pattern, WRITE_STRING_FILE_DISABLE_BUFFER);
36✔
1856
        if (r < 0)
36✔
1857
                log_warning_errno(r, "Failed to write '%s' to /proc/sys/kernel/core_pattern, ignoring: %m",
19✔
1858
                                  arg_early_core_pattern);
1859
}
1860

1861
static void apply_protect_system(bool skip_setup) {
243✔
1862
        int r;
243✔
1863

1864
        if (skip_setup || getpid_cached() != 1 || arg_protect_system == 0)
243✔
1865
                return;
207✔
1866

1867
        if (arg_protect_system < 0 && !in_initrd()) {
36✔
1868
                log_debug("ProtectSystem=auto selected, but not running in an initrd, skipping.");
25✔
1869
                return;
25✔
1870
        }
1871

1872
        r = make_mount_point("/usr");
11✔
1873
        if (r < 0) {
11✔
1874
                log_warning_errno(r, "Failed to make /usr/ a mount point, ignoring: %m");
×
1875
                return;
×
1876
        }
1877

1878
        if (mount_nofollow_verbose(
11✔
1879
                        LOG_WARNING,
1880
                        /* what= */ NULL,
1881
                        "/usr",
1882
                        /* fstype= */ NULL,
1883
                        MS_BIND|MS_REMOUNT|MS_RDONLY,
1884
                        /* options= */ NULL) < 0)
1885
                return;
1886

1887
        log_info("Successfully made /usr/ read-only.");
11✔
1888
}
1889

1890
static void update_cpu_affinity(bool skip_setup) {
306✔
1891
        _cleanup_free_ char *mask = NULL;
×
1892

1893
        if (skip_setup || !arg_cpu_affinity.set)
306✔
1894
                return;
1895

1896
        assert(arg_cpu_affinity.allocated > 0);
×
1897

1898
        mask = cpu_set_to_range_string(&arg_cpu_affinity);
×
1899
        log_debug("Setting CPU affinity to {%s}.", strnull(mask));
×
1900

1901
        if (sched_setaffinity(0, arg_cpu_affinity.allocated, arg_cpu_affinity.set) < 0)
×
1902
                log_warning_errno(errno, "Failed to set CPU affinity, ignoring: %m");
×
1903
}
1904

1905
static void update_numa_policy(bool skip_setup) {
306✔
1906
        int r;
306✔
1907
        _cleanup_free_ char *nodes = NULL;
×
1908
        const char * policy = NULL;
306✔
1909

1910
        if (skip_setup || !mpol_is_valid(numa_policy_get_type(&arg_numa_policy)))
595✔
1911
                return;
1912

1913
        if (DEBUG_LOGGING) {
×
1914
                policy = mpol_to_string(numa_policy_get_type(&arg_numa_policy));
×
1915
                nodes = cpu_set_to_range_string(&arg_numa_policy.nodes);
×
1916
                log_debug("Setting NUMA policy to %s, with nodes {%s}.", strnull(policy), strnull(nodes));
×
1917
        }
1918

1919
        r = apply_numa_policy(&arg_numa_policy);
×
1920
        if (r == -EOPNOTSUPP)
×
1921
                log_debug_errno(r, "NUMA support not available, ignoring.");
×
1922
        else if (r < 0)
×
1923
                log_warning_errno(r, "Failed to set NUMA memory policy, ignoring: %m");
×
1924
}
1925

1926
/* Appends the entries of src[1..argc-1] that shall be handed to a reexecuted instance to dst[], starting
 * at index *dst_index, and advances *dst_index past the last entry written. src[0] is never copied.
 *
 * Dropped are: "--switched-root", "--system", "--user", as well as "--deserialize" and "--unit" in both
 * their "--opt=value" and their "--opt value" spelling. The caller must make sure dst[] is large enough. */
static void filter_args(
                const char* dst[],
                size_t *dst_index,
                char **src,
                int argc) {

        assert(dst);
        assert(dst_index);

        for (int k = 1; k < argc; k++) {
                const char *arg = src[k];

                /* Options whose value follows as a separate word: drop the option and skip its value too.
                 * For --unit: we already acted upon the target unit and queued the appropriate jobs, we
                 * don't want to redo all this after reexecution. */
                if (STR_IN_SET(arg, "--deserialize", "--unit")) {
                        k++;
                        continue;
                }

                /* Same options, but with the value attached via "=" */
                if (startswith(arg, "--deserialize=") || startswith(arg, "--unit="))
                        continue;

                /* Plain switches that are not copied over */
                if (STR_IN_SET(arg,
                               "--switched-root",
                               "--system",
                               "--user"))
                        continue;

                /* Anything else is a regular option which the new instance shall see as well. */
                dst[(*dst_index)++] = arg;
        }
}
39✔
1963

1964
/* Signals the processes that are still around before the manager is replaced: SIGTERM on switch-root
 * and soft-reboot, followed by SIGKILL on soft-reboot only. Neither broadcast waits for the processes
 * to exit. */
static void finish_remaining_processes(ManagerObjective objective) {
        assert(objective >= 0 && objective < _MANAGER_OBJECTIVE_MAX);

        bool soft_reboot = objective == MANAGER_SOFT_REBOOT;

        /* Kill all remaining processes from the initrd, but don't wait for them, so that we can handle
         * the SIGCHLD for them after deserializing. */
        if (soft_reboot || objective == MANAGER_SWITCH_ROOT)
                broadcast_signal(SIGTERM, /* wait_for_exit= */ false, /* send_sighup= */ true, arg_defaults.timeout_stop_usec);

        /* On soft reboot really make sure nothing is left. Note that this will skip cgroups of units that
         * were configured with SurviveFinalKillSignal=yes. */
        if (soft_reboot)
                broadcast_signal(SIGKILL, /* wait_for_exit= */ false, /* send_sighup= */ false, arg_defaults.timeout_stop_usec);
}
39✔
1977

1978
/* On soft-reboot: switches back to VT 1 and tries to release VTs 2…VTNR_MAX, in an attempt to return to
 * a situation similar to how it was during the original kernel initialization. This matters because if
 * some random TTY is in foreground, /dev/console will end up pointing to it, and the future init system
 * would then write its status output there, where it probably shouldn't be writing to.
 *
 * For any other objective this is a no-op. All failures are logged at debug level and ignored. */
static void reduce_vt(ManagerObjective objective) {
        if (objective != MANAGER_SOFT_REBOOT)
                return;

        int r = chvt(1);
        if (r < 0)
                log_debug_errno(r, "Failed to switch to VT TTY 1, ignoring: %m");

        _cleanup_close_ int tty0_fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
        if (tty0_fd < 0) {
                log_debug_errno(tty0_fd, "Failed to open '/dev/tty0', ignoring: %m");
                return;
        }

        for (int vt = 2; vt <= VTNR_MAX; vt++) {
                if (ioctl(tty0_fd, VT_DISALLOCATE, vt) >= 0) {
                        log_debug("Successfully disallocated VT TTY %i.", vt);
                        continue;
                }

                log_debug_errno(errno, "Failed to disallocate VT TTY %i, ignoring: %m", vt);
        }
}
2003

2004
static int do_reexecute(
39✔
2005
                ManagerObjective objective,
2006
                int argc,
2007
                char* argv[],
2008
                const struct rlimit *saved_rlimit_nofile,
2009
                const struct rlimit *saved_rlimit_memlock,
2010
                FDSet *fds,
2011
                const char *switch_root_dir,
2012
                const char *switch_root_init,
2013
                uint64_t saved_capability_ambient_set,
2014
                const char **ret_error_message) {
2015

2016
        size_t i, args_size;
39✔
2017
        const char **args;
39✔
2018
        int r;
39✔
2019

2020
        assert(IN_SET(objective, MANAGER_REEXECUTE, MANAGER_SWITCH_ROOT, MANAGER_SOFT_REBOOT));
39✔
2021
        assert(argc >= 0);
39✔
2022
        assert(saved_rlimit_nofile);
39✔
2023
        assert(saved_rlimit_memlock);
39✔
2024
        assert(ret_error_message);
39✔
2025

2026
        /* Close and disarm the watchdog, so that the new instance can reinitialize it, but the machine
2027
         * doesn't get rebooted while we do that. */
2028
        watchdog_close(/* disarm= */ true);
39✔
2029

2030
        if (!switch_root_dir && objective == MANAGER_SOFT_REBOOT) {
39✔
2031
                /* If no switch root dir is specified, then check if /run/nextroot/ qualifies and use that */
2032
                r = path_is_os_tree("/run/nextroot");
×
2033
                if (r < 0 && r != -ENOENT)
×
2034
                        log_debug_errno(r, "Failed to determine if /run/nextroot/ is a valid OS tree, ignoring: %m");
×
2035
                else if (r > 0)
×
2036
                        switch_root_dir = "/run/nextroot";
2037
        }
2038

2039
        if (switch_root_dir) {
39✔
2040
                /* If we're supposed to switch root, preemptively check the existence of a usable init.
2041
                 * Otherwise the system might end up in a completely undebuggable state afterwards. */
2042
                if (switch_root_init) {
11✔
2043
                        r = chase_and_access(switch_root_init, switch_root_dir, CHASE_PREFIX_ROOT, X_OK, /* ret_path= */ NULL);
×
2044
                        if (r < 0)
×
2045
                                log_warning_errno(r, "Failed to chase configured init %s/%s: %m",
×
2046
                                                  switch_root_dir, switch_root_init);
2047
                } else {
2048
                        r = chase_and_access(SYSTEMD_BINARY_PATH, switch_root_dir, CHASE_PREFIX_ROOT, X_OK, /* ret_path= */ NULL);
11✔
2049
                        if (r < 0)
11✔
2050
                                log_debug_errno(r, "Failed to chase our own binary %s/%s: %m",
×
2051
                                                switch_root_dir, SYSTEMD_BINARY_PATH);
2052
                }
2053

2054
                if (r < 0) {
×
2055
                        r = chase_and_access("/sbin/init", switch_root_dir, CHASE_PREFIX_ROOT, X_OK, /* ret_path= */ NULL);
×
2056
                        if (r < 0) {
×
2057
                                *ret_error_message = "Switch root target contains no usable init";
×
2058
                                return log_error_errno(r, "Failed to chase %s/sbin/init", switch_root_dir);
×
2059
                        }
2060
                }
2061
        }
2062

2063
        /* Reset RLIMIT_NOFILE + RLIMIT_MEMLOCK back to the kernel defaults, so that the new systemd can pass
2064
         * the kernel default to its child processes */
2065
        if (saved_rlimit_nofile->rlim_cur != 0)
39✔
2066
                (void) setrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
39✔
2067
        if (saved_rlimit_memlock->rlim_cur != RLIM_INFINITY)
39✔
2068
                (void) setrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
39✔
2069

2070
        finish_remaining_processes(objective);
39✔
2071
        reduce_vt(objective);
39✔
2072

2073
        if (switch_root_dir) {
39✔
2074
                r = switch_root(/* new_root= */ switch_root_dir,
33✔
2075
                                /* old_root_after= */ NULL,
2076
                                /* flags= */ (objective == MANAGER_SWITCH_ROOT ? SWITCH_ROOT_DESTROY_OLD_ROOT : 0) |
11✔
2077
                                             (objective == MANAGER_SOFT_REBOOT ? 0 : SWITCH_ROOT_RECURSIVE_RUN));
11✔
2078
                if (r < 0)
11✔
2079
                        log_error_errno(r, "Failed to switch root, trying to continue: %m");
×
2080
        }
2081

2082
        r = capability_ambient_set_apply(saved_capability_ambient_set, /* also_inherit= */ false);
39✔
2083
        if (r < 0)
39✔
2084
                log_warning_errno(r, "Failed to apply the starting ambient set, ignoring: %m");
×
2085

2086
        args_size = argc + 5;
39✔
2087
        args = newa(const char*, args_size);
39✔
2088

2089
        if (!switch_root_init) {
39✔
2090
                char sfd[STRLEN("--deserialize=") + DECIMAL_STR_MAX(int)];
39✔
2091

2092
                /* First try to spawn ourselves with the right path, and with full serialization. We do this
2093
                 * only if the user didn't specify an explicit init to spawn. */
2094

2095
                assert(arg_serialization);
39✔
2096
                assert(fds);
39✔
2097

2098
                xsprintf(sfd, "--deserialize=%i", fileno(arg_serialization));
39✔
2099

2100
                i = 1;         /* Leave args[0] empty for now. */
39✔
2101

2102
                /* Put our stuff first to make sure it always gets parsed in case
2103
                 * we get weird stuff from the kernel cmdline (like --) */
2104
                if (IN_SET(objective, MANAGER_SWITCH_ROOT, MANAGER_SOFT_REBOOT))
39✔
2105
                        args[i++] = "--switched-root";
11✔
2106
                args[i++] = runtime_scope_cmdline_option_to_string(arg_runtime_scope);
39✔
2107
                args[i++] = sfd;
39✔
2108

2109
                filter_args(args, &i, argv, argc);
39✔
2110

2111
                args[i++] = NULL;
39✔
2112

2113
                assert(i <= args_size);
39✔
2114

2115
                /*
2116
                 * We want valgrind to print its memory usage summary before reexecution. Valgrind won't do
2117
                 * this is on its own on exec(), but it will do it on exit(). Hence, to ensure we get a
2118
                 * summary here, fork() off a child, let it exit() cleanly, so that it prints the summary,
2119
                 * and wait() for it in the parent, before proceeding into the exec().
2120
                 */
2121
                valgrind_summary_hack();
39✔
2122

2123
                args[0] = SYSTEMD_BINARY_PATH;
×
2124
                (void) execv(args[0], (char* const*) args);
×
2125

2126
                if (objective == MANAGER_REEXECUTE) {
×
2127
                        *ret_error_message = "Failed to execute our own binary";
×
2128
                        return log_error_errno(errno, "Failed to execute our own binary %s: %m", args[0]);
×
2129
                }
2130

2131
                log_debug_errno(errno, "Failed to execute our own binary %s, trying fallback: %m", args[0]);
×
2132
        }
2133

2134
        /* Try the fallback, if there is any, without any serialization. We pass the original argv[] and
2135
         * envp[]. (Well, modulo the ordering changes due to getopt() in argv[], and some cleanups in envp[],
2136
         * but let's hope that doesn't matter.) */
2137

2138
        arg_serialization = safe_fclose(arg_serialization);
×
2139
        fds = fdset_free(fds);
×
2140

2141
        /* Drop /run/systemd directory. Some of its content can be used as a flag indicating that systemd is
2142
         * the init system but we might be replacing it with something different. If systemd is used again it
2143
         * will recreate the directory and its content anyway. */
2144
        r = rm_rf("/run/systemd.pre-switch-root", REMOVE_ROOT|REMOVE_MISSING_OK);
×
2145
        if (r < 0)
×
2146
                log_warning_errno(r, "Failed to prepare /run/systemd.pre-switch-root/, ignoring: %m");
×
2147

2148
        r = RET_NERRNO(rename("/run/systemd", "/run/systemd.pre-switch-root"));
×
2149
        if (r < 0)
×
2150
                log_warning_errno(r, "Failed to move /run/systemd/ to /run/systemd.pre-switch-root/, ignoring: %m");
×
2151

2152
        /* Reopen the console */
2153
        (void) make_console_stdio();
×
2154

2155
        i = 1;         /* Leave args[0] empty for now. */
×
2156
        for (int j = 1; j <= argc; j++)
×
2157
                args[i++] = argv[j];
×
2158
        assert(i <= args_size);
×
2159

2160
        /* Re-enable any blocked signals, especially important if we switch from initrd to init=... */
2161
        (void) reset_all_signal_handlers();
×
2162
        (void) reset_signal_mask();
×
2163
        (void) rlimit_nofile_safe();
×
2164

2165
        if (switch_root_init) {
×
2166
                args[0] = switch_root_init;
×
2167
                (void) execve(args[0], (char* const*) args, saved_env);
×
2168
                log_warning_errno(errno, "Failed to execute configured init %s, trying fallback: %m", args[0]);
×
2169
        }
2170

2171
        args[0] = "/sbin/init";
×
2172
        (void) execv(args[0], (char* const*) args);
×
2173
        r = -errno;
×
2174
        *ret_error_message = "Failed to execute /sbin/init";
×
2175

2176
        if (r == -ENOENT) {
×
2177
                manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
×
2178
                                      ANSI_HIGHLIGHT_RED "  !!  " ANSI_NORMAL,
2179
                                      "%s", *ret_error_message);
2180

2181
                log_warning_errno(r, "No /sbin/init, trying fallback shell");
×
2182

2183
                args[0] = "/bin/sh";
×
2184
                args[1] = NULL;
×
2185
                (void) execve(args[0], (char* const*) args, saved_env);
×
2186
                r = -errno;
×
2187
                *ret_error_message = "Failed to execute fallback shell";
×
2188
        }
2189

2190
        return log_error_errno(r, "%s, giving up: %m", *ret_error_message);
×
2191
}
2192

2193
static int invoke_main_loop(
243✔
2194
                Manager *m,
2195
                const struct rlimit *saved_rlimit_nofile,
2196
                const struct rlimit *saved_rlimit_memlock,
2197
                int *ret_retval,                   /* Return parameters relevant for shutting down */
2198
                FDSet **ret_fds,                   /* Return parameters for reexecuting */
2199
                char **ret_switch_root_dir,        /* … */
2200
                char **ret_switch_root_init,       /* … */
2201
                const char **ret_error_message) {
2202

2203
        int r;
243✔
2204

2205
        assert(m);
243✔
2206
        assert(saved_rlimit_nofile);
243✔
2207
        assert(saved_rlimit_memlock);
243✔
2208
        assert(ret_retval);
243✔
2209
        assert(ret_fds);
243✔
2210
        assert(ret_switch_root_dir);
243✔
2211
        assert(ret_switch_root_init);
243✔
2212
        assert(ret_error_message);
243✔
2213

2214
        for (;;) {
369✔
2215
                int objective = manager_loop(m);
306✔
2216
                if (objective < 0) {
306✔
2217
                        *ret_error_message = "Failed to run main loop";
×
2218
                        return log_struct_errno(LOG_EMERG, objective,
×
2219
                                                LOG_MESSAGE("Failed to run main loop: %m"),
2220
                                                LOG_MESSAGE_ID(SD_MESSAGE_CORE_MAINLOOP_FAILED_STR));
2221
                }
2222

2223
                /* Ensure shutdown timestamp is taken even when bypassing the job engine */
2224
                if (IN_SET(objective,
306✔
2225
                           MANAGER_SOFT_REBOOT,
2226
                           MANAGER_REBOOT,
2227
                           MANAGER_KEXEC,
2228
                           MANAGER_HALT,
2229
                           MANAGER_POWEROFF) &&
2230
                    !dual_timestamp_is_set(m->timestamps + MANAGER_TIMESTAMP_SHUTDOWN_START))
14✔
2231
                        dual_timestamp_now(m->timestamps + MANAGER_TIMESTAMP_SHUTDOWN_START);
×
2232

2233
                switch (objective) {
306✔
2234

2235
                case MANAGER_RELOAD: {
126✔
2236
                        LogTarget saved_log_target;
63✔
2237
                        int saved_log_level;
63✔
2238

2239
                        manager_send_reloading(m);
63✔
2240

2241
                        log_info("Reloading...");
63✔
2242

2243
                        /* First, save any overridden log level/target, then parse the configuration file,
2244
                         * which might change the log level to new settings. */
2245

2246
                        saved_log_level = m->log_level_overridden ? log_get_max_level() : -1;
63✔
2247
                        saved_log_target = m->log_target_overridden ? log_get_target() : _LOG_TARGET_INVALID;
63✔
2248

2249
                        (void) parse_configuration(saved_rlimit_nofile, saved_rlimit_memlock);
63✔
2250

2251
                        set_manager_defaults(m);
63✔
2252
                        set_manager_settings(m);
63✔
2253

2254
                        update_cpu_affinity(false);
63✔
2255
                        update_numa_policy(false);
63✔
2256

2257
                        if (saved_log_level >= 0)
63✔
2258
                                manager_override_log_level(m, saved_log_level);
1✔
2259
                        if (saved_log_target >= 0)
63✔
2260
                                manager_override_log_target(m, saved_log_target);
1✔
2261

2262
                        if (manager_reload(m) < 0)
63✔
2263
                                /* Reloading failed before the point of no return.
2264
                                 * Let's continue running as if nothing happened. */
2265
                                m->objective = MANAGER_OK;
×
2266
                        else
2267
                                log_info("Reloading finished in " USEC_FMT " ms.",
126✔
2268
                                         usec_sub_unsigned(now(CLOCK_MONOTONIC), m->timestamps[MANAGER_TIMESTAMP_UNITS_LOAD].monotonic) / USEC_PER_MSEC);
2269

2270
                        continue;
63✔
2271
                }
2272

2273
                case MANAGER_REEXECUTE:
28✔
2274

2275
                        manager_send_reloading(m); /* From the perspective of the manager calling us this is
28✔
2276
                                                    * pretty much the same as a reload */
2277

2278
                        r = prepare_reexecute(m, &arg_serialization, ret_fds, false);
28✔
2279
                        if (r < 0) {
28✔
2280
                                *ret_error_message = "Failed to prepare for reexecution";
×
2281
                                return r;
×
2282
                        }
2283

2284
                        log_notice("Reexecuting.");
28✔
2285

2286
                        *ret_retval = EXIT_FAILURE;
28✔
2287
                        *ret_switch_root_dir = *ret_switch_root_init = NULL;
28✔
2288

2289
                        return objective;
28✔
2290

2291
                case MANAGER_SWITCH_ROOT:
11✔
2292

2293
                        manager_send_reloading(m); /* From the perspective of the manager calling us this is
11✔
2294
                                                    * pretty much the same as a reload */
2295

2296
                        manager_set_switching_root(m, true);
11✔
2297

2298
                        if (!m->switch_root_init) {
11✔
2299
                                r = prepare_reexecute(m, &arg_serialization, ret_fds, true);
11✔
2300
                                if (r < 0) {
11✔
2301
                                        *ret_error_message = "Failed to prepare for reexecution";
×
2302
                                        return r;
×
2303
                                }
2304
                        } else
2305
                                *ret_fds = NULL;
×
2306

2307
                        log_notice("Switching root.");
11✔
2308

2309
                        *ret_retval = EXIT_FAILURE;
11✔
2310

2311
                        /* Steal the switch root parameters */
2312
                        *ret_switch_root_dir = TAKE_PTR(m->switch_root);
11✔
2313
                        *ret_switch_root_init = TAKE_PTR(m->switch_root_init);
11✔
2314

2315
                        return objective;
11✔
2316

2317
                case MANAGER_SOFT_REBOOT:
×
2318
                        manager_send_reloading(m);
×
2319
                        manager_set_switching_root(m, true);
×
2320

2321
                        r = prepare_reexecute(m, &arg_serialization, ret_fds, /* switching_root= */ true);
×
2322
                        if (r < 0) {
×
2323
                                *ret_error_message = "Failed to prepare for reexecution";
×
2324
                                return r;
×
2325
                        }
2326

2327
                        log_notice("Soft-rebooting.");
×
2328

2329
                        *ret_retval = EXIT_FAILURE;
×
2330
                        *ret_switch_root_dir = TAKE_PTR(m->switch_root);
×
2331
                        *ret_switch_root_init = NULL;
×
2332

2333
                        return objective;
×
2334

2335
                case MANAGER_EXIT:
190✔
2336
                        if (MANAGER_IS_USER(m)) {
190✔
2337
                                log_debug("Exit.");
190✔
2338

2339
                                *ret_retval = m->return_value;
190✔
2340
                                *ret_fds = NULL;
190✔
2341
                                *ret_switch_root_dir = *ret_switch_root_init = NULL;
190✔
2342

2343
                                return objective;
190✔
2344
                        }
2345

2346
                        _fallthrough_;
14✔
2347
                case MANAGER_REBOOT:
2348
                case MANAGER_POWEROFF:
2349
                case MANAGER_HALT:
2350
                case MANAGER_KEXEC: {
2351
                        log_notice("Shutting down.");
14✔
2352

2353
                        *ret_retval = m->return_value;
14✔
2354
                        *ret_fds = NULL;
14✔
2355
                        *ret_switch_root_dir = *ret_switch_root_init = NULL;
14✔
2356

2357
                        return objective;
14✔
2358
                }
2359

2360
                default:
×
2361
                        assert_not_reached();
×
2362
                }
2363
        }
2364
}
2365

2366
static void log_execution_mode(bool *ret_first_boot) {
243✔
2367
        bool first_boot = false;
243✔
2368
        int r;
243✔
2369

2370
        assert(ret_first_boot);
243✔
2371

2372
        switch (arg_runtime_scope) {
243✔
2373

2374
        case RUNTIME_SCOPE_SYSTEM: {
2375
                struct utsname uts;
52✔
2376
                int v;
52✔
2377

2378
                log_info("systemd " GIT_VERSION " running in %ssystem mode (%s)",
104✔
2379
                         arg_action == ACTION_TEST ? "test " : "",
2380
                         systemd_features);
2381

2382
                v = detect_virtualization();
52✔
2383
                if (v > 0)
52✔
2384
                        log_info("Detected virtualization %s.", virtualization_to_string(v));
52✔
2385

2386
                v = detect_confidential_virtualization();
52✔
2387
                if (v > 0)
52✔
2388
                        log_info("Detected confidential virtualization %s.", confidential_virtualization_to_string(v));
×
2389

2390
                log_info("Detected architecture %s.", architecture_to_string(uname_architecture()));
52✔
2391

2392
                if (in_initrd())
52✔
2393
                        log_info("Running in initrd.");
11✔
2394
                else {
2395
                        _cleanup_free_ char *id_text = NULL;
41✔
2396

2397
                        /* Let's check whether we are in first boot. First, check if an override was
2398
                         * specified on the kernel command line. If yes, we honour that. */
2399

2400
                        r = proc_cmdline_get_bool("systemd.condition_first_boot", /* flags= */ 0, &first_boot);
41✔
2401
                        if (r < 0)
41✔
2402
                                log_debug_errno(r, "Failed to parse systemd.condition_first_boot= kernel command line argument, ignoring: %m");
×
2403

2404
                        if (r > 0)
41✔
2405
                                log_full(first_boot ? LOG_INFO : LOG_DEBUG,
×
2406
                                         "Kernel command line argument says we are %s first boot.",
2407
                                         first_boot ? "in" : "not in");
2408
                        else {
2409
                                /* Second, perform autodetection. We use /etc/machine-id as flag file for
2410
                                 * this: If it is missing or contains the value "uninitialized", this is the
2411
                                 * first boot. In other cases, it is not. This allows container managers and
2412
                                 * installers to provision a couple of files in /etc but still permit the
2413
                                 * first-boot initialization to occur. If the container manager wants to
2414
                                 * provision the machine ID it should pass $container_uuid to PID 1. */
2415

2416
                                r = read_one_line_file("/etc/machine-id", &id_text);
41✔
2417
                                if (r < 0 || streq(id_text, "uninitialized")) {
41✔
2418
                                        if (r < 0 && r != -ENOENT)
18✔
2419
                                                log_warning_errno(r, "Unexpected error while reading /etc/machine-id, assuming first boot: %m");
×
2420

2421
                                        first_boot = true;
18✔
2422
                                        log_info("Detected first boot.");
18✔
2423
                                } else
2424
                                        log_debug("Detected initialized system, this is not the first boot.");
23✔
2425
                        }
2426
                }
2427

2428
                assert_se(uname(&uts) >= 0);
52✔
2429

2430
                if (strverscmp_improved(uts.release, KERNEL_BASELINE_VERSION) < 0)
52✔
2431
                        log_warning("Warning! Reported kernel version %s is older than systemd's required baseline kernel version %s. "
×
2432
                                    "Your mileage may vary.", uts.release, KERNEL_BASELINE_VERSION);
2433
                else
2434
                        log_debug("Kernel version %s, our baseline is %s", uts.release, KERNEL_BASELINE_VERSION);
52✔
2435

2436
                break;
52✔
2437
        }
2438

2439
        case RUNTIME_SCOPE_USER:
191✔
2440
                if (DEBUG_LOGGING) {
191✔
2441
                        _cleanup_free_ char *t = NULL;
191✔
2442

2443
                        t = uid_to_name(getuid());
191✔
2444
                        log_debug("systemd " GIT_VERSION " running in %suser mode for user " UID_FMT "/%s. (%s)",
382✔
2445
                                  arg_action == ACTION_TEST ? " test" : "",
2446
                                  getuid(), strna(t), systemd_features);
2447
                }
2448

2449
                break;
2450

2451
        default:
×
2452
                assert_not_reached();
×
2453
        }
2454

2455
        *ret_first_boot = first_boot;
243✔
2456
}
243✔
2457

2458
static int initialize_runtime(
243✔
2459
                bool skip_setup,
2460
                bool first_boot,
2461
                struct rlimit *saved_rlimit_nofile,
2462
                struct rlimit *saved_rlimit_memlock,
2463
                uint64_t *saved_ambient_set,
2464
                const char **ret_error_message) {
2465

2466
        int r;
243✔
2467

2468
        assert(saved_ambient_set);
243✔
2469
        assert(ret_error_message);
243✔
2470

2471
        /* Sets up various runtime parameters. Many of these initializations are conditionalized:
2472
         *
2473
         * - Some only apply to --system instances
2474
         * - Some only apply to --user instances
2475
         * - Some only apply when we first start up, but not when we reexecute
2476
         */
2477

2478
        if (arg_action != ACTION_RUN)
243✔
2479
                return 0;
2480

2481
        update_cpu_affinity(skip_setup);
243✔
2482
        update_numa_policy(skip_setup);
243✔
2483

2484
        switch (arg_runtime_scope) {
243✔
2485

2486
        case RUNTIME_SCOPE_SYSTEM:
52✔
2487
                /* Make sure we leave a core dump without panicking the kernel. */
2488
                install_crash_handler();
52✔
2489

2490
                if (!skip_setup) {
52✔
2491
                        /* Check that /usr/ is either on the same file system as / or mounted already. */
2492
                        if (dir_is_empty("/usr", /* ignore_hidden_or_backup= */ true) > 0) {
36✔
2493
                                *ret_error_message = "Refusing to run in unsupported environment where /usr/ is not populated";
×
2494
                                return -ENOEXEC;
×
2495
                        }
2496

2497
                        /* Pull credentials from various sources into a common credential directory (we do
2498
                         * this here, before setting up the machine ID, so that we can use credential info
2499
                         * for setting up the machine ID) */
2500
                        (void) import_credentials();
36✔
2501

2502
                        (void) os_release_status();
36✔
2503
                        (void) machine_id_setup(/* root= */ NULL, arg_machine_id,
72✔
2504
                                                (first_boot ? MACHINE_ID_SETUP_FORCE_TRANSIENT : 0) |
36✔
2505
                                                (arg_machine_id_from_firmware ? MACHINE_ID_SETUP_FORCE_FIRMWARE : 0),
36✔
2506
                                                /* ret= */ NULL);
2507
                        (void) hostname_setup(/* really= */ true);
36✔
2508
                        (void) loopback_setup();
36✔
2509

2510
                        bump_unix_max_dgram_qlen();
36✔
2511
                        bump_file_max_and_nr_open();
36✔
2512

2513
                        write_container_id();
36✔
2514

2515
                        (void) write_boot_or_shutdown_osc("boot");
36✔
2516

2517
                        /* Copy os-release to the propagate directory, so that we update it for services running
2518
                         * under RootDirectory=/RootImage= when we do a soft reboot. */
2519
                        r = setup_os_release(RUNTIME_SCOPE_SYSTEM);
36✔
2520
                        if (r < 0)
36✔
2521
                                log_warning_errno(r, "Failed to copy os-release for propagation, ignoring: %m");
×
2522
                }
2523

2524
                r = watchdog_set_device(arg_watchdog_device);
52✔
2525
                if (r < 0)
52✔
2526
                        log_warning_errno(r, "Failed to set watchdog device to %s, ignoring: %m", arg_watchdog_device);
×
2527

2528
                if (!cap_test_all(arg_capability_bounding_set)) {
52✔
2529
                        r = capability_bounding_set_drop_usermode(arg_capability_bounding_set);
×
2530
                        if (r < 0) {
×
2531
                                *ret_error_message = "Failed to drop capability bounding set of usermode helpers";
×
2532
                                return log_struct_errno(LOG_EMERG, r,
×
2533
                                                        LOG_MESSAGE("Failed to drop capability bounding set of usermode helpers: %m"),
2534
                                                        LOG_MESSAGE_ID(SD_MESSAGE_CORE_CAPABILITY_BOUNDING_USER_STR));
2535
                        }
2536

2537
                        r = capability_bounding_set_drop(arg_capability_bounding_set, true);
×
2538
                        if (r < 0) {
×
2539
                                *ret_error_message = "Failed to drop capability bounding set";
×
2540
                                return log_struct_errno(LOG_EMERG, r,
×
2541
                                                        LOG_MESSAGE("Failed to drop capability bounding set: %m"),
2542
                                                        LOG_MESSAGE_ID(SD_MESSAGE_CORE_CAPABILITY_BOUNDING_STR));
2543
                        }
2544
                }
2545

2546
                if (arg_no_new_privs) {
52✔
2547
                        if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
×
2548
                                *ret_error_message = "Failed to disable new privileges";
×
2549
                                return log_struct_errno(LOG_EMERG, errno,
×
2550
                                                        LOG_MESSAGE("Failed to disable new privileges: %m"),
2551
                                                        LOG_MESSAGE_ID(SD_MESSAGE_CORE_DISABLE_PRIVILEGES_STR));
2552
                        }
2553
                }
2554

2555
                break;
2556

2557
        case RUNTIME_SCOPE_USER: {
191✔
2558
                _cleanup_free_ char *p = NULL;
191✔
2559

2560
                /* Create the runtime directory and place the inaccessible device nodes there, if we run in
2561
                 * user mode. In system mode mount_setup() already did that. */
2562

2563
                r = xdg_user_runtime_dir("/systemd", &p);
191✔
2564
                if (r < 0) {
191✔
2565
                        *ret_error_message = "$XDG_RUNTIME_DIR is not set";
×
2566
                        return log_struct_errno(LOG_EMERG, r,
×
2567
                                                LOG_MESSAGE("Failed to determine $XDG_RUNTIME_DIR path: %m"),
2568
                                                LOG_MESSAGE_ID(SD_MESSAGE_CORE_NO_XDGDIR_PATH_STR));
2569
                }
2570

2571
                if (!skip_setup) {
191✔
2572
                        (void) mkdir_p_label(p, 0755);
190✔
2573
                        (void) make_inaccessible_nodes(p, UID_INVALID, GID_INVALID);
190✔
2574

2575
                        r = setup_os_release(RUNTIME_SCOPE_USER);
190✔
2576
                        if (r < 0)
190✔
2577
                                log_warning_errno(r, "Failed to copy os-release for propagation, ignoring: %m");
1✔
2578
                }
2579

2580
                break;
191✔
2581
        }
2582

2583
        default:
×
2584
                assert_not_reached();
×
2585
        }
2586

2587
        /* The two operations on the ambient set are meant for a user serssion manager. They do not affect
2588
         * system manager operation, because by default it starts with an empty ambient set.
2589
         *
2590
         * Preserve the ambient set for later use with sd-executor processes. */
2591
        r = capability_get_ambient(saved_ambient_set);
243✔
2592
        if (r < 0)
243✔
2593
                log_warning_errno(r, "Failed to save ambient capabilities, ignoring: %m");
×
2594

2595
        /* Clear ambient capabilities, so services do not inherit them implicitly. Dropping them does
2596
         * not affect the permitted and effective sets which are important for the manager itself to
2597
         * operate. */
2598
        r = capability_ambient_set_apply(0, /* also_inherit= */ false);
243✔
2599
        if (r < 0)
243✔
2600
                log_warning_errno(r, "Failed to reset ambient capability set, ignoring: %m");
×
2601

2602
        if (arg_timer_slack_nsec != NSEC_INFINITY)
243✔
2603
                if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
×
2604
                        log_warning_errno(errno, "Failed to adjust timer slack, ignoring: %m");
×
2605

2606
        if (arg_syscall_archs) {
243✔
2607
                r = enforce_syscall_archs(arg_syscall_archs);
×
2608
                if (r < 0) {
×
2609
                        *ret_error_message = "Failed to set syscall architectures";
×
2610
                        return r;
×
2611
                }
2612
        }
2613

2614
        r = make_reaper_process(true);
243✔
2615
        if (r < 0)
243✔
2616
                log_warning_errno(r, "Failed to make us a subreaper, ignoring: %m");
×
2617

2618
        /* Bump up RLIMIT_NOFILE for systemd itself */
2619
        (void) bump_rlimit_nofile(saved_rlimit_nofile);
243✔
2620
        (void) bump_rlimit_memlock(saved_rlimit_memlock);
243✔
2621

2622
        return 0;
243✔
2623
}
2624

2625
static int do_queue_default_job(
226✔
2626
                Manager *m,
2627
                const char **ret_error_message) {
2628

2629
        _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
226✔
2630
        const char *unit;
226✔
2631
        Job *job;
226✔
2632
        Unit *target;
226✔
2633
        int r;
226✔
2634

2635
        if (arg_default_unit)
226✔
2636
                unit = arg_default_unit;
2637
        else if (in_initrd())
200✔
2638
                unit = SPECIAL_INITRD_TARGET;
2639
        else
2640
                unit = SPECIAL_DEFAULT_TARGET;
189✔
2641

2642
        log_debug("Activating default unit: %s", unit);
226✔
2643

2644
        r = manager_load_startable_unit_or_warn(m, unit, NULL, &target);
226✔
2645
        if (r < 0 && in_initrd() && !arg_default_unit) {
226✔
2646
                /* Fall back to default.target, which we used to always use by default. Only do this if no
2647
                 * explicit configuration was given. */
2648

2649
                log_info("Falling back to %s.", SPECIAL_DEFAULT_TARGET);
×
2650

2651
                r = manager_load_startable_unit_or_warn(m, SPECIAL_DEFAULT_TARGET, NULL, &target);
×
2652
        }
2653
        if (r < 0) {
×
2654
                log_info("Falling back to %s.", SPECIAL_RESCUE_TARGET);
×
2655

2656
                r = manager_load_startable_unit_or_warn(m, SPECIAL_RESCUE_TARGET, NULL, &target);
×
2657
                if (r < 0) {
×
2658
                        *ret_error_message = r == -ERFKILL ? SPECIAL_RESCUE_TARGET " masked"
×
2659
                                                           : "Failed to load " SPECIAL_RESCUE_TARGET;
×
2660
                        return r;
×
2661
                }
2662
        }
2663

2664
        assert(target->load_state == UNIT_LOADED);
226✔
2665

2666
        r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, &error, &job);
226✔
2667
        if (r == -EPERM) {
226✔
2668
                log_debug_errno(r, "Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
25✔
2669

2670
                sd_bus_error_free(&error);
25✔
2671

2672
                r = manager_add_job(m, JOB_START, target, JOB_REPLACE, &error, &job);
25✔
2673
                if (r < 0) {
25✔
2674
                        *ret_error_message = "Failed to start default target";
×
2675
                        return log_struct_errno(LOG_EMERG, r,
×
2676
                                                LOG_MESSAGE("Failed to start default target: %s", bus_error_message(&error, r)),
2677
                                                LOG_MESSAGE_ID(SD_MESSAGE_CORE_START_TARGET_FAILED_STR));
2678
                }
2679

2680
        } else if (r < 0) {
201✔
2681
                *ret_error_message = "Failed to isolate default target";
×
2682
                return log_struct_errno(LOG_EMERG, r,
×
2683
                                        LOG_MESSAGE("Failed to isolate default target: %s", bus_error_message(&error, r)),
2684
                                        LOG_MESSAGE_ID(SD_MESSAGE_CORE_ISOLATE_TARGET_FAILED_STR));
2685
        }
2686

2687
        log_info("Queued %s job for default target %s.",
226✔
2688
                 job_type_to_string(job->type), unit_status_string(job->unit, NULL));
2689

2690
        return 0;
2691
}
2692

2693
static void save_rlimits(struct rlimit *saved_rlimit_nofile,
                         struct rlimit *saved_rlimit_memlock) {
        int rc;

        assert(saved_rlimit_nofile);
        assert(saved_rlimit_memlock);

        /* Stores the current RLIMIT_NOFILE and RLIMIT_MEMLOCK settings in the supplied structures. If
         * a limit cannot be read this is logged, but otherwise ignored. */

        rc = getrlimit(RLIMIT_NOFILE, saved_rlimit_nofile);
        if (rc < 0)
                log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");

        rc = getrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock);
        if (rc < 0)
                log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
}
243✔
2705

2706
static void fallback_rlimit_nofile(const struct rlimit *saved_rlimit_nofile) {
306✔
2707
        struct rlimit *rl;
306✔
2708

2709
        if (arg_defaults.rlimit[RLIMIT_NOFILE])
306✔
2710
                return;
2711

2712
        /* Make sure forked processes get limits based on the original kernel setting */
2713

2714
        rl = newdup(struct rlimit, saved_rlimit_nofile, 1);
306✔
2715
        if (!rl) {
306✔
2716
                log_oom();
×
2717
                return;
×
2718
        }
2719

2720
        /* Bump the hard limit for system services to a substantially higher value. The default
2721
         * hard limit current kernels set is pretty low (4K), mostly for historical
2722
         * reasons. According to kernel developers, the fd handling in recent kernels has been
2723
         * optimized substantially enough, so that we can bump the limit now, without paying too
2724
         * high a price in memory or performance. Note however that we only bump the hard limit,
2725
         * not the soft limit. That's because select() works the way it works, and chokes on fds
2726
         * >= 1024. If we'd bump the soft limit globally, it might accidentally happen to
2727
         * unexpecting programs that they get fds higher than what they can process using
2728
         * select(). By only bumping the hard limit but leaving the low limit as it is we avoid
2729
         * this pitfall:  programs that are written by folks aware of the select() problem in mind
2730
         * (and thus use poll()/epoll instead of select(), the way everybody should) can
2731
         * explicitly opt into high fds by bumping their soft limit beyond 1024, to the hard limit
2732
         * we pass. */
2733
        if (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM)
306✔
2734
                rl->rlim_max = MIN((rlim_t) read_nr_open(), MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE));
83✔
2735

2736
        /* If for some reason we were invoked with a soft limit above 1024 (which should never
2737
         * happen!, but who knows what we get passed in from pam_limit when invoked as --user
2738
         * instance), then lower what we pass on to not confuse our children */
2739
        rl->rlim_cur = MIN(rl->rlim_cur, (rlim_t) FD_SETSIZE);
306✔
2740

2741
        arg_defaults.rlimit[RLIMIT_NOFILE] = rl;
306✔
2742
}
2743

2744
static void fallback_rlimit_memlock(const struct rlimit *saved_rlimit_memlock) {
306✔
2745
        struct rlimit *rl;
306✔
2746

2747
        /* Pass the original value down to invoked processes */
2748

2749
        if (arg_defaults.rlimit[RLIMIT_MEMLOCK])
306✔
2750
                return;
2751

2752
        rl = newdup(struct rlimit, saved_rlimit_memlock, 1);
306✔
2753
        if (!rl) {
306✔
2754
                log_oom();
×
2755
                return;
×
2756
        }
2757

2758
        if (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM)  {
306✔
2759
                /* Raise the default limit to 8M also on old kernels and in containers (8M is the kernel
2760
                 * default for this since kernel 5.16) */
2761
                rl->rlim_max = MAX(rl->rlim_max, (rlim_t) DEFAULT_RLIMIT_MEMLOCK);
83✔
2762
                rl->rlim_cur = MAX(rl->rlim_cur, (rlim_t) DEFAULT_RLIMIT_MEMLOCK);
83✔
2763
        }
2764

2765
        arg_defaults.rlimit[RLIMIT_MEMLOCK] = rl;
306✔
2766
}
2767

2768
static void setenv_manager_environment(void) {
306✔
2769
        int r;
306✔
2770

2771
        STRV_FOREACH(p, arg_manager_environment) {
306✔
2772
                log_debug("Setting '%s' in our own environment.", *p);
×
2773

2774
                r = putenv_dup(*p, true);
×
2775
                if (r < 0)
×
2776
                        log_warning_errno(r, "Failed to setenv \"%s\", ignoring: %m", *p);
×
2777
        }
2778
}
306✔
2779

2780
static void reset_arguments(void) {
496✔
2781
        /* Frees/resets arg_* variables, with a few exceptions commented below. */
2782

2783
        arg_default_unit = mfree(arg_default_unit);
496✔
2784

2785
        /* arg_runtime_scope — ignore */
2786

2787
        arg_dump_core = true;
496✔
2788
        arg_crash_chvt = -1;
496✔
2789
        arg_crash_shell = false;
496✔
2790
        arg_crash_action = CRASH_FREEZE;
496✔
2791
        arg_confirm_spawn = mfree(arg_confirm_spawn);
496✔
2792
        arg_show_status = _SHOW_STATUS_INVALID;
496✔
2793
        arg_status_unit_format = STATUS_UNIT_FORMAT_DEFAULT;
496✔
2794
        arg_switched_root = false;
496✔
2795
        arg_pager_flags = 0;
496✔
2796
        arg_service_watchdogs = true;
496✔
2797

2798
        unit_defaults_done(&arg_defaults);
496✔
2799
        unit_defaults_init(&arg_defaults, arg_runtime_scope);
496✔
2800

2801
        arg_runtime_watchdog = 0;
496✔
2802
        arg_reboot_watchdog = 10 * USEC_PER_MINUTE;
496✔
2803
        arg_kexec_watchdog = 0;
496✔
2804
        arg_pretimeout_watchdog = 0;
496✔
2805
        arg_early_core_pattern = mfree(arg_early_core_pattern);
496✔
2806
        arg_watchdog_device = mfree(arg_watchdog_device);
496✔
2807
        arg_watchdog_pretimeout_governor = mfree(arg_watchdog_pretimeout_governor);
496✔
2808

2809
        arg_default_environment = strv_free(arg_default_environment);
496✔
2810
        arg_manager_environment = strv_free(arg_manager_environment);
496✔
2811

2812
        arg_capability_bounding_set = CAP_MASK_ALL;
496✔
2813
        arg_no_new_privs = false;
496✔
2814
        arg_protect_system = -1;
496✔
2815
        arg_timer_slack_nsec = NSEC_INFINITY;
496✔
2816

2817
        arg_syscall_archs = set_free(arg_syscall_archs);
496✔
2818

2819
        /* arg_serialization — ignore */
2820

2821
        arg_machine_id = (sd_id128_t) {};
496✔
2822
        arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
496✔
2823

2824
        cpu_set_done(&arg_cpu_affinity);
496✔
2825
        numa_policy_reset(&arg_numa_policy);
496✔
2826

2827
        arg_random_seed = mfree(arg_random_seed);
496✔
2828
        arg_random_seed_size = 0;
496✔
2829
        arg_clock_usec = 0;
496✔
2830

2831
        arg_reload_limit_interval_sec = 0;
496✔
2832
        arg_reload_limit_burst = 0;
496✔
2833
}
496✔
2834

2835
static void determine_default_oom_score_adjust(void) {
306✔
2836
        int r, a, b;
306✔
2837

2838
        /* Run our services at slightly higher OOM score than ourselves. But let's be conservative here, and
2839
         * do this only if we don't run as root (i.e. only if we are run in user mode, for an unprivileged
2840
         * user). */
2841

2842
        if (arg_defaults.oom_score_adjust_set)
306✔
2843
                return;
150✔
2844

2845
        if (getuid() == 0)
306✔
2846
                return;
2847

2848
        r = get_oom_score_adjust(&a);
156✔
2849
        if (r < 0)
156✔
2850
                return (void) log_warning_errno(r, "Failed to determine current OOM score adjustment value, ignoring: %m");
×
2851

2852
        assert_cc(100 <= OOM_SCORE_ADJ_MAX);
156✔
2853
        b = saturate_add(a, 100, OOM_SCORE_ADJ_MAX);
156✔
2854

2855
        if (a == b)
156✔
2856
                return;
2857

2858
        arg_defaults.oom_score_adjust = b;
156✔
2859
        arg_defaults.oom_score_adjust_set = true;
156✔
2860
}
2861

2862
static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
306✔
2863
                               const struct rlimit *saved_rlimit_memlock) {
2864
        int r;
306✔
2865

2866
        assert(saved_rlimit_nofile);
306✔
2867
        assert(saved_rlimit_memlock);
306✔
2868

2869
        /* Assign configuration defaults */
2870
        reset_arguments();
306✔
2871

2872
        r = parse_config_file();
306✔
2873
        if (r < 0)
306✔
2874
                log_warning_errno(r, "Failed to parse config file, ignoring: %m");
×
2875

2876
        if (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM) {
306✔
2877
                r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, 0);
83✔
2878
                if (r < 0)
83✔
2879
                        log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
×
2880
        }
2881

2882
        /* Initialize the show status setting if it hasn't been explicitly set yet */
2883
        if (arg_show_status == _SHOW_STATUS_INVALID)
306✔
2884
                arg_show_status = SHOW_STATUS_YES;
223✔
2885

2886
        /* Push variables into the manager environment block */
2887
        setenv_manager_environment();
306✔
2888

2889
        /* Parse log environment variables to take into account any new environment variables.
2890
         * Note that this also parses bits from the kernel command line, including "debug". */
2891
        log_parse_environment();
306✔
2892

2893
        /* Initialize some default rlimits for services if they haven't been configured */
2894
        fallback_rlimit_nofile(saved_rlimit_nofile);
306✔
2895
        fallback_rlimit_memlock(saved_rlimit_memlock);
306✔
2896

2897
        /* Slightly raise the OOM score for our services if we are running for unprivileged users. */
2898
        determine_default_oom_score_adjust();
306✔
2899

2900
        return 0;
306✔
2901
}
2902

2903
static int safety_checks(void) {
243✔
2904

2905
        if (getpid_cached() == 1 &&
243✔
2906
            arg_action != ACTION_RUN)
52✔
2907
                return log_error_errno(SYNTHETIC_ERRNO(EPERM),
×
2908
                                       "Unsupported execution mode while PID 1.");
2909

2910
        if (getpid_cached() == 1 &&
243✔
2911
            arg_runtime_scope == RUNTIME_SCOPE_USER)
52✔
2912
                return log_error_errno(SYNTHETIC_ERRNO(EPERM),
×
2913
                                       "Can't run --user mode as PID 1.");
2914

2915
        if (arg_action == ACTION_RUN &&
243✔
2916
            arg_runtime_scope == RUNTIME_SCOPE_SYSTEM &&
295✔
2917
            getpid_cached() != 1)
52✔
2918
                return log_error_errno(SYNTHETIC_ERRNO(EPERM),
×
2919
                                       "Can't run system mode unless PID 1.");
2920

2921
        if (arg_action == ACTION_TEST &&
243✔
2922
            geteuid() == 0)
×
2923
                return log_error_errno(SYNTHETIC_ERRNO(EPERM),
×
2924
                                       "Don't run test mode as root.");
2925

2926
        switch (arg_runtime_scope) {
243✔
2927

2928
        case RUNTIME_SCOPE_USER:
191✔
2929

2930
                if (arg_action == ACTION_RUN &&
382✔
2931
                    sd_booted() <= 0)
191✔
2932
                        return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
×
2933
                                               "Trying to run as user instance, but the system has not been booted with systemd.");
2934

2935
                if (arg_action == ACTION_RUN &&
382✔
2936
                    !getenv("XDG_RUNTIME_DIR"))
191✔
2937
                        return log_error_errno(SYNTHETIC_ERRNO(EUNATCH),
×
2938
                                               "Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
2939

2940
                break;
2941

2942
        case RUNTIME_SCOPE_SYSTEM:
52✔
2943
                if (arg_action == ACTION_RUN &&
104✔
2944
                    running_in_chroot() > 0)
52✔
2945
                        return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
×
2946
                                               "Cannot be run in a chroot() environment.");
2947
                break;
2948

2949
        default:
×
2950
                assert_not_reached();
×
2951
        }
2952

2953
        return 0;
2954
}
2955

2956
static int initialize_security(
17✔
2957
                bool *loaded_policy,
2958
                dual_timestamp *security_start_timestamp,
2959
                dual_timestamp *security_finish_timestamp,
2960
                const char **ret_error_message) {
2961

2962
        int r;
17✔
2963

2964
        assert(loaded_policy);
17✔
2965
        assert(security_start_timestamp);
17✔
2966
        assert(security_finish_timestamp);
17✔
2967
        assert(ret_error_message);
17✔
2968

2969
        dual_timestamp_now(security_start_timestamp);
17✔
2970

2971
        r = mac_selinux_setup(loaded_policy);
17✔
2972
        if (r < 0) {
17✔
2973
                *ret_error_message = "Failed to load SELinux policy";
×
2974
                return r;
×
2975
        }
2976

2977
        r = mac_smack_setup(loaded_policy);
17✔
2978
        if (r < 0) {
17✔
2979
                *ret_error_message = "Failed to load SMACK policy";
×
2980
                return r;
×
2981
        }
2982

2983
        r = mac_apparmor_setup();
17✔
2984
        if (r < 0) {
17✔
2985
                *ret_error_message = "Failed to load AppArmor policy";
×
2986
                return r;
×
2987
        }
2988

2989
        r = ima_setup();
17✔
2990
        if (r < 0) {
17✔
2991
                *ret_error_message = "Failed to load IMA policy";
×
2992
                return r;
×
2993
        }
2994

2995
        r = ipe_setup();
17✔
2996
        if (r < 0) {
17✔
2997
                *ret_error_message = "Failed to load IPE policy";
×
2998
                return r;
×
2999
        }
3000

3001
        dual_timestamp_now(security_finish_timestamp);
17✔
3002
        return 0;
17✔
3003
}
3004

3005
static int collect_fds(FDSet **ret_fds, const char **ret_error_message) {
        int r;

        assert(ret_fds);
        assert(ret_error_message);

        /* Collects the fds that were passed to us. Only fds that have O_CLOEXEC turned off are taken:
         * anything handed to us via execve() necessarily has O_CLOEXEC off, while our own code and our
         * dependencies should be clean enough to set O_CLOEXEC everywhere. Hence the flag should be a
         * safe way to tell passed-in fds apart from our own.
         *
         * This matters because subsystems we initialize early, SELinux in particular, might keep fds
         * open in our process behind our back. We must not take possession of those (and then close
         * them by accident). Thankfully SELinux sets O_CLOEXEC on its fds, so this test should work. */
        r = fdset_new_fill(/* filter_cloexec= */ 0, ret_fds);
        if (r >= 0) {
                /* The serialization fd is expected to have O_CLOEXEC turned on already, hence verify
                 * that it did not end up in the set. */
                assert_se(!arg_serialization || !fdset_contains(*ret_fds, fileno(arg_serialization)));

                return 0;
        }

        *ret_error_message = "Failed to allocate fd set";
        return log_struct_errno(LOG_EMERG, r,
                                LOG_MESSAGE("Failed to allocate fd set: %m"),
                                LOG_MESSAGE_ID(SD_MESSAGE_CORE_FD_SET_FAILED_STR));
}
3032

3033
static void setup_console_terminal(bool skip_setup) {
243✔
3034

3035
        if (arg_runtime_scope != RUNTIME_SCOPE_SYSTEM)
243✔
3036
                return;
3037

3038
        /* If we are init, we connect stdin/stdout/stderr to /dev/null and make sure we don't have a
3039
         * controlling tty. */
3040
        terminal_detach_session();
52✔
3041

3042
        /* Reset the console, but only if this is really init and we are freshly booted */
3043
        if (!skip_setup)
52✔
3044
                (void) console_setup();
36✔
3045
}
3046

3047
static bool early_skip_setup_check(int argc, char *argv[]) {
        bool deserializing = false;

        /* Takes a quick look at the command line to find out whether we are being reexecuted or are
         * booting up normally; the full command line parsing happens much later. Returns true if the
         * extensive setup can be skipped. After a root switch all the special setup is done regardless,
         * even though deserialization takes place in that case, too. */

        for (int i = 1; i < argc; i++) {
                const char *arg = argv[i];

                /* Root was switched: never skip the setup, whatever else is on the command line. */
                if (streq(arg, "--switched-root"))
                        return false;

                if (startswith(arg, "--deserialize=") || streq(arg, "--deserialize"))
                        deserializing = true;
        }

        /* Deserializing means we are reexecuting, in which case the extensive setup is avoided. */
        return deserializing;
}
3062

3063
static int save_env(void) {
243✔
3064
        char **l;
243✔
3065

3066
        l = strv_copy(environ);
243✔
3067
        if (!l)
243✔
3068
                return log_oom();
×
3069

3070
        strv_free_and_replace(saved_env, l);
243✔
3071
        return 0;
243✔
3072
}
3073

3074
/* Entry point of the manager binary.
 *
 * In order: takes early timestamps, peeks at the command line to decide whether one-time setup can
 * be skipped, saves argv and the environment, does the PID 1 specific (or user instance specific)
 * early logging/mount/MAC setup, parses configuration and command line, handles the informational
 * actions (help, version, dumps, introspection), creates and starts the Manager and runs the main
 * loop. Everything after the "finish" label is the common teardown path, which may reexecute, hand
 * over to the shutdown binary, or return.
 *
 * Returns retval, which starts out as EXIT_FAILURE and is only changed on the paths that set it
 * explicitly (or through invoke_main_loop(), which receives a pointer to it). */
int main(int argc, char *argv[]) {
243✔
3075
        dual_timestamp
243✔
3076
                initrd_timestamp = DUAL_TIMESTAMP_NULL,
243✔
3077
                userspace_timestamp = DUAL_TIMESTAMP_NULL,
243✔
3078
                kernel_timestamp = DUAL_TIMESTAMP_NULL,
243✔
3079
                security_start_timestamp = DUAL_TIMESTAMP_NULL,
243✔
3080
                security_finish_timestamp = DUAL_TIMESTAMP_NULL;
243✔
3081
        struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0),
243✔
3082
                saved_rlimit_memlock = RLIMIT_MAKE_CONST(RLIM_INFINITY); /* The original rlimits we passed
243✔
3083
                                                                          * in. Note we use different values
3084
                                                                          * for the two that indicate whether
3085
                                                                          * these fields are initialized! */
3086
        bool skip_setup, loaded_policy = false, queue_default_job = false, first_boot = false;
243✔
3087
        char *switch_root_dir = NULL, *switch_root_init = NULL;
243✔
3088
        usec_t before_startup, after_startup;
243✔
3089
        static char systemd[] = "systemd";
243✔
3090
        const char *error_message = NULL;
243✔
3091
        uint64_t saved_ambient_set = 0;
243✔
3092
        int r, retval = EXIT_FAILURE;
243✔
3093
        Manager *m = NULL;
243✔
3094
        FDSet *fds = NULL;
243✔
3095

3096
        assert_se(argc > 0 && !isempty(argv[0]));
243✔
3097

3098
        /* Take timestamps early on */
3099
        dual_timestamp_from_monotonic(&kernel_timestamp, 0);
243✔
3100
        dual_timestamp_now(&userspace_timestamp);
243✔
3101

3102
        /* Figure out whether we need to initialize the system, or if we already did that because we are
3103
         * reexecuting. */
3104
        skip_setup = early_skip_setup_check(argc, argv);
243✔
3105

3106
        /* If we get started via the /sbin/init symlink then we are called 'init'. After a subsequent
3107
         * reexecution we are then called 'systemd'. That is confusing, hence let's call us systemd
3108
         * right-away. */
3109
        program_invocation_short_name = systemd;
243✔
3110
        (void) prctl(PR_SET_NAME, systemd);
243✔
3111

3112
        /* Save the original command line */
3113
        save_argc_argv(argc, argv);
243✔
3114

3115
        /* Save the original environment as we might need to restore it if we're requested to execute another
3116
         * system manager later. */
3117
        r = save_env();
243✔
3118
        if (r < 0) {
243✔
3119
                error_message = "Failed to copy environment block";
×
3120
                goto finish;
×
3121
        }
3122

3123
        /* Make sure that if the user says "syslog" we actually log to the journal. */
3124
        log_set_upgrade_syslog_to_journal(true);
243✔
3125

3126
        if (getpid_cached() == 1) {
243✔
3127
                /* When we run as PID 1 force system mode */
3128
                arg_runtime_scope = RUNTIME_SCOPE_SYSTEM;
52✔
3129

3130
                /* Disable the umask logic */
3131
                umask(0);
52✔
3132

3133
                /* Make sure that at least initially we do not ever log to journald/syslogd, because it might
3134
                 * not be activated yet (even though the log socket for it exists). */
3135
                log_set_prohibit_ipc(true);
52✔
3136

3137
                /* Always reopen /dev/console when running as PID 1 or one of its pre-execve() children. This
3138
                 * is important so that we never end up logging to any foreign stderr, for example if we have
3139
                 * to log in a child process right before execve()'ing the actual binary, at a point in time
3140
                 * where socket activation stderr/stdout are already set up. */
3141
                log_set_always_reopen_console(true);
52✔
3142

3143
                if (detect_container() <= 0) {
52✔
3144

3145
                        /* Running outside of a container as PID 1 */
3146
                        log_set_target_and_open(LOG_TARGET_KMSG);
22✔
3147

3148
                        if (in_initrd())
22✔
3149
                                initrd_timestamp = userspace_timestamp;
11✔
3150

3151
                        if (!skip_setup) {
22✔
3152
                                r = mount_setup_early();
17✔
3153
                                if (r < 0) {
17✔
3154
                                        error_message = "Failed to mount early API filesystems";
×
3155
                                        goto finish;
×
3156
                                }
3157
                        }
3158

3159
                        /* We might have just mounted /proc, so let's try to parse the kernel
3160
                         * command line log arguments immediately. */
3161
                        log_parse_environment();
22✔
3162

3163
                        /* Let's open the log backend a second time, in case the first time didn't
3164
                         * work. Quite possibly we have mounted /dev just now, so /dev/kmsg became
3165
                         * available, and it previously wasn't. */
3166
                        log_open();
22✔
3167

3168
                        if (!skip_setup) {
22✔
3169
                                disable_printk_ratelimit();
17✔
3170

3171
                                r = initialize_security(
17✔
3172
                                                &loaded_policy,
3173
                                                &security_start_timestamp,
3174
                                                &security_finish_timestamp,
3175
                                                &error_message);
3176
                                if (r < 0)
17✔
3177
                                        goto finish;
×
3178
                        }
3179

3180
                        r = mac_init();
22✔
3181
                        if (r < 0) {
22✔
3182
                                error_message = "Failed to initialize MAC support";
×
3183
                                goto finish;
×
3184
                        }
3185

3186
                        if (!skip_setup)
22✔
3187
                                initialize_clock_timewarp();
17✔
3188

3189
                        clock_apply_epoch(/* allow_backwards= */ !skip_setup);
22✔
3190

3191
                        /* Set the default for later on, but don't actually open the logs like this for
3192
                         * now. Note that if we are transitioning from the initrd there might still be
3193
                         * journal fd open, and we shouldn't attempt opening that before we parsed
3194
                         * /proc/cmdline which might redirect output elsewhere. */
3195
                        log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
22✔
3196

3197
                } else {
3198
                        /* Running inside a container, as PID 1 */
3199
                        log_set_target_and_open(LOG_TARGET_CONSOLE);
30✔
3200

3201
                        /* For later on, see above... */
3202
                        log_set_target(LOG_TARGET_JOURNAL);
30✔
3203

3204
                        /* clear the kernel timestamp, because we are in a container */
3205
                        kernel_timestamp = DUAL_TIMESTAMP_NULL;
30✔
3206
                }
3207

3208
                initialize_coredump(skip_setup);
52✔
3209

3210
                r = fixup_environment();
52✔
3211
                if (r < 0) {
52✔
3212
                        log_struct_errno(LOG_EMERG, r,
×
3213
                                         LOG_MESSAGE("Failed to fix up PID 1 environment: %m"),
3214
                                         LOG_MESSAGE_ID(SD_MESSAGE_CORE_PID1_ENVIRONMENT_STR));
3215
                        error_message = "Failed to fix up PID1 environment";
×
3216
                        goto finish;
×
3217
                }
3218

3219
                /* Try to figure out if we can use colors with the console. No need to do that for user
3220
                 * instances since they never log into the console. */
3221
                log_show_color(colors_enabled());
52✔
3222

3223
                r = make_null_stdio();
52✔
3224
                if (r < 0)
52✔
3225
                        log_warning_errno(r, "Failed to redirect standard streams to /dev/null, ignoring: %m");
×
3226

3227
                /* Load the kernel modules early. */
3228
                if (!skip_setup)
52✔
3229
                        (void) kmod_setup();
36✔
3230

3231
                /* Mount /proc, /sys and friends, so that /proc/cmdline and /proc/$PID/fd is available. */
3232
                r = mount_setup(loaded_policy, skip_setup);
52✔
3233
                if (r < 0) {
52✔
3234
                        error_message = "Failed to mount API filesystems";
×
3235
                        goto finish;
×
3236
                }
3237

3238
                /* The efivarfs is now mounted, let's lock down the system token. */
3239
                lock_down_efi_variables();
52✔
3240
        } else {
3241
                /* Running as user instance */
3242
                arg_runtime_scope = RUNTIME_SCOPE_USER;
191✔
3243
                log_set_always_reopen_console(true);
191✔
3244
                log_set_target_and_open(LOG_TARGET_AUTO);
191✔
3245

3246
                /* clear the kernel timestamp, because we are not PID 1 */
3247
                kernel_timestamp = DUAL_TIMESTAMP_NULL;
191✔
3248

3249
                r = mac_init();
191✔
3250
                if (r < 0) {
191✔
3251
                        error_message = "Failed to initialize MAC support";
×
3252
                        goto finish;
×
3253
                }
3254
        }
3255

3256
        /* Save the original RLIMIT_NOFILE/RLIMIT_MEMLOCK so that we can reset it later when
3257
         * transitioning from the initrd to the main systemd or suchlike. */
3258
        save_rlimits(&saved_rlimit_nofile, &saved_rlimit_memlock);
243✔
3259

3260
        /* Reset all signal handlers. */
3261
        (void) reset_all_signal_handlers();
243✔
3262
        (void) ignore_signals(SIGNALS_IGNORE);
243✔
3263

3264
        (void) parse_configuration(&saved_rlimit_nofile, &saved_rlimit_memlock);
243✔
3265

3266
        r = parse_argv(argc, argv);
243✔
3267
        if (r < 0) {
243✔
3268
                error_message = "Failed to parse command line arguments";
×
3269
                goto finish;
×
3270
        }
3271

3272
        r = safety_checks();
243✔
3273
        if (r < 0)
243✔
3274
                goto finish;
×
3275

3276
        if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS, ACTION_DUMP_BUS_PROPERTIES, ACTION_BUS_INTROSPECT))
243✔
3277
                pager_open(arg_pager_flags);
×
3278

3279
        /* Anything other than actually running the manager never performs the one-time setup steps
         * that are gated on skip_setup below. */
        if (arg_action != ACTION_RUN)
243✔
3280
                skip_setup = true;
×
3281

3282
        if (arg_action == ACTION_HELP) {
243✔
3283
                retval = help() < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
×
3284
                goto finish;
×
3285
        } else if (arg_action == ACTION_VERSION) {
243✔
3286
                retval = version();
×
3287
                goto finish;
×
3288
        } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
243✔
3289
                unit_dump_config_items(stdout);
×
3290
                retval = EXIT_SUCCESS;
×
3291
                goto finish;
×
3292
        } else if (arg_action == ACTION_DUMP_BUS_PROPERTIES) {
243✔
3293
                dump_bus_properties(stdout);
×
3294
                retval = EXIT_SUCCESS;
×
3295
                goto finish;
×
3296
        } else if (arg_action == ACTION_BUS_INTROSPECT) {
243✔
3297
                r = bus_manager_introspect_implementations(stdout, arg_bus_introspect);
×
3298
                retval = r >= 0 ? EXIT_SUCCESS : EXIT_FAILURE;
×
3299
                goto finish;
×
3300
        }
3301

3302
        assert_se(IN_SET(arg_action, ACTION_RUN, ACTION_TEST));
243✔
3303

3304
        /* Move out of the way, so that we won't block unmounts */
3305
        assert_se(chdir("/") == 0);
243✔
3306

3307
        if (arg_action == ACTION_RUN) {
243✔
3308
                if (!skip_setup) {
243✔
3309
                        /* Apply the systemd.clock_usec= kernel command line switch */
3310
                        apply_clock_update();
226✔
3311

3312
                        /* Apply random seed from kernel command line */
3313
                        cmdline_take_random_seed();
226✔
3314
                }
3315

3316
                /* A core pattern might have been specified via the cmdline. */
3317
                initialize_core_pattern(skip_setup);
243✔
3318

3319
                /* Make /usr/ read-only */
3320
                apply_protect_system(skip_setup);
243✔
3321

3322
                /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
3323
                log_close();
243✔
3324

3325
                /* Remember open file descriptors for later deserialization */
3326
                r = collect_fds(&fds, &error_message);
243✔
3327
                if (r < 0)
243✔
3328
                        goto finish;
×
3329

3330
                /* Give up any control of the console, but make sure it's initialized. */
3331
                setup_console_terminal(skip_setup);
243✔
3332

3333
                /* Open the logging devices, if possible and necessary */
3334
                log_open();
243✔
3335
        }
3336

3337
        log_execution_mode(&first_boot);
243✔
3338

3339
        r = cg_has_legacy();
243✔
3340
        if (r < 0) {
243✔
3341
                error_message = "Failed to check cgroup hierarchy";
×
3342
                goto finish;
×
3343
        }
3344
        if (r > 0) {
243✔
3345
                r = log_full_errno(LOG_EMERG, SYNTHETIC_ERRNO(EPROTO),
×
3346
                                   "Detected cgroup v1 hierarchy at /sys/fs/cgroup/, which is no longer supported by current version of systemd.\n"
3347
                                   "Please instruct your initrd to mount cgroup v2 (unified) hierarchy,\n"
3348
                                   "possibly by removing any stale kernel command line options, such as:\n"
3349
                                   "  systemd.legacy_systemd_cgroup_controller=1\n"
3350
                                   "  systemd.unified_cgroup_hierarchy=0");
3351

3352
                error_message = "Detected unsupported legacy cgroup hierarchy, refusing execution";
×
3353
                goto finish;
×
3354
        }
3355

3356
        /* Building without libmount is allowed, but if it is compiled in, then we must be able to load it */
3357
        r = dlopen_libmount();
243✔
3358
        if (r < 0 && !ERRNO_IS_NEG_NOT_SUPPORTED(r)) {
243✔
3359
                error_message = "Failed to load libmount.so";
×
3360
                goto finish;
×
3361
        }
3362

3363
        r = initialize_runtime(skip_setup,
243✔
3364
                               first_boot,
3365
                               &saved_rlimit_nofile,
3366
                               &saved_rlimit_memlock,
3367
                               &saved_ambient_set,
3368
                               &error_message);
3369
        if (r < 0)
243✔
3370
                goto finish;
×
3371

3372
        r = manager_new(arg_runtime_scope,
243✔
3373
                        arg_action == ACTION_TEST ? MANAGER_TEST_FULL : 0,
243✔
3374
                        &m);
3375
        if (r < 0) {
243✔
3376
                log_struct_errno(LOG_EMERG, r,
×
3377
                                 LOG_MESSAGE("Failed to allocate manager object: %m"),
3378
                                 LOG_MESSAGE_ID(SD_MESSAGE_CORE_MANAGER_ALLOCATE_STR));
3379
                error_message = "Failed to allocate manager object";
×
3380
                goto finish;
×
3381
        }
3382

3383
        m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;
243✔
3384
        m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;
243✔
3385
        m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;
243✔
3386
        m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_START)] = security_start_timestamp;
243✔
3387
        m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_FINISH)] = security_finish_timestamp;
243✔
3388

3389
        m->saved_ambient_set = saved_ambient_set;
243✔
3390

3391
        set_manager_defaults(m);
243✔
3392
        set_manager_settings(m);
243✔
3393
        manager_set_first_boot(m, first_boot);
243✔
3394
        manager_set_switching_root(m, arg_switched_root);
243✔
3395

3396
        /* Remember whether we should queue the default job */
3397
        queue_default_job = !arg_serialization || arg_switched_root;
243✔
3398

3399
        before_startup = now(CLOCK_MONOTONIC);
243✔
3400

3401
        r = manager_startup(m, arg_serialization, fds, /* root= */ NULL);
243✔
3402
        if (r < 0) {
243✔
3403
                error_message = "Failed to start up manager";
×
3404
                goto finish;
×
3405
        }
3406

3407
        /* This will close all file descriptors that were opened, but not claimed by any unit. */
3408
        fds = fdset_free(fds);
243✔
3409
        arg_serialization = safe_fclose(arg_serialization);
243✔
3410

3411
        if (queue_default_job) {
243✔
3412
                r = do_queue_default_job(m, &error_message);
226✔
3413
                if (r < 0)
226✔
3414
                        goto finish;
×
3415
        }
3416

3417
        after_startup = now(CLOCK_MONOTONIC);
243✔
3418

3419
        log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
243✔
3420
                 "Loaded units and determined initial transaction in %s.",
3421
                 FORMAT_TIMESPAN(after_startup - before_startup, 100 * USEC_PER_MSEC));
3422

3423
        if (arg_action == ACTION_TEST) {
243✔
3424
                manager_test_summary(m);
×
3425
                retval = EXIT_SUCCESS;
×
3426
                goto finish;
×
3427
        }
3428

3429
        r = invoke_main_loop(m,
243✔
3430
                             &saved_rlimit_nofile,
3431
                             &saved_rlimit_memlock,
3432
                             &retval,
3433
                             &fds,
3434
                             &switch_root_dir,
3435
                             &switch_root_init,
3436
                             &error_message);
3437
        /* MANAGER_OK and MANAGER_RELOAD are not expected here. */
3438
        assert(r < 0 || IN_SET(r, MANAGER_REEXECUTE, MANAGER_EXIT) ||
243✔
3439
               (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM &&
3440
                IN_SET(r, MANAGER_REBOOT,
3441
                          MANAGER_SOFT_REBOOT,
3442
                          MANAGER_POWEROFF,
3443
                          MANAGER_HALT,
3444
                          MANAGER_KEXEC,
3445
                          MANAGER_SWITCH_ROOT)));
3446

3447
        /* Common exit path: all early failures and all informational actions jump here, and the main
         * loop falls through to it. From here on r carries either a negative error or the objective
         * that decides between reexecution, shutdown and plain return. */
finish:
×
3448
        pager_close();
243✔
3449

3450
        if (m) {
243✔
3451
                arg_reboot_watchdog = manager_get_watchdog(m, WATCHDOG_REBOOT);
243✔
3452
                arg_kexec_watchdog = manager_get_watchdog(m, WATCHDOG_KEXEC);
243✔
3453
                m = manager_free(m);
243✔
3454
        }
3455

3456
        mac_selinux_finish();
243✔
3457

3458
        if (IN_SET(r, MANAGER_REEXECUTE, MANAGER_SWITCH_ROOT, MANAGER_SOFT_REBOOT))
243✔
3459
                r = do_reexecute(r,
39✔
3460
                                 argc, argv,
3461
                                 &saved_rlimit_nofile,
3462
                                 &saved_rlimit_memlock,
3463
                                 fds,
3464
                                 switch_root_dir,
3465
                                 switch_root_init,
3466
                                 saved_ambient_set,
3467
                                 &error_message); /* This only returns if reexecution failed */
3468

3469
        arg_serialization = safe_fclose(arg_serialization);
204✔
3470
        fds = fdset_free(fds);
204✔
3471

3472
        saved_env = strv_free(saved_env);
204✔
3473

3474
#if HAVE_VALGRIND_VALGRIND_H
3475
        /* If we are PID 1 and running under valgrind, then let's exit
3476
         * here explicitly. valgrind will only generate nice output on
3477
         * exit(), not on exec(), hence let's do the former not the
3478
         * latter here. */
3479
        if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
3480
                /* Cleanup watchdog_device strings for valgrind. We need them
3481
                 * in become_shutdown() so normally we cannot free them yet. */
3482
                watchdog_free_device();
3483
                reset_arguments();
3484
                return retval;
3485
        }
3486
#endif
3487

3488
#if HAS_FEATURE_ADDRESS_SANITIZER
3489
        /* At this stage we most likely don't have stdio/stderr open, so the following
3490
         * LSan check would not print any actionable information and would just crash
3491
         * PID 1. To make this a bit more helpful, let's try to open /dev/console,
3492
         * and if we succeed redirect LSan's report there. */
3493
        if (getpid_cached() == 1) {
3494
                _cleanup_close_ int tty_fd = -EBADF;
3495

3496
                tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
3497
                if (tty_fd >= 0)
3498
                        __sanitizer_set_report_fd((void*) (intptr_t) tty_fd);
3499

3500
                __lsan_do_leak_check();
3501
        }
3502
#endif
3503

3504
        if (r < 0)
204✔
3505
                (void) sd_notifyf(/* unset_environment= */ false,
×
3506
                                  "ERRNO=%i", -r);
3507

3508
        /* Try to invoke the shutdown binary unless we already failed.
3509
         * If we failed above, we want to freeze after finishing cleanup. */
3510
        if (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM &&
204✔
3511
            IN_SET(r, MANAGER_EXIT, MANAGER_REBOOT, MANAGER_POWEROFF, MANAGER_HALT, MANAGER_KEXEC)) {
14✔
3512
                r = become_shutdown(r, retval);
14✔
3513
                /* Only reached if become_shutdown() returned; the message below treats that as a
                 * failure to execute the shutdown binary. */
                log_error_errno(r, "Failed to execute shutdown binary, %s: %m", getpid_cached() == 1 ? "freezing" : "quitting");
×
3514
                error_message = "Failed to execute shutdown binary";
×
3515
        }
3516

3517
        /* This is primarily useful when running systemd in a VM, as it provides the user running the VM with
3518
         * a mechanism to pick up systemd's exit status in the VM. */
3519
        (void) sd_notifyf(/* unset_environment= */ false,
190✔
3520
                          "EXIT_STATUS=%i", retval);
3521

3522
        watchdog_free_device();
190✔
3523
        arg_watchdog_device = mfree(arg_watchdog_device);
190✔
3524

3525
        if (getpid_cached() == 1) {
190✔
3526
                if (error_message)
×
3527
                        manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
×
3528
                                              ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL,
3529
                                              "%s.", error_message);
3530
                freeze_or_exit_or_reboot();
×
3531
        }
3532

3533
        reset_arguments();
190✔
3534
        return retval;
190✔
3535
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc