• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

boustrophedon / extrasafe / 11256497533

09 Oct 2024 01:56PM UTC coverage: 77.969% (+0.5%) from 77.506%
11256497533

Pull #48

github

web-flow
Merge 139264a4c into b56d18c8d
Pull Request #48: feat(builtins): add builtin `UserId`

75 of 75 new or added lines in 3 files covered. (100.0%)

2 existing lines in 2 files now uncovered.

952 of 1221 relevant lines covered (77.97%)

93.34 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

15.43
/src/isolate/isolate_sys.rs
1
//! Libc and syscall functions. Most of these functions do not return errors and simply panic
2
//! because once we're in the isolate, there's nothing to handle the error even if it were
3
//! propagated upwards.
4
//!
5
//! Control flow works as follows:
6
//! - We want to call some function `func` inside our namespace
7
//! - After re-executing self via Command, `Isolate::main_hook` eventually calls
8
//! `clone_into_namespace` with a bunch of configuration data, including which function we
9
//! eventually want to call
10
//! - `clone_into_namespace` sets up the clone syscall with the namespace parameters and the config
11
//! data, and calls clone with the `run_isolate` function
12
//! - `run_isolate` unpacks the config data and uses it to set up a new tmpfs and bindmounts inside it, then does `pivot_root` into the tmpfs.
13
//! - Finally, `run_isolate` calls `func` and then exits when it's done.
14
#![allow(unsafe_code)]
15
use std::path::{Path, PathBuf};
16
use std::fs::File;
17
use std::ffi::CString;
18
use super::IsolateError;
19
use std::io::Write;
20
use std::os::fd::FromRawFd;
21

22
use std::collections::HashMap;
23

24
// ~2 MB (2,000,000 bytes); Rust's default thread stack is 2 MiB (https://doc.rust-lang.org/std/thread/#stack-size)
25
// TODO: add config for this
26
pub const CHILD_STACK_SIZE: usize = 2_000_000;
27

28
/// Panic when the first argument is below zero.
///
/// The panic text has the form `<message>: <os error>`. The OS error is read from
/// `std::io::Error::last_os_error()` *before* the message expression is evaluated, so building
/// the message cannot disturb the error being reported.
macro_rules! fail_negative {
    ($rc:expr, $message:expr) => {
        if $rc < 0 {
            let os_error = std::io::Error::last_os_error();
            panic!("{}: {}", $message, os_error);
        }
    };
}
39

40
/// Panic when the first argument is a null pointer.
///
/// The panic text has the form `<message>: <os error>`. The OS error is read from
/// `std::io::Error::last_os_error()` *before* the message expression is evaluated.
macro_rules! fail_null {
    ($ptr:expr, $message:expr) => {
        if $ptr.is_null() {
            let os_error = std::io::Error::last_os_error();
            panic!("{}: {}", $message, os_error);
        }
    };
}
51

52
/// Translate a libc-style return code into a `std::io::Result`.
///
/// Any non-negative code counts as success. A negative code produces the error reported by
/// `std::io::Error::last_os_error()`.
fn check_err(retcode: i32) -> std::io::Result<()> {
    if retcode < 0 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(())
}
12✔
61

62
#[derive(Debug)]
63
/// Contains the data passed from the parent process to the Isolate via `libc::clone`'s `arg`
64
/// pointer parameter.
65
pub struct IsolateConfigData {
66
    /// The isolate name
67
    pub isolate_name: &'static str,
68
    /// The bindmount mappings
69
    pub bindmounts: HashMap<PathBuf, PathBuf>,
70
    /// The function to call after setup
71
    pub func: fn() -> (),
72
    /// The size of the tmpfs
73
    pub root_fs_size: u32,
74
    /// The user id of the parent process
75
    pub parent_user: libc::uid_t,
76
    /// The group id of the parent process
77
    pub parent_group: libc::gid_t,
78
    /// The temporary directory in which the Isolate will live.
79
    // TODO: technically we don't need this if we chdir in the parent before execing
80
    pub tempdir: PathBuf,
81
}
82

83
impl IsolateConfigData {
84
    pub fn new(isolate_name: &'static str, bindmounts: HashMap<PathBuf, PathBuf>, func: fn() -> (), root_fs_size: u32, tempdir: PathBuf) -> IsolateConfigData {
×
85
        let parent_user = unsafe { libc::geteuid() };
×
86
        let parent_group = unsafe { libc::getegid() };
×
87
        IsolateConfigData {
×
88
            isolate_name,
×
89
            bindmounts,
×
90
            func,
×
91
            root_fs_size,
×
92
            parent_user,
×
93
            parent_group,
×
94
            tempdir
×
95
        }
×
96
    }
×
97
}
98

99
/// Map the parent id to root in the new namespace. In the future it might be useful to allow other
100
/// users but root is used as a hint to the end-user that they have `CAP_SYS_ADMIN` in the
101
/// Isolate's namespace.
102
fn map_user_to_root(parent_user: libc::uid_t, parent_group: libc::gid_t) {
×
103
    std::fs::write("/proc/self/uid_map", format!("0 {parent_user} 1\n"))
×
104
        .expect("failed to map child id");
×
105
    std::fs::write("/proc/self/setgroups", "deny\n")
×
106
        .expect("failed to enable child group mapping");
×
107
    std::fs::write("/proc/self/gid_map", format!("0 {parent_group} 1\n"))
×
108
        .expect("failed to map child gid");
×
109
}
×
110

111
/// This is the "new main" function after the clone call.
112
extern "C" fn run_isolate(data: *mut libc::c_void) -> i32 {
×
113
    // This is valid because all virtual memory except the stack is cloned when we call the clone
×
114
    // syscall. All heap pointers are still valid, they just point to a new copy of the data.
×
115
    let dataptr: *mut IsolateConfigData = data.cast::<IsolateConfigData>();
×
116
    let config_data = unsafe { Box::from_raw(dataptr) };
×
117
    
×
118
    let isolate_name_cstr = CString::new(config_data.isolate_name)
×
119
        .expect("please don't put null bytes in your isolate name");
×
120
    let cstr_ptr = isolate_name_cstr.as_ptr();
×
121
    let rc = unsafe { libc::prctl(libc::PR_SET_NAME, cstr_ptr) };
×
122
    fail_negative!(rc, "prctl set process name failed");
×
123

124
    map_user_to_root(config_data.parent_user, config_data.parent_group);
×
125

×
126
    mount_tmpfs(&config_data.tempdir, config_data.root_fs_size);
×
127
    for (src, dst) in config_data.bindmounts {
×
128
        do_bindmount(&config_data.tempdir, &src, &dst);
×
129
    }
×
130
    do_pivot_root(&config_data.tempdir);
×
131
    //// TODO: if config_data.drop_caps, drop capabilities
×
132
    close_fds();
×
133
    (config_data.func)();
×
134
    std::process::exit(0);
×
135
}
136

137
/// Make a tempdir in /tmp in which to mount our private tmpfs where the isolate will eventually
138
/// live
139
pub fn make_tempdir(isolate_name: &str) -> PathBuf {
×
140
    assert!(isolate_name.is_ascii(), "tmpdir template name must be ascii");
×
141

142
    let template_str = format!("/tmp/{}.XXXXXX\0", isolate_name);
×
143
    let mut dir_buf: Vec<u8> = template_str.clone().into_bytes();
×
144

×
145
    let dir_ptr: *mut i8 = dir_buf.as_mut_ptr().cast::<i8>();
×
146
    let ret = unsafe { libc::mkdtemp(dir_ptr) };
×
147
    fail_null!(ret, "failed to create temporary directory after clone");
×
148

149
    // remove null byte
150
    let _ = dir_buf.pop();
×
151
    let dir = String::from_utf8(dir_buf)
×
152
        .expect("mkdtemp template string should always be utf8");
×
153
    
×
154
    PathBuf::from(dir)
×
155
}
×
156

157
/// Mount a private tmpfs inside the created tempdir
158
fn mount_tmpfs(tempdir: &Path, max_size: u32) {
×
159
    let tmp_dircstr = CString::new(tempdir.as_os_str().as_encoded_bytes()).unwrap();
×
160
    let tmp_cstr = CString::new("tmpfs").unwrap();
×
161
    let options = CString::new(format!("size={}m", max_size)).unwrap();
×
162
    let options_ptr: *const libc::c_void = options.as_ptr().cast::<libc::c_void>();
×
163
    let rc = unsafe { libc::mount(tmp_cstr.as_ptr(),
×
164
                                   tmp_dircstr.as_ptr(),
×
165
                                   tmp_cstr.as_ptr(),
×
166
                                   0,
×
167
                                   options_ptr) };
×
168
    fail_negative!(rc, "failed to mount tmpfs after clone");
×
169

170
    // make sure the mount is private
171
    let empty_cstr = CString::new("").unwrap();
×
172
    let rc = unsafe { libc::mount(empty_cstr.as_ptr(),
×
173
                                   tmp_dircstr.as_ptr(),
×
174
                                   empty_cstr.as_ptr(),
×
175
                                   libc::MS_REC | libc::MS_PRIVATE,
×
176
                                   std::ptr::null()) };
×
177
    fail_negative!(rc, "failed to make tmpfs private after mounting");
×
178
}
×
179

180
/// Set up a bindmount inside the new root. root is the root tmpfs dir, src is a directory from the
181
/// original filesystem, and dst is the location inside root in which to bindmount src. dst may be
182
/// absolute or relative - in both cases it is joined to root. Intermediate directories are created
183
/// automatically when mounting.
184
fn do_bindmount(root: &Path, src: &Path, dst: &Path) {
×
185
    // TODO (?) does this actually have any security implications? It seems like a good thing to do
186
    // in general but I'm not sure if you could actually do anything "bad" with it that you
187
    // couldn't do if an attacker otherwise controlled a dst path.
188
    for a in dst.ancestors() {
×
189
        assert!(!a.ends_with("."), "bindmount dst directory must not contain . paths: {}", dst.display());
×
190
        assert!(!a.ends_with(".."), "bindmount dst directory must not contain .. paths: {}", dst.display());
×
191
    }
192

193
    let dst = if dst.is_absolute() { dst.strip_prefix("/").unwrap() } else { dst };
×
194
    let dst = root.join(dst);
×
195

×
196
    // if directory, create all directories
×
197
    // else if file, socket, etc., create all parent directories and make empty file to bindmount
×
198
    // on to.
×
199
    if src.is_dir() {
×
200
        std::fs::create_dir_all(&dst)
×
201
            .unwrap_or_else(|_| panic!("failed to create dst directory (or parent directories) when bindmounting {}", dst.display()));
×
202
    }
×
203
    else {
204
        if let Some(parent) = dst.parent() {
×
205
            std::fs::create_dir_all(parent)
×
206
                .unwrap_or_else(|_| panic!("failed to create parent directories when bindmounting {}", dst.display()));
×
207
        }
×
208
        drop(File::create(&dst)
×
209
            .unwrap_or_else(|_| panic!("failed to create empty file when bindmounting {}", dst.display())));
×
UNCOV
210
    }
×
211

212
    let src_dircstr = CString::new(src.as_os_str().as_encoded_bytes()).unwrap();
×
213
    let dst_dircstr = CString::new(dst.as_os_str().as_encoded_bytes()).unwrap();
×
214
    let bind_cstr = CString::new("bind").unwrap();
×
215
    let rc = unsafe { libc::mount(src_dircstr.as_ptr(),
×
216
                                   dst_dircstr.as_ptr(),
×
217
                                   bind_cstr.as_ptr(),
×
218
                                   libc::MS_REC | libc::MS_BIND,
×
219
                                   std::ptr::null()) };
×
220
    fail_negative!(rc, format!("failed to bindmount. do you have permissions for the src directory? dst must also exist! (it should be an empty file or directory)\nsrc: {:?}, dst: {:?}", src, dst));
×
221
}
×
222

223
/// `pivot_root(".", ".")` is explicitly documented in the manpage:
224
/// <https://man7.org/linux/man-pages/man2/pivot_root.2.html>
225
fn do_pivot_root(tmpfs: &Path) {
×
226
    // change directory to new root
×
227
    std::env::set_current_dir(tmpfs)
×
228
        .unwrap_or_else(|_| panic!("failed to chdir to {}", tmpfs.display()));
×
229

×
230
    // `pivot_root(".", ".")`
×
231
    let curdir_cstr = CString::new(".").unwrap();
×
232
    let curdir_ptr = curdir_cstr.as_ptr();
×
233
    let rc = unsafe { libc::syscall(libc::SYS_pivot_root, curdir_ptr, curdir_ptr) };
×
234
    fail_negative!(rc, format!("failed to pivot_root . . into {}", tmpfs.display()));
×
235

236
    // now unmount old / with MNT_DETACH
237
    let rc = unsafe { libc::umount2(curdir_ptr, libc::MNT_DETACH) };
×
238
    fail_negative!(rc, "failed to unmount old /");
×
239
}
×
240

241
pub fn create_memfd_from_self_exe() -> Result<File, IsolateError> {
12✔
242
    // Per the memfd_open manpage, multiple memfds can have the same name without issue.
12✔
243
    let memfd_name = CString::new("isolate_memfd").unwrap();
12✔
244

245
    let exe_data = std::fs::read("/proc/self/exe")
12✔
246
        .map_err(IsolateError::MemFd)?;
12✔
247
    let exe_bytes = &exe_data;
12✔
248
    let fsize = exe_bytes.len() as u64;
12✔
249
    let memfd = unsafe { libc::memfd_create(memfd_name.as_ptr(), 0) };
12✔
250
    check_err(memfd)
12✔
251
        .map_err(IsolateError::MemFd)?;
12✔
252
    let mut memfd_file = unsafe { std::fs::File::from_raw_fd(memfd) };
12✔
253
    memfd_file.set_len(fsize).expect("ftruncate on memfd");
12✔
254
    let _count = memfd_file.write(exe_bytes).expect("write exe data to memfd after sizing");
12✔
255

12✔
256
    Ok(memfd_file)
12✔
257
}
12✔
258

259
pub fn clone_into_namespace(stack: &mut [u8],
×
260
        config_data: IsolateConfigData,
×
261
        new_network: bool) ->
×
262
        (libc::pid_t, libc::id_t) {
×
263
    let flags = libc::CLONE_NEWNS | libc::CLONE_NEWUSER | libc::CLONE_NEWPID | libc::CLONE_NEWIPC | libc::CLONE_NEWUTS  | libc::CLONE_PIDFD;
×
264
    let flags = if new_network {
×
265
        flags | libc::CLONE_NEWNET
×
266
    } else { flags };
×
267

268
    let mut pidfd: libc::pid_t = 0; 
×
269
    // the argument used for pidfd is defined as an i32/pid_t but waitid takes a u32/id_t so we
×
270
    // convert on return
×
271
    let pidfd_ref: *mut libc::pid_t = &mut pidfd;
×
272

×
273
    //let stack_ptr = unsafe { std::mem::transmute::<*mut u8, *mut libc::c_void>(stack.as_mut_ptr().wrapping_add(CHILD_STACK_SIZE)) };
×
274
    // The stack grows down, so we need to provide clone a pointer to the end of our stack data
×
275
    // vec.
×
276
    let stack_ptr: *mut libc::c_void = stack.as_mut_ptr().wrapping_add(CHILD_STACK_SIZE).cast::<libc::c_void>();
×
277

×
278
    let fnptr = run_isolate;
×
279
    let data = Box::new(config_data);
×
280
    let data_ptr: *mut libc::c_void = Box::into_raw(data).cast::<libc::c_void>();
×
281
    
×
282
    let pid = unsafe { libc::clone(fnptr, stack_ptr, flags, data_ptr, pidfd_ref) };
×
283
    (pid, pidfd.try_into().unwrap())
×
284
}
×
285

286
/// Wait on the child identified by `pidfd` using `waitid` with `P_PIDFD`, and return the
/// `si_status` value from the resulting `siginfo_t`.
///
/// Panics if `waitid` fails.
pub fn wait_for_child(pidfd: libc::id_t) -> i32 {
    let wait_options = libc::__WALL | libc::WEXITED;
    let mut info: libc::siginfo_t = unsafe { std::mem::zeroed() };

    let rc = unsafe { libc::waitid(libc::P_PIDFD, pidfd, &mut info, wait_options) };
    fail_negative!(rc, "waitid failed");

    unsafe { info.si_status() }
}
×
292

293
/// Close all fds >= 3, leaving stdin, stdout, stderr alone
294
fn close_fds() {
×
295
    let flags: i32 = libc::CLOSE_RANGE_UNSHARE.try_into().unwrap();
×
296
    let rc = unsafe { libc::syscall(libc::SYS_close_range, 3, u32::MAX, flags) };
×
297
    fail_negative!(rc, "failed to close fds > 3 after pivot_root");
×
298
}
×
299

300
#[cfg(test)]
mod tests {
    use std::ffi::CString;

    /// `fail_negative!` panics with the caller's message when given a negative return code.
    #[test]
    #[should_panic(expected = "catch me abc")]
    fn test_fail_negative() {
        // we definitely shouldn't have 9k+ fds open so this should always fail
        let close_rc = unsafe { libc::close(9999) };
        fail_negative!(close_rc, "catch me abc");
    }

    /// `fail_null!` panics with the caller's message when given a null pointer.
    #[test]
    #[should_panic(expected = "catch me xyz")]
    fn test_fail_null() {
        // Opening a path that does not exist makes fopen return null.
        let missing_path = CString::new("/nonexistant123").unwrap();
        let read_mode = CString::new("r").unwrap();
        let stream = unsafe { libc::fopen(missing_path.as_ptr(), read_mode.as_ptr()) };
        fail_null!(stream, "catch me xyz");
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc