Skip to content

Commit 776de30

Browse files
authored
Merge pull request #107 from bleggett/bleggett/scopedfds
Prefer owned types for filedescriptors where posssible
2 parents 0c8a4e3 + 53db9c9 commit 776de30

3 files changed

Lines changed: 65 additions & 72 deletions

File tree

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ env_logger = "0.11.8"
1414
libc = "0.2.182"
1515
log = "0.4.29"
1616
mktemp-rs = "0.2.0"
17-
nix = { version = "0.31.2", features = ["process", "signal", "user"] }
17+
nix = { version = "0.31.2", features = ["event", "process", "signal", "user"] }
1818
serde = { version = "1.0.228", features = ["derive"] }
1919
serde_json = "1.0.149"
2020
tokio = { version = "1.50.0", optional = true }

src/unshare.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use std::io;
2+
use std::os::fd::{AsRawFd, FromRawFd, OwnedFd};
23

34
use anyhow::Result;
45
use libc;
@@ -19,7 +20,7 @@ pub fn unshare<'x>(iter: impl IntoIterator<Item = &'x Namespace>) -> Result<()>
1920
}
2021

2122
/// A simple wrapper around the pidfd_open(2) syscall.
22-
pub fn pidfd_open(target_pid: libc::pid_t) -> Result<libc::c_int> {
23+
pub fn pidfd_open(target_pid: libc::pid_t) -> Result<OwnedFd> {
2324
let flags: libc::c_uint = 0;
2425

2526
unsafe {
@@ -28,7 +29,7 @@ pub fn pidfd_open(target_pid: libc::pid_t) -> Result<libc::c_int> {
2829
if result < 0 {
2930
Err(io::Error::last_os_error().into())
3031
} else {
31-
Ok(result as libc::c_int)
32+
Ok(OwnedFd::from_raw_fd(result as libc::c_int))
3233
}
3334
}
3435
}
@@ -45,7 +46,7 @@ pub fn setns<'x>(
4546
let pid_fd = pidfd_open(target_pid)?;
4647

4748
unsafe {
48-
if libc::setns(pid_fd, flags) < 0 {
49+
if libc::setns(pid_fd.as_raw_fd(), flags) < 0 {
4950
Err(io::Error::last_os_error().into())
5051
} else {
5152
Ok(())

src/wrap.rs

Lines changed: 60 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
use std::env;
22
use std::ffi::CString;
33
use std::fs;
4-
use std::fs::File;
5-
use std::io::{Error, Read, Write};
4+
use std::io::Error;
65
use std::mem::MaybeUninit;
7-
use std::os::fd::FromRawFd;
86
use std::path::PathBuf;
97
use std::process;
108
use std::ptr;
@@ -23,6 +21,9 @@ use libc::{
2321
self, PR_CAP_AMBIENT, PR_CAP_AMBIENT_LOWER, PR_CAP_AMBIENT_RAISE, PR_CAPBSET_DROP,
2422
PR_SET_NO_NEW_PRIVS, c_int, prctl,
2523
};
24+
use nix::sys::eventfd::{EfdFlags, EventFd};
25+
use nix::sys::wait::{WaitPidFlag, WaitStatus, waitpid};
26+
use nix::unistd::{ForkResult, Pid, fork};
2627

2728
use log::{debug, error, warn};
2829

@@ -59,24 +60,20 @@ fn set_process_limit(resource: RLimit, limit: Option<u64>) -> Result<()> {
5960
}
6061

6162
fn reap_children() -> Result<()> {
62-
while unsafe { libc::waitpid(-1, ptr::null_mut(), libc::WNOHANG) } > 0 {}
63-
63+
loop {
64+
match waitpid(None, Some(WaitPidFlag::WNOHANG)) {
65+
Ok(WaitStatus::StillAlive) | Err(_) => break,
66+
_ => {}
67+
}
68+
}
6469
Ok(())
6570
}
6671

6772
fn wait_for_pid(pid: libc::pid_t) -> Result<i32> {
68-
let status = unsafe {
69-
let mut st: MaybeUninit<i32> = MaybeUninit::uninit();
70-
71-
if libc::waitpid(pid, st.as_mut_ptr(), 0) < 0 {
72-
panic!("waitpid of child process failed");
73-
}
74-
75-
st.assume_init()
76-
};
77-
78-
let exitcode = libc::WEXITSTATUS(status);
79-
Ok(exitcode)
73+
match waitpid(Pid::from_raw(pid), None)? {
74+
WaitStatus::Exited(_, code) => Ok(code),
75+
_ => Ok(1),
76+
}
8077
}
8178

8279
fn fork_and_wait() -> Result<()> {
@@ -85,15 +82,17 @@ fn fork_and_wait() -> Result<()> {
8582
process::exit(1)
8683
}
8784

88-
let pid = unsafe { libc::fork() };
89-
if pid > 0 {
90-
signal::store_child_pid(pid);
91-
debug!("child pid = {pid}");
92-
let exitcode = wait_for_pid(pid)?;
93-
debug!("[pid {pid}] exitcode = {exitcode}");
94-
debug!("reaping children of supervisor!");
95-
reap_children()?;
96-
process::exit(exitcode);
85+
match unsafe { fork() }? {
86+
ForkResult::Parent { child } => {
87+
signal::store_child_pid(child.as_raw());
88+
debug!("child pid = {}", child.as_raw());
89+
let exitcode = wait_for_pid(child.as_raw())?;
90+
debug!("[pid {}] exitcode = {exitcode}", child.as_raw());
91+
debug!("reaping children of supervisor!");
92+
reap_children()?;
93+
process::exit(exitcode);
94+
}
95+
ForkResult::Child => {}
9796
}
9897

9998
if let Err(e) = unsafe { signal::reset_child_signal_handlers() } {
@@ -523,54 +522,50 @@ impl Wrappable for CreateRequest {
523522
}
524523

525524
debug!("all namespaces unshared -- forking child");
526-
let parent_efd = unsafe { libc::eventfd(0, libc::EFD_SEMAPHORE) };
527-
let child_efd = unsafe { libc::eventfd(0, libc::EFD_SEMAPHORE) };
528-
let pid = unsafe { libc::fork() };
529-
if pid > 0 {
530-
signal::store_child_pid(pid);
531-
532-
debug!("child pid = {pid}");
533-
let mut pef = unsafe { File::from_raw_fd(parent_efd) };
534-
debug!("parent efd = {parent_efd}");
535-
debug!("child efd = {child_efd}");
536-
let mut buf = [0u8; 8];
537-
pef.read_exact(&mut buf)?;
525+
let parent_efd = EventFd::from_value_and_flags(0, EfdFlags::EFD_SEMAPHORE)?;
526+
let child_efd = EventFd::from_value_and_flags(0, EfdFlags::EFD_SEMAPHORE)?;
527+
match unsafe { fork() }? {
528+
ForkResult::Parent { child } => {
529+
signal::store_child_pid(child.as_raw());
530+
531+
debug!("child pid = {}", child.as_raw());
532+
parent_efd.read()?;
533+
534+
if target_ns.contains(&Namespace::User) {
535+
debug!("child has dropped into its own userns, configuring from supervisor");
536+
// In the two-stage path, the child calls pivot_fs() before signaling.
537+
// pivot_root() changes /proc for the parent too.
538+
// If a PID namespace was created, the new /proc shows the child as PID 1
539+
// (not its host PID), so we must use 1 to find it in /proc.
540+
// Without a PID namespace, the new proc mount still shows global PIDs.
541+
let userns_pid =
542+
if !skip_two_stage_userns && target_ns.contains(&Namespace::Pid) {
543+
1
544+
} else {
545+
child.as_raw()
546+
};
547+
self.prepare_userns(userns_pid)?;
548+
}
538549

539-
if target_ns.contains(&Namespace::User) {
540-
debug!("child has dropped into its own userns, configuring from supervisor");
541-
// In the two-stage path, the child calls pivot_fs() before signaling.
542-
// pivot_root() changes /proc for the parent too.
543-
// If a PID namespace was created, the new /proc shows the child as PID 1
544-
// (not its host PID), so we must use 1 to find it in /proc.
545-
// Without a PID namespace, the new proc mount still shows global PIDs.
546-
let userns_pid = if !skip_two_stage_userns && target_ns.contains(&Namespace::Pid) {
547-
1
548-
} else {
549-
pid
550-
};
551-
self.prepare_userns(userns_pid)?;
552-
}
550+
// The supervisor has now configured the user namespace, so let the first process run.
551+
child_efd.write(1)?;
553552

554-
// The supervisor has now configured the user namespace, so let the first process run.
555-
let mut cef = unsafe { File::from_raw_fd(child_efd) };
556-
cef.write_all(&1_u64.to_ne_bytes())?;
553+
let exitcode = wait_for_pid(child.as_raw())?;
554+
debug!("[pid {}] exitcode = {exitcode}", child.as_raw());
557555

558-
let exitcode = wait_for_pid(pid)?;
559-
debug!("[pid {pid}] exitcode = {exitcode}");
556+
debug!("reaping children of supervisor!");
557+
reap_children()?;
560558

561-
debug!("reaping children of supervisor!");
562-
reap_children()?;
563-
564-
process::exit(exitcode);
559+
process::exit(exitcode);
560+
}
561+
ForkResult::Child => {}
565562
}
566563

567564
if let Err(e) = unsafe { signal::reset_child_signal_handlers() } {
568565
error!("Failed to reset child signal handlers: {e}");
569566
process::exit(1);
570567
}
571568

572-
let mut pef = unsafe { File::from_raw_fd(parent_efd) };
573-
574569
if !skip_two_stage_userns {
575570
// The mount namespace was unshared in the parent under the initial user
576571
// namespace context. Mount operations must happen before we enter the new
@@ -592,14 +587,11 @@ impl Wrappable for CreateRequest {
592587
}
593588

594589
debug!("signalling supervisor to do configuration");
595-
pef.write_all(&2_u64.to_ne_bytes())?;
596-
pef.flush()?;
590+
parent_efd.write(2)?;
597591

598592
// Wait for completion from the supervisor before launching the initial process
599593
// for this container.
600-
let mut cef = unsafe { File::from_raw_fd(child_efd) };
601-
let mut buf = [0u8; 8];
602-
cef.read_exact(&mut buf)?;
594+
child_efd.read()?;
603595

604596
if skip_two_stage_userns {
605597
// In two-stage mode, mounts are deferred until after

0 commit comments

Comments
 (0)