11use std:: env;
22use std:: ffi:: CString ;
33use std:: fs;
4- use std:: fs:: File ;
5- use std:: io:: { Error , Read , Write } ;
4+ use std:: io:: Error ;
65use std:: mem:: MaybeUninit ;
7- use std:: os:: fd:: FromRawFd ;
86use std:: path:: PathBuf ;
97use std:: process;
108use std:: ptr;
@@ -23,6 +21,9 @@ use libc::{
2321 self , PR_CAP_AMBIENT , PR_CAP_AMBIENT_LOWER , PR_CAP_AMBIENT_RAISE , PR_CAPBSET_DROP ,
2422 PR_SET_NO_NEW_PRIVS , c_int, prctl,
2523} ;
24+ use nix:: sys:: eventfd:: { EfdFlags , EventFd } ;
25+ use nix:: sys:: wait:: { WaitPidFlag , WaitStatus , waitpid} ;
26+ use nix:: unistd:: { ForkResult , Pid , fork} ;
2627
2728use log:: { debug, error, warn} ;
2829
@@ -59,24 +60,20 @@ fn set_process_limit(resource: RLimit, limit: Option<u64>) -> Result<()> {
5960}
6061
6162fn reap_children ( ) -> Result < ( ) > {
62- while unsafe { libc:: waitpid ( -1 , ptr:: null_mut ( ) , libc:: WNOHANG ) } > 0 { }
63-
63+ loop {
64+ match waitpid ( None , Some ( WaitPidFlag :: WNOHANG ) ) {
65+ Ok ( WaitStatus :: StillAlive ) | Err ( _) => break ,
66+ _ => { }
67+ }
68+ }
6469 Ok ( ( ) )
6570}
6671
6772fn wait_for_pid ( pid : libc:: pid_t ) -> Result < i32 > {
68- let status = unsafe {
69- let mut st: MaybeUninit < i32 > = MaybeUninit :: uninit ( ) ;
70-
71- if libc:: waitpid ( pid, st. as_mut_ptr ( ) , 0 ) < 0 {
72- panic ! ( "waitpid of child process failed" ) ;
73- }
74-
75- st. assume_init ( )
76- } ;
77-
78- let exitcode = libc:: WEXITSTATUS ( status) ;
79- Ok ( exitcode)
73+ match waitpid ( Pid :: from_raw ( pid) , None ) ? {
74+ WaitStatus :: Exited ( _, code) => Ok ( code) ,
75+ _ => Ok ( 1 ) ,
76+ }
8077}
8178
8279fn fork_and_wait ( ) -> Result < ( ) > {
@@ -85,15 +82,17 @@ fn fork_and_wait() -> Result<()> {
8582 process:: exit ( 1 )
8683 }
8784
88- let pid = unsafe { libc:: fork ( ) } ;
89- if pid > 0 {
90- signal:: store_child_pid ( pid) ;
91- debug ! ( "child pid = {pid}" ) ;
92- let exitcode = wait_for_pid ( pid) ?;
93- debug ! ( "[pid {pid}] exitcode = {exitcode}" ) ;
94- debug ! ( "reaping children of supervisor!" ) ;
95- reap_children ( ) ?;
96- process:: exit ( exitcode) ;
85+ match unsafe { fork ( ) } ? {
86+ ForkResult :: Parent { child } => {
87+ signal:: store_child_pid ( child. as_raw ( ) ) ;
88+ debug ! ( "child pid = {}" , child. as_raw( ) ) ;
89+ let exitcode = wait_for_pid ( child. as_raw ( ) ) ?;
90+ debug ! ( "[pid {}] exitcode = {exitcode}" , child. as_raw( ) ) ;
91+ debug ! ( "reaping children of supervisor!" ) ;
92+ reap_children ( ) ?;
93+ process:: exit ( exitcode) ;
94+ }
95+ ForkResult :: Child => { }
9796 }
9897
9998 if let Err ( e) = unsafe { signal:: reset_child_signal_handlers ( ) } {
@@ -523,54 +522,50 @@ impl Wrappable for CreateRequest {
523522 }
524523
525524 debug ! ( "all namespaces unshared -- forking child" ) ;
526- let parent_efd = unsafe { libc:: eventfd ( 0 , libc:: EFD_SEMAPHORE ) } ;
527- let child_efd = unsafe { libc:: eventfd ( 0 , libc:: EFD_SEMAPHORE ) } ;
528- let pid = unsafe { libc:: fork ( ) } ;
529- if pid > 0 {
530- signal:: store_child_pid ( pid) ;
531-
532- debug ! ( "child pid = {pid}" ) ;
533- let mut pef = unsafe { File :: from_raw_fd ( parent_efd) } ;
534- debug ! ( "parent efd = {parent_efd}" ) ;
535- debug ! ( "child efd = {child_efd}" ) ;
536- let mut buf = [ 0u8 ; 8 ] ;
537- pef. read_exact ( & mut buf) ?;
525+ let parent_efd = EventFd :: from_value_and_flags ( 0 , EfdFlags :: EFD_SEMAPHORE ) ?;
526+ let child_efd = EventFd :: from_value_and_flags ( 0 , EfdFlags :: EFD_SEMAPHORE ) ?;
527+ match unsafe { fork ( ) } ? {
528+ ForkResult :: Parent { child } => {
529+ signal:: store_child_pid ( child. as_raw ( ) ) ;
530+
531+ debug ! ( "child pid = {}" , child. as_raw( ) ) ;
532+ parent_efd. read ( ) ?;
533+
534+ if target_ns. contains ( & Namespace :: User ) {
535+ debug ! ( "child has dropped into its own userns, configuring from supervisor" ) ;
536+ // In the two-stage path, the child calls pivot_fs() before signaling.
537+ // pivot_root() changes /proc for the parent too.
538+ // If a PID namespace was created, the new /proc shows the child as PID 1
539+ // (not its host PID), so we must use 1 to find it in /proc.
540+ // Without a PID namespace, the new proc mount still shows global PIDs.
541+ let userns_pid =
542+ if !skip_two_stage_userns && target_ns. contains ( & Namespace :: Pid ) {
543+ 1
544+ } else {
545+ child. as_raw ( )
546+ } ;
547+ self . prepare_userns ( userns_pid) ?;
548+ }
538549
539- if target_ns. contains ( & Namespace :: User ) {
540- debug ! ( "child has dropped into its own userns, configuring from supervisor" ) ;
541- // In the two-stage path, the child calls pivot_fs() before signaling.
542- // pivot_root() changes /proc for the parent too.
543- // If a PID namespace was created, the new /proc shows the child as PID 1
544- // (not its host PID), so we must use 1 to find it in /proc.
545- // Without a PID namespace, the new proc mount still shows global PIDs.
546- let userns_pid = if !skip_two_stage_userns && target_ns. contains ( & Namespace :: Pid ) {
547- 1
548- } else {
549- pid
550- } ;
551- self . prepare_userns ( userns_pid) ?;
552- }
550+ // The supervisor has now configured the user namespace, so let the first process run.
551+ child_efd. write ( 1 ) ?;
553552
554- // The supervisor has now configured the user namespace, so let the first process run.
555- let mut cef = unsafe { File :: from_raw_fd ( child_efd) } ;
556- cef. write_all ( & 1_u64 . to_ne_bytes ( ) ) ?;
553+ let exitcode = wait_for_pid ( child. as_raw ( ) ) ?;
554+ debug ! ( "[pid {}] exitcode = {exitcode}" , child. as_raw( ) ) ;
557555
558- let exitcode = wait_for_pid ( pid ) ? ;
559- debug ! ( "[pid {pid}] exitcode = {exitcode}" ) ;
556+ debug ! ( "reaping children of supervisor!" ) ;
557+ reap_children ( ) ? ;
560558
561- debug ! ( "reaping children of supervisor!" ) ;
562- reap_children ( ) ?;
563-
564- process:: exit ( exitcode) ;
559+ process:: exit ( exitcode) ;
560+ }
561+ ForkResult :: Child => { }
565562 }
566563
567564 if let Err ( e) = unsafe { signal:: reset_child_signal_handlers ( ) } {
568565 error ! ( "Failed to reset child signal handlers: {e}" ) ;
569566 process:: exit ( 1 ) ;
570567 }
571568
572- let mut pef = unsafe { File :: from_raw_fd ( parent_efd) } ;
573-
574569 if !skip_two_stage_userns {
575570 // The mount namespace was unshared in the parent under the initial user
576571 // namespace context. Mount operations must happen before we enter the new
@@ -592,14 +587,11 @@ impl Wrappable for CreateRequest {
592587 }
593588
594589 debug ! ( "signalling supervisor to do configuration" ) ;
595- pef. write_all ( & 2_u64 . to_ne_bytes ( ) ) ?;
596- pef. flush ( ) ?;
590+ parent_efd. write ( 2 ) ?;
597591
598592 // Wait for completion from the supervisor before launching the initial process
599593 // for this container.
600- let mut cef = unsafe { File :: from_raw_fd ( child_efd) } ;
601- let mut buf = [ 0u8 ; 8 ] ;
602- cef. read_exact ( & mut buf) ?;
594+ child_efd. read ( ) ?;
603595
604596 if skip_two_stage_userns {
605597 // In two-stage mode, mounts are deferred until after
0 commit comments