@@ -15,7 +15,8 @@ use nix::sys::socket::{shutdown, Shutdown};
1515use std:: io;
1616use std:: os:: fd:: RawFd ;
1717use std:: os:: unix:: prelude:: { AsRawFd , FromRawFd , IntoRawFd , OwnedFd } ;
18- use std:: sync:: Mutex ;
18+ use std:: sync:: { Arc , Mutex } ;
19+ use std:: sync:: atomic:: { AtomicBool , Ordering } ;
1920use std:: thread;
2021use std:: time:: { Duration , Instant } ;
2122use tokio:: net:: { UnixListener , UnixStream } ;
@@ -51,20 +52,117 @@ pub fn start_master_listener_unix(master_pid: i32) -> io::Result<()> {
5152 let handle = thread:: Builder :: new ( )
5253 . name ( "dd-sidecar" . into ( ) )
5354 . spawn ( move || {
54- let acquire_listener = move || -> io:: Result < _ > {
55- std_listener. set_nonblocking ( true ) ?;
56- let listener = UnixListener :: from_std ( std_listener. try_clone ( ) ?) ?;
57- let cancel = {
58- let fd = listener. as_raw_fd ( ) ;
59- move || stop_listening ( fd)
60- } ;
61- Ok ( ( move |handler| accept_socket_loop ( listener, handler) , cancel) )
55+ // Accept with blocking I/O - no shared, long-lived tokio Runtime drives the listener.
56+ // This makes the code fork-safe; the runtime built below is only used to construct the server.
57+ use crate :: service:: sidecar_server:: SidecarServer ;
58+ let runtime = match tokio:: runtime:: Builder :: new_current_thread ( )
59+ . enable_all ( )
60+ . build ( )
61+ {
62+ Ok ( rt) => rt,
63+ Err ( e) => {
64+ error ! ( "Failed to create runtime for server initialization: {}" , e) ;
65+ return ;
66+ }
6267 } ;
6368
64- let _ = enter_listener_loop ( acquire_listener) . map_err ( |e| {
65- error ! ( "enter_listener_loop failed: {}" , e) ;
66- e
67- } ) ;
69+ let server = runtime. block_on ( async { SidecarServer :: default ( ) } ) ;
70+
71+ // Shutdown flag; each connection thread checks it once, before it starts serving its connection
72+ let shutdown_flag = Arc :: new ( AtomicBool :: new ( false ) ) ;
73+
74+ // Track connection threads for cleanup during shutdown
75+ let mut handler_threads: Vec < thread:: JoinHandle < ( ) > > = Vec :: new ( ) ;
76+
77+ loop {
78+ // Clean up finished threads to avoid accumulating handles
79+ handler_threads. retain ( |h| !h. is_finished ( ) ) ;
80+
81+ match std_listener. accept ( ) {
82+ Ok ( ( stream, _addr) ) => {
83+ let server = server. clone ( ) ;
84+ let shutdown = shutdown_flag. clone ( ) ;
85+
86+ // Spawn a thread for each connection
87+ match thread:: Builder :: new ( ) . name ( "dd-conn-handler" . into ( ) ) . spawn (
88+ move || {
89+ // Create a minimal single-threaded runtime for this connection only
90+ // This runtime will be dropped when the connection closes
91+ let runtime = match tokio:: runtime:: Builder :: new_current_thread ( )
92+ . enable_all ( )
93+ . build ( )
94+ {
95+ Ok ( rt) => rt,
96+ Err ( e) => {
97+ error ! ( "Failed to create runtime for connection: {}" , e) ;
98+ return ;
99+ }
100+ } ;
101+
102+ runtime. block_on ( async move {
103+ // Check shutdown flag
104+ if shutdown. load ( Ordering :: Relaxed ) {
105+ return ;
106+ }
107+
108+ // Convert std UnixStream to tokio UnixStream
109+ if let Err ( e) = stream. set_nonblocking ( true ) {
110+ error ! ( "Failed to set nonblocking: {}" , e) ;
111+ return ;
112+ }
113+
114+ let tokio_stream = match UnixStream :: from_std ( stream) {
115+ Ok ( s) => s,
116+ Err ( e) => {
117+ error ! ( "Failed to convert stream: {}" , e) ;
118+ return ;
119+ }
120+ } ;
121+
122+ // Handle the connection using existing async infrastructure
123+ use datadog_ipc:: platform:: AsyncChannel ;
124+
125+ // Use the cloned shared server
126+ server
127+ . accept_connection ( AsyncChannel :: from ( tokio_stream) )
128+ . await ;
129+ } ) ;
130+ } ,
131+ ) {
132+ Ok ( handle) => handler_threads. push ( handle) ,
133+ Err ( e) => error ! ( "Failed to spawn handler thread: {}" , e) ,
134+ }
135+ }
136+ Err ( e) => {
137+ match e. kind ( ) {
138+ io:: ErrorKind :: Interrupted => continue ,
139+ io:: ErrorKind :: InvalidInput => break , // Socket shut down
140+ _ => {
141+ error ! ( "Accept error: {}" , e) ;
142+ thread:: sleep ( Duration :: from_millis ( 100 ) ) ;
143+ }
144+ }
145+ }
146+ }
147+ }
148+
149+ info ! ( "Master listener stopped accepting connections" ) ;
150+
151+ // Signal connection threads that have not started serving yet to exit immediately
152+ shutdown_flag. store ( true , Ordering :: Relaxed ) ;
153+
154+ // Shutdown the server - this should close active connections
155+ server. shutdown ( ) ;
156+
157+ // Now join all connection threads - they should exit quickly
158+ // since connections are closed and shutdown flag is set
159+ info ! ( "Waiting for {} connection threads to finish" , handler_threads. len( ) ) ;
160+ for ( i, handle) in handler_threads. into_iter ( ) . enumerate ( ) {
161+ if let Err ( e) = handle. join ( ) {
162+ error ! ( "Connection thread {} panicked: {:?}" , i, e) ;
163+ }
164+ }
165+ info ! ( "All connection threads finished" ) ;
68166 } )
69167 . map_err ( io:: Error :: other) ?;
70168
@@ -95,6 +193,7 @@ pub fn connect_worker_unix(master_pid: i32) -> io::Result<SidecarTransport> {
95193 }
96194 }
97195
196+ error ! ( "Worker failed to connect after 10 attempts" ) ;
98197 Err ( last_error. unwrap_or_else ( || io:: Error :: other ( "Connection failed" ) ) )
99198}
100199
@@ -112,28 +211,35 @@ pub fn shutdown_master_listener_unix() -> io::Result<()> {
112211
113212 if let Some ( ( handle, fd) ) = listener_data {
114213 stop_listening ( fd) ;
214+ let _ = handle. join ( ) ;
215+ }
115216
116- // Try to join with a timeout to avoid hanging the shutdown
117- // We spawn a helper thread to do the join so we can implement a timeout
118- let ( tx, rx) = std:: sync:: mpsc:: channel ( ) ;
119- std:: thread:: spawn ( move || {
120- let result = handle. join ( ) ;
121- let _ = tx. send ( result) ;
122- } ) ;
123-
124- // Wait up to 2 seconds for clean shutdown (including time for tokio runtime shutdown)
125- match rx. recv_timeout ( Duration :: from_millis ( 2000 ) ) {
126- Ok ( Ok ( ( ) ) ) => {
127- // Clean shutdown
128- }
129- Ok ( Err ( _) ) => {
130- error ! ( "Listener thread panicked during shutdown" ) ;
131- }
132- Err ( _) => {
133- // Timeout - thread didn't exit in time
134- // This is acceptable as the OS will clean up when the process exits
217+ Ok ( ( ) )
218+ }
219+
220+ /// Clears inherited resources in child processes after fork().
221+ /// With the new blocking I/O approach, we only need to forget the listener thread handle.
222+ /// Each connection creates its own short-lived runtime, so there's no global runtime to inherit.
223+ pub fn clear_inherited_listener_unix ( ) -> io:: Result < ( ) > {
224+ info ! ( "Child process clearing inherited listener state" ) ;
225+ match MASTER_LISTENER . lock ( ) {
226+ Ok ( mut guard) => {
227+ if let Some ( ( handle, _fd) ) = guard. take ( ) {
228+ info ! ( "Child forgetting inherited listener thread handle" ) ;
229+ // Forget the handle without joining - parent owns the thread
230+ std:: mem:: forget ( handle) ;
231+ info ! ( "Child successfully forgot listener handle" ) ;
232+ } else {
233+ info ! ( "Child found no listener to clear" ) ;
135234 }
136235 }
236+ Err ( e) => {
237+ error ! (
238+ "Failed to acquire lock for clearing inherited listener: {}" ,
239+ e
240+ ) ;
241+ return Err ( io:: Error :: other ( "Mutex poisoned" ) ) ;
242+ }
137243 }
138244
139245 Ok ( ( ) )
0 commit comments