From 0f7df1bcf8b938c4367d68e666f08ae40d03d71a Mon Sep 17 00:00:00 2001 From: Jared Lunde Date: Thu, 25 Jun 2026 21:03:51 -0700 Subject: [PATCH] fix(ublk-zc): close wake eventfd if ZC thread spawn fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cross-thread wake eventfd is created before the ZC queue thread is spawned, but only becomes owned by `ZcThreadGuard` after a successful spawn. If `Builder::spawn` failed (e.g. resource exhaustion during a multi-device recovery storm — exactly when fds are scarce), the `?` returned early and leaked one eventfd per queue, since the guard that would `close` it was never constructed. Wrap `wake_fd` in an `OwnedFd` immediately after creation so any early return closes it via Drop, then release ownership to the guard once the spawn succeeds. The ZC thread only borrows the raw value (RawFd is Copy), so ownership semantics are unchanged on the happy path. Co-Authored-By: Claude Opus 4.8 (1M context) --- glidefs/src/block/ublk/device.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/glidefs/src/block/ublk/device.rs b/glidefs/src/block/ublk/device.rs index 25627c9..ec05ea6 100644 --- a/glidefs/src/block/ublk/device.rs +++ b/glidefs/src/block/ublk/device.rs @@ -1994,6 +1994,13 @@ pub(super) async fn io_task_zero_copy( if wake_fd < 0 { return Err(UblkError::IOError(std::io::Error::last_os_error())); } + // Own the eventfd immediately so any early return before the + // `ZcThreadGuard` is constructed (notably thread-spawn failure under + // resource exhaustion) closes it instead of leaking one fd per queue. + // The ZC thread only borrows the raw value (RawFd is Copy); ownership + // stays here until we hand it to the guard below. 
+ let wake_owned = + unsafe { <std::os::fd::OwnedFd as std::os::fd::FromRawFd>::from_raw_fd(wake_fd) }; let (done_tx, done_rx) = tokio::sync::oneshot::channel(); let join = std::thread::Builder::new() @@ -2010,6 +2017,9 @@ pub(super) async fn io_task_zero_copy( let _ = done_tx.send(result); }) .map_err(|e| UblkError::IOError(std::io::Error::other(e.to_string())))?; + // Spawn succeeded: release ownership without closing — the guard now + // manages the fd's lifetime (close after join, see below). + let wake_fd: RawFd = std::os::fd::IntoRawFd::into_raw_fd(wake_owned); // Drop guard: if the future is cancelled (worker pool shutdown // during handoff cutover, panic during normal shutdown, etc.):