From ab4d9e609ca3f699fdad0f828165da6b54263f08 Mon Sep 17 00:00:00 2001 From: jaredLunde <86995+jaredLunde@users.noreply.github.com> Date: Thu, 25 Jun 2026 04:21:44 +0000 Subject: [PATCH 1/2] Fix ublk buffer pool UB potential by making as_mut_slice return Option Co-authored-by: capy-ai[bot] <230910855+capy-ai[bot]@users.noreply.github.com> --- glidefs/src/block/ublk/buffer_pool.rs | 30 +++++++++++++++++++++------ glidefs/src/block/ublk/device.rs | 10 ++++++++- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/glidefs/src/block/ublk/buffer_pool.rs b/glidefs/src/block/ublk/buffer_pool.rs index 9da4eea..d43c649 100644 --- a/glidefs/src/block/ublk/buffer_pool.rs +++ b/glidefs/src/block/ublk/buffer_pool.rs @@ -240,9 +240,11 @@ pub struct PoolSlot { impl PoolSlot { #[inline] - pub fn as_mut_slice(&mut self, len: usize) -> &mut [u8] { - debug_assert!(len <= SLOT_SIZE, "slot len {len} exceeds slot size {SLOT_SIZE}"); - unsafe { std::slice::from_raw_parts_mut(self.ptr, len) } + pub fn as_mut_slice(&mut self, len: usize) -> Option<&mut [u8]> { + if len > SLOT_SIZE { + return None; + } + Some(unsafe { std::slice::from_raw_parts_mut(self.ptr, len) }) } #[inline] @@ -343,10 +345,10 @@ impl IoBuf { /// The first `len` bytes of the buffer. `len` is bounded by `SLOT_SIZE` /// upstream (a single ublk I/O never exceeds `IO_BUF_BYTES`). #[inline] - pub fn as_mut_slice(&mut self, len: usize) -> &mut [u8] { + pub fn as_mut_slice(&mut self, len: usize) -> Option<&mut [u8]> { match self { IoBuf::Pooled(slot) => slot.as_mut_slice(len), - IoBuf::Heap(v) => &mut v[..len], + IoBuf::Heap(v) => v.get_mut(..len), } } } @@ -379,6 +381,9 @@ fn try_alloc_zeroed(len: usize) -> Option> { /// per-I/O sibling of the init-path fix: neither `mmap` nor `vec` failure may /// take down storage for every VM on the host. pub async fn acquire_io_buf(len: usize) -> Option { + if len > SLOT_SIZE { + return None; + } match worker_pool() { Some(pool) => Some(IoBuf::Pooled(pool.acquire().await)), None => match try_alloc_zeroed(len) { @@ -426,7 +431,7 @@ mod tests { fn try_acquire_round_trip() { let pool = Rc::new(WorkerBufferPool::new().unwrap()); let mut slot = pool.try_acquire().unwrap(); - let buf = slot.as_mut_slice(4096); + let buf = slot.as_mut_slice(4096).unwrap(); buf[0] = 0x42; buf[4095] = 0x42; assert_eq!(pool.acquires.load(Ordering::Relaxed), 1); @@ -434,6 +439,19 @@ mod tests { assert_eq!(pool.free.borrow().len(), POOL_SLOTS); } + #[test] + fn pool_slot_rejects_oversized_slice() { + let pool = Rc::new(WorkerBufferPool::new().unwrap()); + let mut slot = pool.try_acquire().unwrap(); + assert!(slot.as_mut_slice(SLOT_SIZE).is_some()); + assert!(slot.as_mut_slice(SLOT_SIZE + 1).is_none()); + } + + #[tokio::test] + async fn acquire_io_buf_rejects_oversized_len() { + assert!(acquire_io_buf(SLOT_SIZE + 1).await.is_none()); + } + #[test] fn try_acquire_returns_none_on_exhaustion() { let pool = Rc::new(WorkerBufferPool::new().unwrap()); diff --git a/glidefs/src/block/ublk/device.rs b/glidefs/src/block/ublk/device.rs index 25627c9..7d298ff 100644 --- a/glidefs/src/block/ublk/device.rs +++ b/glidefs/src/block/ublk/device.rs @@ -2140,7 +2140,15 @@ async fn io_task_user_copy( q.submit_io_commit_cmd(tag, BufDesc::Slice(&[]), -libc::EIO).await?; continue; }; - let buf: &mut [u8] = iobuf.as_mut_slice(length as usize); + let Some(buf) = iobuf.as_mut_slice(length as usize) else { + tracing::error!( + qid, tag, length, + max_len = super::buffer_pool::SLOT_SIZE, + "bounce buffer length exceeds slot size — failing this I/O with EIO", + ); + q.submit_io_commit_cmd(tag, BufDesc::Slice(&[]), -libc::EIO).await?; + continue; + }; if op == sys::UBLK_IO_OP_WRITE { // Copy WRITE data out of the kernel cmd buffer into ours. From 714407904bc1b83e57518bb5f116bb761efcf1eb Mon Sep 17 00:00:00 2001 From: jaredLunde <86995+jaredLunde@users.noreply.github.com> Date: Thu, 25 Jun 2026 04:41:10 +0000 Subject: [PATCH 2/2] Clarify oversized ublk bounce buffer failures Co-authored-by: capy-ai[bot] <230910855+capy-ai[bot]@users.noreply.github.com> --- glidefs/src/block/ublk/buffer_pool.rs | 10 +++++----- glidefs/src/block/ublk/device.rs | 13 ++++++------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/glidefs/src/block/ublk/buffer_pool.rs b/glidefs/src/block/ublk/buffer_pool.rs index d43c649..8a47993 100644 --- a/glidefs/src/block/ublk/buffer_pool.rs +++ b/glidefs/src/block/ublk/buffer_pool.rs @@ -375,11 +375,11 @@ fn try_alloc_zeroed(len: usize) -> Option> { /// (init OOM, see [`worker_pool`]), a fallibly-allocated heap buffer — counted /// in `GLOBAL_HEAP_FALLBACKS` so sustained degradation is alertable. /// -/// Returns `None` only in the doubly-degraded case where the pool is absent -/// *and* even the heap fallback can't be committed (host critically OOM). The -/// caller must fail that single I/O with `EIO` — never abort. This is the -/// per-I/O sibling of the init-path fix: neither `mmap` nor `vec` failure may -/// take down storage for every VM on the host. +/// Returns `None` for an invalid oversized request or in the doubly-degraded +/// case where the pool is absent and even the heap fallback can't be committed +/// (host critically OOM). The caller must fail that single I/O with `EIO` — +/// never abort. This is the per-I/O sibling of the init-path fix: neither +/// `mmap` nor `vec` failure may take down storage for every VM on the host. pub async fn acquire_io_buf(len: usize) -> Option { if len > SLOT_SIZE { return None; diff --git a/glidefs/src/block/ublk/device.rs b/glidefs/src/block/ublk/device.rs index 7d298ff..04e214d 100644 --- a/glidefs/src/block/ublk/device.rs +++ b/glidefs/src/block/ublk/device.rs @@ -2125,17 +2125,16 @@ async fn io_task_user_copy( // the daemon keeps serving instead of aborting; the worker // upgrades back to the pool once memory recovers. // - // `None` is the doubly-degraded case: no pool *and* the heap - // fallback couldn't be committed (host critically OOM). Fail - // just this one I/O with EIO — the daemon stays up serving - // every other tag and VM; the kernel retries the I/O. The - // alternative, an infallible alloc, would SIGABRT the whole - // daemon and take down storage host-wide. + // `None` means either an invalid oversized request or the + // doubly-degraded case: no pool *and* the heap fallback couldn't + // be committed (host critically OOM). Fail just this one I/O + // with EIO — the daemon stays up serving every other tag and VM. let Some(mut iobuf) = super::buffer_pool::acquire_io_buf(length as usize).await else { tracing::error!( qid, tag, length, - "bounce buffer alloc failed (host OOM) — failing this I/O with EIO", + max_len = super::buffer_pool::SLOT_SIZE, + "bounce buffer unavailable or length invalid — failing this I/O with EIO", ); q.submit_io_commit_cmd(tag, BufDesc::Slice(&[]), -libc::EIO).await?; continue;