diff options
author | Danilo Krummrich <dakr@kernel.org> | 2025-06-26 22:00:41 +0200 |
---|---|---|
committer | Danilo Krummrich <dakr@kernel.org> | 2025-06-28 18:08:50 +0200 |
commit | f5d3ef25d238901a76fe0277787afa44f7714739 (patch) | |
tree | 9c57a14c2cddb8d78f710f8d26e6896398650486 /rust/kernel | |
parent | 46ae8fd7386abf809355d1857abac5cf2d7c3f62 (diff) |
rust: devres: get rid of Devres' inner Arc
So far Devres uses an inner memory allocation and reference count, i.e.
an inner Arc, in order to ensure that the devres callback can't run into
a use-after-free in case where the Devres object is dropped while the
devres callback runs concurrently.
Instead, use a completion in order to avoid a potential UAF: In
Devres::drop(), if we detect that we can't remove the devres action
anymore, we wait for the completion that is completed from the devres
callback. If, in turn, we were able to successfully remove the devres
action, we can just go ahead.
This, again, allows us to get rid of the internal Arc, and instead let
Devres consume an `impl PinInit<T, E>` in order to return an
`impl PinInit<Devres<T>, E>`, which enables us to get away with less
memory allocations.
Additionally, having the resulting explicit synchronization in
Devres::drop() prevents potential subtle undesired side effects of the
devres callback dropping the final Arc reference asynchronously within
the devres callback.
Reviewed-by: Benno Lossin <lossin@kernel.org>
Reviewed-by: Boqun Feng <boqun.feng@gmail.com>
Link: https://lore.kernel.org/r/20250626200054.243480-4-dakr@kernel.org
[ Move '# Invariants' below '# Examples'. - Danilo ]
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
Diffstat (limited to 'rust/kernel')
-rw-r--r-- | rust/kernel/devres.rs | 213 | ||||
-rw-r--r-- | rust/kernel/pci.rs | 20 |
2 files changed, 136 insertions, 97 deletions
diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index 64458ca3d69f..7c5c5de8bcb6 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -13,16 +13,21 @@ use crate::{ ffi::c_void, prelude::*, revocable::{Revocable, RevocableGuard}, - sync::{rcu, Arc, Completion}, - types::{ARef, ForeignOwnable}, + sync::{rcu, Completion}, + types::{ARef, ForeignOwnable, Opaque, ScopeGuard}, }; +use pin_init::Wrapper; + +/// [`Devres`] inner data accessed from [`Devres::callback`]. #[pin_data] -struct DevresInner<T: Send> { - dev: ARef<Device>, - callback: unsafe extern "C" fn(*mut c_void), +struct Inner<T: Send> { #[pin] data: Revocable<T>, + /// Tracks whether [`Devres::callback`] has been completed. + #[pin] + devm: Completion, + /// Tracks whether revoking [`Self::data`] has been completed. #[pin] revoke: Completion, } @@ -88,100 +93,115 @@ struct DevresInner<T: Send> { /// # fn no_run(dev: &Device<Bound>) -> Result<(), Error> { /// // SAFETY: Invalid usage for example purposes. /// let iomem = unsafe { IoMem::<{ core::mem::size_of::<u32>() }>::new(0xBAAAAAAD)? }; -/// let devres = Devres::new(dev, iomem, GFP_KERNEL)?; +/// let devres = KBox::pin_init(Devres::new(dev, iomem), GFP_KERNEL)?; /// /// let res = devres.try_access().ok_or(ENXIO)?; /// res.write8(0x42, 0x0); /// # Ok(()) /// # } /// ``` -pub struct Devres<T: Send>(Arc<DevresInner<T>>); +/// +/// # Invariants +/// +/// [`Self::inner`] is guaranteed to be initialized and is always accessed read-only. +#[pin_data(PinnedDrop)] +pub struct Devres<T: Send> { + dev: ARef<Device>, + /// Pointer to [`Self::devres_callback`]. + /// + /// Has to be stored, since Rust does not guarantee to always return the same address for a + /// function. However, the C API uses the address as a key. + callback: unsafe extern "C" fn(*mut c_void), + /// Contains all the fields shared with [`Self::callback`]. + // TODO: Replace with `UnsafePinned`, once available. + // + // Subsequently, the `drop_in_place()` in `Devres::drop` and the explicit `Send` and `Sync' + // impls can be removed. + #[pin] + inner: Opaque<Inner<T>>, +} + +impl<T: Send> Devres<T> { + /// Creates a new [`Devres`] instance of the given `data`. + /// + /// The `data` encapsulated within the returned `Devres` instance' `data` will be + /// (revoked)[`Revocable`] once the device is detached. + pub fn new<'a, E>( + dev: &'a Device<Bound>, + data: impl PinInit<T, E> + 'a, + ) -> impl PinInit<Self, Error> + 'a + where + T: 'a, + Error: From<E>, + { + let callback = Self::devres_callback; -impl<T: Send> DevresInner<T> { - fn new(dev: &Device<Bound>, data: T, flags: Flags) -> Result<Arc<DevresInner<T>>> { - let inner = Arc::pin_init( - try_pin_init!( DevresInner { - dev: dev.into(), - callback: Self::devres_callback, + try_pin_init!(&this in Self { + // INVARIANT: `inner` is properly initialized. + inner <- Opaque::pin_init(try_pin_init!(Inner { data <- Revocable::new(data), + devm <- Completion::new(), revoke <- Completion::new(), - }), - flags, - )?; - - // Convert `Arc<DevresInner>` into a raw pointer and make devres own this reference until - // `Self::devres_callback` is called. - let data = inner.clone().into_raw(); + })), + callback, + dev: { + // SAFETY: `this` is a valid pointer to uninitialized memory. + let inner = unsafe { &raw mut (*this.as_ptr()).inner }; - // SAFETY: `devm_add_action` guarantees to call `Self::devres_callback` once `dev` is - // detached. - let ret = - unsafe { bindings::devm_add_action(dev.as_raw(), Some(inner.callback), data as _) }; - - if ret != 0 { - // SAFETY: We just created another reference to `inner` in order to pass it to - // `bindings::devm_add_action`. If `bindings::devm_add_action` fails, we have to drop - // this reference accordingly. - let _ = unsafe { Arc::from_raw(data) }; - return Err(Error::from_errno(ret)); - } + // SAFETY: + // - `dev.as_raw()` is a pointer to a valid bound device. + // - `inner` is guaranteed to be a valid for the duration of the lifetime of `Self`. + // - `devm_add_action()` is guaranteed not to call `callback` until `this` has been + // properly initialized, because we require `dev` (i.e. the *bound* device) to + // live at least as long as the returned `impl PinInit<Self, Error>`. + to_result(unsafe { + bindings::devm_add_action(dev.as_raw(), Some(callback), inner.cast()) + })?; - Ok(inner) + dev.into() + }, + }) } - fn as_ptr(&self) -> *const Self { - self as _ + fn inner(&self) -> &Inner<T> { + // SAFETY: By the type invairants of `Self`, `inner` is properly initialized and always + // accessed read-only. + unsafe { &*self.inner.get() } } - fn remove_action(this: &Arc<Self>) -> bool { - // SAFETY: - // - `self.inner.dev` is a valid `Device`, - // - the `action` and `data` pointers are the exact same ones as given to devm_add_action() - // previously, - // - `self` is always valid, even if the action has been released already. - let success = unsafe { - bindings::devm_remove_action_nowarn( - this.dev.as_raw(), - Some(this.callback), - this.as_ptr() as _, - ) - } == 0; - - if success { - // SAFETY: We leaked an `Arc` reference to devm_add_action() in `DevresInner::new`; if - // devm_remove_action_nowarn() was successful we can (and have to) claim back ownership - // of this reference. - let _ = unsafe { Arc::from_raw(this.as_ptr()) }; - } - - success + fn data(&self) -> &Revocable<T> { + &self.inner().data } #[allow(clippy::missing_safety_doc)] unsafe extern "C" fn devres_callback(ptr: *mut kernel::ffi::c_void) { - let ptr = ptr as *mut DevresInner<T>; - // Devres owned this memory; now that we received the callback, drop the `Arc` and hence the - // reference. - // SAFETY: Safe, since we leaked an `Arc` reference to devm_add_action() in - // `DevresInner::new`. - let inner = unsafe { Arc::from_raw(ptr) }; + // SAFETY: In `Self::new` we've passed a valid pointer to `Inner` to `devm_add_action()`, + // hence `ptr` must be a valid pointer to `Inner`. + let inner = unsafe { &*ptr.cast::<Inner<T>>() }; + + // Ensure that `inner` can't be used anymore after we signal completion of this callback. + let inner = ScopeGuard::new_with_data(inner, |inner| inner.devm.complete_all()); if !inner.data.revoke() { // If `revoke()` returns false, it means that `Devres::drop` already started revoking - // `inner.data` for us. Hence we have to wait until `Devres::drop()` signals that it - // completed revoking `inner.data`. + // `data` for us. Hence we have to wait until `Devres::drop` signals that it + // completed revoking `data`. inner.revoke.wait_for_completion(); } } -} -impl<T: Send> Devres<T> { - /// Creates a new [`Devres`] instance of the given `data`. The `data` encapsulated within the - /// returned `Devres` instance' `data` will be revoked once the device is detached. - pub fn new(dev: &Device<Bound>, data: T, flags: Flags) -> Result<Self> { - let inner = DevresInner::new(dev, data, flags)?; - - Ok(Devres(inner)) + fn remove_action(&self) -> bool { + // SAFETY: + // - `self.dev` is a valid `Device`, + // - the `action` and `data` pointers are the exact same ones as given to + // `devm_add_action()` previously, + (unsafe { + bindings::devm_remove_action_nowarn( + self.dev.as_raw(), + Some(self.callback), + core::ptr::from_ref(self.inner()).cast_mut().cast(), + ) + } == 0) } /// Obtain `&'a T`, bypassing the [`Revocable`]. @@ -213,44 +233,63 @@ impl<T: Send> Devres<T> { /// } /// ``` pub fn access<'a>(&'a self, dev: &'a Device<Bound>) -> Result<&'a T> { - if self.0.dev.as_raw() != dev.as_raw() { + if self.dev.as_raw() != dev.as_raw() { return Err(EINVAL); } // SAFETY: `dev` being the same device as the device this `Devres` has been created for - // proves that `self.0.data` hasn't been revoked and is guaranteed to not be revoked as - // long as `dev` lives; `dev` lives at least as long as `self`. - Ok(unsafe { self.0.data.access() }) + // proves that `self.data` hasn't been revoked and is guaranteed to not be revoked as long + // as `dev` lives; `dev` lives at least as long as `self`. + Ok(unsafe { self.data().access() }) } /// [`Devres`] accessor for [`Revocable::try_access`]. pub fn try_access(&self) -> Option<RevocableGuard<'_, T>> { - self.0.data.try_access() + self.data().try_access() } /// [`Devres`] accessor for [`Revocable::try_access_with`]. pub fn try_access_with<R, F: FnOnce(&T) -> R>(&self, f: F) -> Option<R> { - self.0.data.try_access_with(f) + self.data().try_access_with(f) } /// [`Devres`] accessor for [`Revocable::try_access_with_guard`]. pub fn try_access_with_guard<'a>(&'a self, guard: &'a rcu::Guard) -> Option<&'a T> { - self.0.data.try_access_with_guard(guard) + self.data().try_access_with_guard(guard) } } -impl<T: Send> Drop for Devres<T> { - fn drop(&mut self) { +// SAFETY: `Devres` can be send to any task, if `T: Send`. +unsafe impl<T: Send> Send for Devres<T> {} + +// SAFETY: `Devres` can be shared with any task, if `T: Sync`. +unsafe impl<T: Send + Sync> Sync for Devres<T> {} + +#[pinned_drop] +impl<T: Send> PinnedDrop for Devres<T> { + fn drop(self: Pin<&mut Self>) { // SAFETY: When `drop` runs, it is guaranteed that nobody is accessing the revocable data // anymore, hence it is safe not to wait for the grace period to finish. - if unsafe { self.0.data.revoke_nosync() } { - // We revoked `self.0.data` before the devres action did, hence try to remove it. - if !DevresInner::remove_action(&self.0) { + if unsafe { self.data().revoke_nosync() } { + // We revoked `self.data` before the devres action did, hence try to remove it. + if !self.remove_action() { // We could not remove the devres action, which means that it now runs concurrently, - // hence signal that `self.0.data` has been revoked successfully. - self.0.revoke.complete_all(); + // hence signal that `self.data` has been revoked by us successfully. + self.inner().revoke.complete_all(); + + // Wait for `Self::devres_callback` to be done using this object. + self.inner().devm.wait_for_completion(); } + } else { + // `Self::devres_callback` revokes `self.data` for us, hence wait for it to be done + // using this object. + self.inner().devm.wait_for_completion(); } + + // INVARIANT: At this point it is guaranteed that `inner` can't be accessed any more. + // + // SAFETY: `inner` is valid for dropping. + unsafe { core::ptr::drop_in_place(self.inner.get()) }; } } diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index 8435f8132e38..db0eb7eaf9b1 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -5,7 +5,6 @@ //! C header: [`include/linux/pci.h`](srctree/include/linux/pci.h) use crate::{ - alloc::flags::*, bindings, container_of, device, device_id::RawDeviceId, devres::Devres, @@ -398,19 +397,20 @@ impl Device { impl Device<device::Bound> { /// Mapps an entire PCI-BAR after performing a region-request on it. I/O operation bound checks /// can be performed on compile time for offsets (plus the requested type size) < SIZE. - pub fn iomap_region_sized<const SIZE: usize>( - &self, + pub fn iomap_region_sized<'a, const SIZE: usize>( + &'a self, bar: u32, - name: &CStr, - ) -> Result<Devres<Bar<SIZE>>> { - let bar = Bar::<SIZE>::new(self, bar, name)?; - let devres = Devres::new(self.as_ref(), bar, GFP_KERNEL)?; - - Ok(devres) + name: &'a CStr, + ) -> impl PinInit<Devres<Bar<SIZE>>, Error> + 'a { + Devres::new(self.as_ref(), Bar::<SIZE>::new(self, bar, name)) } /// Mapps an entire PCI-BAR after performing a region-request on it. - pub fn iomap_region(&self, bar: u32, name: &CStr) -> Result<Devres<Bar>> { + pub fn iomap_region<'a>( + &'a self, + bar: u32, + name: &'a CStr, + ) -> impl PinInit<Devres<Bar>, Error> + 'a { self.iomap_region_sized::<0>(bar, name) } } |