Diffstat (limited to 'rust/kernel')
-rw-r--r--  rust/kernel/.gitignore | 3
-rw-r--r--  rust/kernel/alloc.rs | 224
-rw-r--r--  rust/kernel/alloc/allocator.rs | 188
-rw-r--r--  rust/kernel/alloc/allocator_test.rs | 113
-rw-r--r--  rust/kernel/alloc/kbox.rs | 487
-rw-r--r--  rust/kernel/alloc/kvec.rs | 913
-rw-r--r--  rust/kernel/alloc/layout.rs | 110
-rw-r--r--  rust/kernel/allocator.rs | 88
-rw-r--r--  rust/kernel/block.rs | 5
-rw-r--r--  rust/kernel/block/mq.rs | 98
-rw-r--r--  rust/kernel/block/mq/gen_disk.rs | 196
-rw-r--r--  rust/kernel/block/mq/operations.rs | 246
-rw-r--r--  rust/kernel/block/mq/raw_writer.rs | 55
-rw-r--r--  rust/kernel/block/mq/request.rs | 262
-rw-r--r--  rust/kernel/block/mq/tag_set.rs | 86
-rw-r--r--  rust/kernel/build_assert.rs | 12
-rw-r--r--  rust/kernel/cred.rs | 85
-rw-r--r--  rust/kernel/device.rs | 420
-rw-r--r--  rust/kernel/device_id.rs | 165
-rw-r--r--  rust/kernel/devres.rs | 201
-rw-r--r--  rust/kernel/driver.rs | 188
-rw-r--r--  rust/kernel/error.rs | 113
-rw-r--r--  rust/kernel/faux.rs | 67
-rw-r--r--  rust/kernel/firmware.rs | 117
-rw-r--r--  rust/kernel/fs.rs | 8
-rw-r--r--  rust/kernel/fs/file.rs | 461
-rw-r--r--  rust/kernel/generated_arch_static_branch_asm.rs.S | 7
-rw-r--r--  rust/kernel/init.rs | 356
-rw-r--r--  rust/kernel/init/__internal.rs | 42
-rw-r--r--  rust/kernel/init/macros.rs | 75
-rw-r--r--  rust/kernel/io.rs | 260
-rw-r--r--  rust/kernel/ioctl.rs | 8
-rw-r--r--  rust/kernel/jump_label.rs | 74
-rw-r--r--  rust/kernel/kunit.rs | 4
-rw-r--r--  rust/kernel/lib.rs | 172
-rw-r--r--  rust/kernel/list.rs | 687
-rw-r--r--  rust/kernel/list/arc.rs | 521
-rw-r--r--  rust/kernel/list/arc_field.rs | 96
-rw-r--r--  rust/kernel/list/impl_list_item_mod.rs | 274
-rw-r--r--  rust/kernel/miscdevice.rs | 330
-rw-r--r--  rust/kernel/net/phy.rs | 156
-rw-r--r--  rust/kernel/net/phy/reg.rs | 224
-rw-r--r--  rust/kernel/of.rs | 60
-rw-r--r--  rust/kernel/page.rs | 258
-rw-r--r--  rust/kernel/pci.rs | 434
-rw-r--r--  rust/kernel/pid_namespace.rs | 68
-rw-r--r--  rust/kernel/platform.rs | 200
-rw-r--r--  rust/kernel/prelude.rs | 9
-rw-r--r--  rust/kernel/print.rs | 34
-rw-r--r--  rust/kernel/rbtree.rs | 1286
-rw-r--r--  rust/kernel/revocable.rs | 219
-rw-r--r--  rust/kernel/security.rs | 70
-rw-r--r--  rust/kernel/seq_file.rs | 52
-rw-r--r--  rust/kernel/sizes.rs | 26
-rw-r--r--  rust/kernel/std_vendor.rs | 47
-rw-r--r--  rust/kernel/str.rs | 357
-rw-r--r--  rust/kernel/sync.rs | 18
-rw-r--r--  rust/kernel/sync/arc.rs | 298
-rw-r--r--  rust/kernel/sync/arc/std_vendor.rs | 2
-rw-r--r--  rust/kernel/sync/condvar.rs | 116
-rw-r--r--  rust/kernel/sync/lock.rs | 94
-rw-r--r--  rust/kernel/sync/lock/global.rs | 301
-rw-r--r--  rust/kernel/sync/lock/mutex.rs | 33
-rw-r--r--  rust/kernel/sync/lock/spinlock.rs | 35
-rw-r--r--  rust/kernel/sync/locked_by.rs | 29
-rw-r--r--  rust/kernel/sync/poll.rs | 121
-rw-r--r--  rust/kernel/sync/rcu.rs | 47
-rw-r--r--  rust/kernel/task.rs | 261
-rw-r--r--  rust/kernel/time.rs | 83
-rw-r--r--  rust/kernel/tracepoint.rs | 49
-rw-r--r--  rust/kernel/transmute.rs | 71
-rw-r--r--  rust/kernel/types.rs | 274
-rw-r--r--  rust/kernel/uaccess.rs | 372
-rw-r--r--  rust/kernel/workqueue.rs | 202
74 files changed, 12933 insertions, 790 deletions
diff --git a/rust/kernel/.gitignore b/rust/kernel/.gitignore
new file mode 100644
index 000000000000..6ba39a178f30
--- /dev/null
+++ b/rust/kernel/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+/generated_arch_static_branch_asm.rs
diff --git a/rust/kernel/alloc.rs b/rust/kernel/alloc.rs
new file mode 100644
index 000000000000..fc9c9c41cd79
--- /dev/null
+++ b/rust/kernel/alloc.rs
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Implementation of the kernel's memory allocation infrastructure.
+
+#[cfg(not(any(test, testlib)))]
+pub mod allocator;
+pub mod kbox;
+pub mod kvec;
+pub mod layout;
+
+#[cfg(any(test, testlib))]
+pub mod allocator_test;
+
+#[cfg(any(test, testlib))]
+pub use self::allocator_test as allocator;
+
+pub use self::kbox::Box;
+pub use self::kbox::KBox;
+pub use self::kbox::KVBox;
+pub use self::kbox::VBox;
+
+pub use self::kvec::IntoIter;
+pub use self::kvec::KVVec;
+pub use self::kvec::KVec;
+pub use self::kvec::VVec;
+pub use self::kvec::Vec;
+
+/// Indicates an allocation error.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub struct AllocError;
+use core::{alloc::Layout, ptr::NonNull};
+
+/// Flags to be used when allocating memory.
+///
+/// They can be combined with the operators `|`, `&`, and `!`.
+///
+/// Values can be used from the [`flags`] module.
+#[derive(Clone, Copy, PartialEq)]
+pub struct Flags(u32);
+
+impl Flags {
+ /// Get the raw representation of this flag.
+ pub(crate) fn as_raw(self) -> u32 {
+ self.0
+ }
+
+ /// Check whether `flags` is contained in `self`.
+ pub fn contains(self, flags: Flags) -> bool {
+ (self & flags) == flags
+ }
+}
+
+impl core::ops::BitOr for Flags {
+ type Output = Self;
+ fn bitor(self, rhs: Self) -> Self::Output {
+ Self(self.0 | rhs.0)
+ }
+}
+
+impl core::ops::BitAnd for Flags {
+ type Output = Self;
+ fn bitand(self, rhs: Self) -> Self::Output {
+ Self(self.0 & rhs.0)
+ }
+}
+
+impl core::ops::Not for Flags {
+ type Output = Self;
+ fn not(self) -> Self::Output {
+ Self(!self.0)
+ }
+}
+
+/// Allocation flags.
+///
+/// These are meant to be used in functions that can allocate memory.
+pub mod flags {
+ use super::Flags;
+
+ /// Zeroes out the allocated memory.
+ ///
+ /// This is normally or'd with other flags.
+ pub const __GFP_ZERO: Flags = Flags(bindings::__GFP_ZERO);
+
+ /// Allow the allocation to be in high memory.
+ ///
+ /// Allocations in high memory may not be mapped into the kernel's address space, so this can't
+ /// be used with `kmalloc` and other similar methods.
+ ///
+ /// This is normally or'd with other flags.
+ pub const __GFP_HIGHMEM: Flags = Flags(bindings::__GFP_HIGHMEM);
+
+ /// Users cannot sleep and need the allocation to succeed.
+ ///
+ /// A lower watermark is applied to allow access to "atomic reserves". The current
+ /// implementation doesn't support NMI and a few other strict non-preemptive contexts (e.g.
+ /// raw_spin_lock). The same applies to [`GFP_NOWAIT`].
+ pub const GFP_ATOMIC: Flags = Flags(bindings::GFP_ATOMIC);
+
+ /// Typical for kernel-internal allocations. The caller requires ZONE_NORMAL or a lower zone
+ /// for direct access but can direct reclaim.
+ pub const GFP_KERNEL: Flags = Flags(bindings::GFP_KERNEL);
+
+ /// The same as [`GFP_KERNEL`], except the allocation is accounted to kmemcg.
+ pub const GFP_KERNEL_ACCOUNT: Flags = Flags(bindings::GFP_KERNEL_ACCOUNT);
+
+ /// For kernel allocations that should not stall for direct reclaim, start physical IO or
+ /// use any filesystem callback. It is very likely to fail to allocate memory, even for very
+ /// small allocations.
+ pub const GFP_NOWAIT: Flags = Flags(bindings::GFP_NOWAIT);
+
+ /// Suppresses allocation failure reports.
+ ///
+ /// This is normally or'd with other flags.
+ pub const __GFP_NOWARN: Flags = Flags(bindings::__GFP_NOWARN);
+}
+
+/// The kernel's [`Allocator`] trait.
+///
+/// An implementation of [`Allocator`] can allocate, re-allocate and free memory buffers described
+/// via [`Layout`].
+///
+/// [`Allocator`] is designed to be implemented as a ZST; [`Allocator`] functions do not operate on
+/// an object instance.
+///
+/// In order to be able to support `#[derive(CoercePointee)]` later on, we need to avoid a design
+/// that requires an `Allocator` to be instantiated, hence its functions must not contain any kind
+/// of `self` parameter.
+///
+/// # Safety
+///
+/// - A memory allocation returned from an allocator must remain valid until it is explicitly freed.
+///
+/// - Any pointer to a valid memory allocation must be valid to be passed to any other [`Allocator`]
+/// function of the same type.
+///
+/// - Implementers must ensure that all trait functions abide by the guarantees documented in the
+/// `# Guarantees` sections.
+pub unsafe trait Allocator {
+ /// Allocate memory based on `layout` and `flags`.
+ ///
+ /// On success, returns a buffer represented as `NonNull<[u8]>` that satisfies the layout
+ /// constraints (i.e. minimum size and alignment as specified by `layout`).
+ ///
+ /// This function is equivalent to `realloc` when called with `None`.
+ ///
+ /// # Guarantees
+ ///
+ /// When the return value is `Ok(ptr)`, then `ptr` is
+ /// - valid for reads and writes for `layout.size()` bytes, until it is passed to
+ /// [`Allocator::free`] or [`Allocator::realloc`],
+ /// - aligned to `layout.align()`.
+ ///
+ /// Additionally, `Flags` are honored as documented in
+ /// <https://docs.kernel.org/core-api/mm-api.html#mm-api-gfp-flags>.
+ fn alloc(layout: Layout, flags: Flags) -> Result<NonNull<[u8]>, AllocError> {
+ // SAFETY: Passing `None` to `realloc` is valid by its safety requirements and asks for a
+ // new memory allocation.
+ unsafe { Self::realloc(None, layout, Layout::new::<()>(), flags) }
+ }
+
+ /// Re-allocate an existing memory allocation to satisfy the requested `layout`.
+ ///
+ /// If the requested size is zero, `realloc` behaves equivalent to `free`.
+ ///
+ /// If the requested size is larger than the size of the existing allocation, a successful call
+ /// to `realloc` guarantees that the new or grown buffer has at least `Layout::size` bytes, but
+ /// may also be larger.
+ ///
+ /// If the requested size is smaller than the size of the existing allocation, `realloc` may or
+ /// may not shrink the buffer; this is implementation specific to the allocator.
+ ///
+ /// On allocation failure, the existing buffer, if any, remains valid.
+ ///
+ /// The buffer is represented as `NonNull<[u8]>`.
+ ///
+ /// # Safety
+ ///
+ /// - If `ptr == Some(p)`, then `p` must point to an existing and valid memory allocation
+ /// created by this [`Allocator`]; if `old_layout` is zero-sized `p` does not need to be a
+ /// pointer returned by this [`Allocator`].
+ /// - `ptr` is allowed to be `None`; in this case a new memory allocation is created and
+ /// `old_layout` is ignored.
+ /// - `old_layout` must match the `Layout` the allocation has been created with.
+ ///
+ /// # Guarantees
+ ///
+ /// This function has the same guarantees as [`Allocator::alloc`]. When `ptr == Some(p)`, then
+ /// it additionally guarantees that:
+ /// - the contents of the memory pointed to by `p` are preserved up to the lesser of the new
+ /// and old size, i.e. `ret_ptr[0..min(layout.size(), old_layout.size())] ==
+ /// p[0..min(layout.size(), old_layout.size())]`.
+ /// - when the return value is `Err(AllocError)`, then `ptr` is still valid.
+ unsafe fn realloc(
+ ptr: Option<NonNull<u8>>,
+ layout: Layout,
+ old_layout: Layout,
+ flags: Flags,
+ ) -> Result<NonNull<[u8]>, AllocError>;
+
+ /// Free an existing memory allocation.
+ ///
+ /// # Safety
+ ///
+ /// - `ptr` must point to an existing and valid memory allocation created by this [`Allocator`];
+ /// if `layout` is zero-sized, `ptr` does not need to be a pointer returned by this
+ /// [`Allocator`].
+ /// - `layout` must match the `Layout` the allocation has been created with.
+ /// - The memory allocation at `ptr` must never again be read from or written to.
+ unsafe fn free(ptr: NonNull<u8>, layout: Layout) {
+ // SAFETY: The caller guarantees that `ptr` points at a valid allocation created by this
+ // allocator. We are passing a `Layout` with the smallest possible alignment, so it is
+ // smaller than or equal to the alignment previously used with this allocation.
+ let _ = unsafe { Self::realloc(Some(ptr), Layout::new::<()>(), layout, Flags(0)) };
+ }
+}
+
+/// Returns a properly aligned dangling pointer from the given `layout`.
+pub(crate) fn dangling_from_layout(layout: Layout) -> NonNull<u8> {
+ let ptr = layout.align() as *mut u8;
+
+ // SAFETY: `layout.align()` (and hence `ptr`) is guaranteed to be non-zero.
+ unsafe { NonNull::new_unchecked(ptr) }
+}
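As an aside on the `Flags` API above: the bit operators compose GFP flags just as the C `|`/`&`/`~` macros do. A minimal sketch (the test function name is hypothetical; it assumes the `kernel::alloc::flags` constants defined above are in scope):

```rust
use kernel::alloc::flags::{GFP_KERNEL, __GFP_NOWARN, __GFP_ZERO};

fn flag_composition() {
    // `|` combines flags; `contains` checks that every bit of the argument
    // is also set in `self`.
    let flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN;
    assert!(flags.contains(GFP_KERNEL));
    assert!(flags.contains(__GFP_ZERO));

    // `&` and `!` mask individual bits back out again.
    let no_zero = flags & !__GFP_ZERO;
    assert!(!no_zero.contains(__GFP_ZERO));
    assert!(no_zero.contains(__GFP_NOWARN));
}
```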
diff --git a/rust/kernel/alloc/allocator.rs b/rust/kernel/alloc/allocator.rs
new file mode 100644
index 000000000000..439985e29fbc
--- /dev/null
+++ b/rust/kernel/alloc/allocator.rs
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Allocator support.
+//!
+//! Documentation for the kernel's memory allocators can be found in the "Memory Allocation Guide"
+//! linked below. For instance, this includes the concept of "get free page" (GFP) flags and the
+//! typical application of the different kernel allocators.
+//!
+//! Reference: <https://docs.kernel.org/core-api/memory-allocation.html>
+
+use super::Flags;
+use core::alloc::Layout;
+use core::ptr;
+use core::ptr::NonNull;
+
+use crate::alloc::{AllocError, Allocator};
+use crate::bindings;
+use crate::pr_warn;
+
+/// The contiguous kernel allocator.
+///
+/// `Kmalloc` is typically used for physically contiguous allocations up to page size, but also
+/// supports larger allocations up to `bindings::KMALLOC_MAX_SIZE`, which is hardware specific.
+///
+/// For more details see [self].
+pub struct Kmalloc;
+
+/// The virtually contiguous kernel allocator.
+///
+/// `Vmalloc` allocates pages from the page level allocator and maps them into the contiguous kernel
+/// virtual space. It is typically used for large allocations. The memory allocated with this
+/// allocator is not physically contiguous.
+///
+/// For more details see [self].
+pub struct Vmalloc;
+
+/// The kvmalloc kernel allocator.
+///
+/// `KVmalloc` attempts to allocate memory with `Kmalloc` first, but falls back to `Vmalloc` upon
+/// failure. This allocator is typically used when the size for the requested allocation is not
+/// known and may exceed the capabilities of `Kmalloc`.
+///
+/// For more details see [self].
+pub struct KVmalloc;
+
+/// Returns a proper size to alloc a new object aligned to `new_layout`'s alignment.
+fn aligned_size(new_layout: Layout) -> usize {
+ // Customized layouts from `Layout::from_size_align()` can have size < align, so pad first.
+ let layout = new_layout.pad_to_align();
+
+ // Note that `layout.size()` (after padding) is guaranteed to be a multiple of `layout.align()`
+ // which together with the slab guarantees means the `krealloc` will return a properly aligned
+ // object (see comments in `kmalloc()` for more information).
+ layout.size()
+}
+
+/// # Invariants
+///
+/// One of the following: `krealloc`, `vrealloc`, `kvrealloc`.
+struct ReallocFunc(
+ unsafe extern "C" fn(*const crate::ffi::c_void, usize, u32) -> *mut crate::ffi::c_void,
+);
+
+impl ReallocFunc {
+ // INVARIANT: `krealloc` satisfies the type invariants.
+ const KREALLOC: Self = Self(bindings::krealloc);
+
+ // INVARIANT: `vrealloc` satisfies the type invariants.
+ const VREALLOC: Self = Self(bindings::vrealloc);
+
+ // INVARIANT: `kvrealloc` satisfies the type invariants.
+ const KVREALLOC: Self = Self(bindings::kvrealloc);
+
+ /// # Safety
+ ///
+ /// This method has the same safety requirements as [`Allocator::realloc`].
+ ///
+ /// # Guarantees
+ ///
+ /// This method has the same guarantees as `Allocator::realloc`. Additionally
+ /// - it accepts any pointer to a valid memory allocation allocated by this function.
+ /// - memory allocated by this function remains valid until it is passed to this function.
+ unsafe fn call(
+ &self,
+ ptr: Option<NonNull<u8>>,
+ layout: Layout,
+ old_layout: Layout,
+ flags: Flags,
+ ) -> Result<NonNull<[u8]>, AllocError> {
+ let size = aligned_size(layout);
+ let ptr = match ptr {
+ Some(ptr) => {
+ if old_layout.size() == 0 {
+ ptr::null()
+ } else {
+ ptr.as_ptr()
+ }
+ }
+ None => ptr::null(),
+ };
+
+ // SAFETY:
+ // - `self.0` is one of `krealloc`, `vrealloc`, `kvrealloc` and thus only requires that
+ // `ptr` is NULL or valid.
+ // - `ptr` is either NULL or valid by the safety requirements of this function.
+ //
+ // GUARANTEE:
+ // - `self.0` is one of `krealloc`, `vrealloc`, `kvrealloc`.
+ // - Those functions provide the guarantees of this function.
+ let raw_ptr = unsafe {
+ // If `size == 0` and `ptr != NULL` the memory behind the pointer is freed.
+ self.0(ptr.cast(), size, flags.0).cast()
+ };
+
+ let ptr = if size == 0 {
+ crate::alloc::dangling_from_layout(layout)
+ } else {
+ NonNull::new(raw_ptr).ok_or(AllocError)?
+ };
+
+ Ok(NonNull::slice_from_raw_parts(ptr, size))
+ }
+}
+
+// SAFETY: `realloc` delegates to `ReallocFunc::call`, which guarantees that
+// - memory remains valid until it is explicitly freed,
+// - passing a pointer to a valid memory allocation is OK,
+// - `realloc` satisfies the guarantees, since `ReallocFunc::call` has the same.
+unsafe impl Allocator for Kmalloc {
+ #[inline]
+ unsafe fn realloc(
+ ptr: Option<NonNull<u8>>,
+ layout: Layout,
+ old_layout: Layout,
+ flags: Flags,
+ ) -> Result<NonNull<[u8]>, AllocError> {
+ // SAFETY: `ReallocFunc::call` has the same safety requirements as `Allocator::realloc`.
+ unsafe { ReallocFunc::KREALLOC.call(ptr, layout, old_layout, flags) }
+ }
+}
+
+// SAFETY: `realloc` delegates to `ReallocFunc::call`, which guarantees that
+// - memory remains valid until it is explicitly freed,
+// - passing a pointer to a valid memory allocation is OK,
+// - `realloc` satisfies the guarantees, since `ReallocFunc::call` has the same.
+unsafe impl Allocator for Vmalloc {
+ #[inline]
+ unsafe fn realloc(
+ ptr: Option<NonNull<u8>>,
+ layout: Layout,
+ old_layout: Layout,
+ flags: Flags,
+ ) -> Result<NonNull<[u8]>, AllocError> {
+ // TODO: Support alignments larger than PAGE_SIZE.
+ if layout.align() > bindings::PAGE_SIZE {
+ pr_warn!("Vmalloc does not support alignments larger than PAGE_SIZE yet.\n");
+ return Err(AllocError);
+ }
+
+ // SAFETY: If not `None`, `ptr` is guaranteed to point to valid memory, which was previously
+ // allocated with this `Allocator`.
+ unsafe { ReallocFunc::VREALLOC.call(ptr, layout, old_layout, flags) }
+ }
+}
+
+// SAFETY: `realloc` delegates to `ReallocFunc::call`, which guarantees that
+// - memory remains valid until it is explicitly freed,
+// - passing a pointer to a valid memory allocation is OK,
+// - `realloc` satisfies the guarantees, since `ReallocFunc::call` has the same.
+unsafe impl Allocator for KVmalloc {
+ #[inline]
+ unsafe fn realloc(
+ ptr: Option<NonNull<u8>>,
+ layout: Layout,
+ old_layout: Layout,
+ flags: Flags,
+ ) -> Result<NonNull<[u8]>, AllocError> {
+ // TODO: Support alignments larger than PAGE_SIZE.
+ if layout.align() > bindings::PAGE_SIZE {
+ pr_warn!("KVmalloc does not support alignments larger than PAGE_SIZE yet.\n");
+ return Err(AllocError);
+ }
+
+ // SAFETY: If not `None`, `ptr` is guaranteed to point to valid memory, which was previously
+ // allocated with this `Allocator`.
+ unsafe { ReallocFunc::KVREALLOC.call(ptr, layout, old_layout, flags) }
+ }
+}
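To illustrate how these allocators are driven through the raw `Allocator` interface (in practice this usually happens indirectly via `KBox` and `KVec`), here is a minimal sketch; the function name is hypothetical and the `kernel` paths are assumed from the code above:

```rust
use core::alloc::Layout;
use kernel::alloc::{allocator::KVmalloc, flags::GFP_KERNEL, AllocError, Allocator};

fn scratch_buffer() -> Result<(), AllocError> {
    // `KVmalloc` tries the physically contiguous `Kmalloc` first and
    // transparently falls back to `Vmalloc` on failure.
    let layout = Layout::from_size_align(4096, 8).map_err(|_| AllocError)?;
    let buf = KVmalloc::alloc(layout, GFP_KERNEL)?;

    // `buf` is a `NonNull<[u8]>` valid for reads and writes of 4096 bytes here.

    // SAFETY: `buf` was returned by `KVmalloc::alloc` with this `layout` and
    // is not accessed again afterwards.
    unsafe { KVmalloc::free(buf.cast(), layout) };
    Ok(())
}
```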
diff --git a/rust/kernel/alloc/allocator_test.rs b/rust/kernel/alloc/allocator_test.rs
new file mode 100644
index 000000000000..c37d4c0c64e9
--- /dev/null
+++ b/rust/kernel/alloc/allocator_test.rs
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! So far the kernel's `Box` and `Vec` types can't be used by userspace test cases, since all users
+//! of those types (e.g. `CString`) use kernel allocators for instantiation.
+//!
+//! In order to allow userspace test cases to make use of such types as well, implement the
+//! `Cmalloc` allocator within the allocator_test module and type alias all kernel allocators to
+//! `Cmalloc`. The `Cmalloc` allocator is backed by libc's `aligned_alloc()` and `free()` functions.
+
+#![allow(missing_docs)]
+
+use super::{flags::*, AllocError, Allocator, Flags};
+use core::alloc::Layout;
+use core::cmp;
+use core::ptr;
+use core::ptr::NonNull;
+
+/// The userspace allocator based on libc.
+pub struct Cmalloc;
+
+pub type Kmalloc = Cmalloc;
+pub type Vmalloc = Kmalloc;
+pub type KVmalloc = Kmalloc;
+
+extern "C" {
+ #[link_name = "aligned_alloc"]
+ fn libc_aligned_alloc(align: usize, size: usize) -> *mut crate::ffi::c_void;
+
+ #[link_name = "free"]
+ fn libc_free(ptr: *mut crate::ffi::c_void);
+}
+
+// SAFETY:
+// - memory remains valid until it is explicitly freed,
+// - passing a pointer to a valid memory allocation created by this `Allocator` is always OK,
+// - `realloc` provides the guarantees as provided in the `# Guarantees` section.
+unsafe impl Allocator for Cmalloc {
+ unsafe fn realloc(
+ ptr: Option<NonNull<u8>>,
+ layout: Layout,
+ old_layout: Layout,
+ flags: Flags,
+ ) -> Result<NonNull<[u8]>, AllocError> {
+ let src = match ptr {
+ Some(src) => {
+ if old_layout.size() == 0 {
+ ptr::null_mut()
+ } else {
+ src.as_ptr()
+ }
+ }
+ None => ptr::null_mut(),
+ };
+
+ if layout.size() == 0 {
+ // SAFETY: `src` is either NULL or was previously allocated with this `Allocator`
+ unsafe { libc_free(src.cast()) };
+
+ return Ok(NonNull::slice_from_raw_parts(
+ crate::alloc::dangling_from_layout(layout),
+ 0,
+ ));
+ }
+
+ // ISO C (ISO/IEC 9899:2011) defines `aligned_alloc`:
+ //
+ // > The value of alignment shall be a valid alignment supported by the implementation
+ // [...].
+ //
+ // As an example of the "supported by the implementation" requirement, POSIX.1-2001 (IEEE
+ // 1003.1-2001) defines `posix_memalign`:
+ //
+ // > The value of alignment shall be a power of two multiple of sizeof (void *).
+ //
+ // and POSIX-based implementations of `aligned_alloc` inherit this requirement. At the time
+ // of writing, this is known to be the case on macOS (but not in glibc).
+ //
+ // Satisfy the stricter requirement to avoid spurious test failures on some platforms.
+ let min_align = core::mem::size_of::<*const crate::ffi::c_void>();
+ let layout = layout.align_to(min_align).map_err(|_| AllocError)?;
+ let layout = layout.pad_to_align();
+
+ // SAFETY: Returns either NULL or a pointer to a memory allocation that satisfies or
+ // exceeds the given size and alignment requirements.
+ let dst = unsafe { libc_aligned_alloc(layout.align(), layout.size()) } as *mut u8;
+ let dst = NonNull::new(dst).ok_or(AllocError)?;
+
+ if flags.contains(__GFP_ZERO) {
+ // SAFETY: The preceding calls to `libc_aligned_alloc` and `NonNull::new`
+ // guarantee that `dst` points to memory of at least `layout.size()` bytes.
+ unsafe { dst.as_ptr().write_bytes(0, layout.size()) };
+ }
+
+ if !src.is_null() {
+ // SAFETY:
+ // - `src` has previously been allocated with this `Allocator`; `dst` has just been
+ // newly allocated, hence the memory regions do not overlap.
+ // - both `src` and `dst` are properly aligned and valid for reads and writes
+ unsafe {
+ ptr::copy_nonoverlapping(
+ src,
+ dst.as_ptr(),
+ cmp::min(layout.size(), old_layout.size()),
+ )
+ };
+ }
+
+ // SAFETY: `src` is either NULL or was previously allocated with this `Allocator`
+ unsafe { libc_free(src.cast()) };
+
+ Ok(NonNull::slice_from_raw_parts(dst, layout.size()))
+ }
+}
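Because `Kmalloc`, `Vmalloc` and `KVmalloc` are all aliased to `Cmalloc` here, code written against the kernel type aliases compiles unchanged in userspace tests. A sketch of what that enables (hypothetical test function):

```rust
use kernel::alloc::{flags::GFP_KERNEL, AllocError, KVec};

fn kvec_in_userspace() -> Result<(), AllocError> {
    // In kernel builds this `push` grows the buffer via `krealloc`; under
    // `test`/`testlib` the very same call is serviced by `Cmalloc`, i.e.
    // libc's `aligned_alloc` plus a copy and `free`.
    let mut v = KVec::new();
    v.push(42u32, GFP_KERNEL)?;
    assert_eq!(v[0], 42);
    Ok(())
}
```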
diff --git a/rust/kernel/alloc/kbox.rs b/rust/kernel/alloc/kbox.rs
new file mode 100644
index 000000000000..cb4ebea3b074
--- /dev/null
+++ b/rust/kernel/alloc/kbox.rs
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Implementation of [`Box`].
+
+#[allow(unused_imports)] // Used in doc comments.
+use super::allocator::{KVmalloc, Kmalloc, Vmalloc};
+use super::{AllocError, Allocator, Flags};
+use core::alloc::Layout;
+use core::fmt;
+use core::marker::PhantomData;
+use core::mem::ManuallyDrop;
+use core::mem::MaybeUninit;
+use core::ops::{Deref, DerefMut};
+use core::pin::Pin;
+use core::ptr::NonNull;
+use core::result::Result;
+
+use crate::init::{InPlaceInit, InPlaceWrite, Init, PinInit};
+use crate::types::ForeignOwnable;
+
+/// The kernel's [`Box`] type -- a heap allocation for a single value of type `T`.
+///
+/// This is the kernel's version of the Rust stdlib's `Box`. There are several differences,
+/// for example no `noalias` attribute is emitted and partially moving out of a `Box` is not
+/// supported. There are also several API differences, e.g. `Box` always requires an [`Allocator`]
+/// implementation to be passed as generic, page [`Flags`] when allocating memory and all functions
+/// that may allocate memory are fallible.
+///
+/// `Box` works with any of the kernel's allocators, e.g. [`Kmalloc`], [`Vmalloc`] or [`KVmalloc`].
+/// There are aliases for `Box` with these allocators ([`KBox`], [`VBox`], [`KVBox`]).
+///
+/// When dropping a [`Box`], the value is also dropped and the heap memory is automatically freed.
+///
+/// # Examples
+///
+/// ```
+/// let b = KBox::<u64>::new(24_u64, GFP_KERNEL)?;
+///
+/// assert_eq!(*b, 24_u64);
+/// # Ok::<(), Error>(())
+/// ```
+///
+/// ```
+/// # use kernel::bindings;
+/// const SIZE: usize = bindings::KMALLOC_MAX_SIZE as usize + 1;
+/// struct Huge([u8; SIZE]);
+///
+/// assert!(KBox::<Huge>::new_uninit(GFP_KERNEL | __GFP_NOWARN).is_err());
+/// ```
+///
+/// ```
+/// # use kernel::bindings;
+/// const SIZE: usize = bindings::KMALLOC_MAX_SIZE as usize + 1;
+/// struct Huge([u8; SIZE]);
+///
+/// assert!(KVBox::<Huge>::new_uninit(GFP_KERNEL).is_ok());
+/// ```
+///
+/// # Invariants
+///
+/// `self.0` is always properly aligned and either points to memory allocated with `A` or, for
+/// zero-sized types, is a dangling, well aligned pointer.
+#[repr(transparent)]
+pub struct Box<T: ?Sized, A: Allocator>(NonNull<T>, PhantomData<A>);
+
+/// Type alias for [`Box`] with a [`Kmalloc`] allocator.
+///
+/// # Examples
+///
+/// ```
+/// let b = KBox::new(24_u64, GFP_KERNEL)?;
+///
+/// assert_eq!(*b, 24_u64);
+/// # Ok::<(), Error>(())
+/// ```
+pub type KBox<T> = Box<T, super::allocator::Kmalloc>;
+
+/// Type alias for [`Box`] with a [`Vmalloc`] allocator.
+///
+/// # Examples
+///
+/// ```
+/// let b = VBox::new(24_u64, GFP_KERNEL)?;
+///
+/// assert_eq!(*b, 24_u64);
+/// # Ok::<(), Error>(())
+/// ```
+pub type VBox<T> = Box<T, super::allocator::Vmalloc>;
+
+/// Type alias for [`Box`] with a [`KVmalloc`] allocator.
+///
+/// # Examples
+///
+/// ```
+/// let b = KVBox::new(24_u64, GFP_KERNEL)?;
+///
+/// assert_eq!(*b, 24_u64);
+/// # Ok::<(), Error>(())
+/// ```
+pub type KVBox<T> = Box<T, super::allocator::KVmalloc>;
+
+// SAFETY: `Box` is `Send` if `T` is `Send` because the `Box` owns a `T`.
+unsafe impl<T, A> Send for Box<T, A>
+where
+ T: Send + ?Sized,
+ A: Allocator,
+{
+}
+
+// SAFETY: `Box` is `Sync` if `T` is `Sync` because the `Box` owns a `T`.
+unsafe impl<T, A> Sync for Box<T, A>
+where
+ T: Sync + ?Sized,
+ A: Allocator,
+{
+}
+
+impl<T, A> Box<T, A>
+where
+ T: ?Sized,
+ A: Allocator,
+{
+ /// Creates a new `Box<T, A>` from a raw pointer.
+ ///
+ /// # Safety
+ ///
+ /// For non-ZSTs, `raw` must point at an allocation allocated with `A` that is sufficiently
+ /// aligned for and holds a valid `T`. The caller passes ownership of the allocation to the
+ /// `Box`.
+ ///
+ /// For ZSTs, `raw` must be a dangling, well aligned pointer.
+ #[inline]
+ pub const unsafe fn from_raw(raw: *mut T) -> Self {
+ // INVARIANT: Validity of `raw` is guaranteed by the safety preconditions of this function.
+ // SAFETY: By the safety preconditions of this function, `raw` is not a NULL pointer.
+ Self(unsafe { NonNull::new_unchecked(raw) }, PhantomData)
+ }
+
+ /// Consumes the `Box<T, A>` and returns a raw pointer.
+ ///
+ /// This will not run the destructor of `T` and for non-ZSTs the allocation will stay alive
+ /// indefinitely. Use [`Box::from_raw`] to recover the [`Box`], drop the value and free the
+ /// allocation, if any.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let x = KBox::new(24, GFP_KERNEL)?;
+ /// let ptr = KBox::into_raw(x);
+ /// // SAFETY: `ptr` comes from a previous call to `KBox::into_raw`.
+ /// let x = unsafe { KBox::from_raw(ptr) };
+ ///
+ /// assert_eq!(*x, 24);
+ /// # Ok::<(), Error>(())
+ /// ```
+ #[inline]
+ pub fn into_raw(b: Self) -> *mut T {
+ ManuallyDrop::new(b).0.as_ptr()
+ }
+
+ /// Consumes and leaks the `Box<T, A>` and returns a mutable reference.
+ ///
+ /// See [`Box::into_raw`] for more details.
+ #[inline]
+ pub fn leak<'a>(b: Self) -> &'a mut T {
+ // SAFETY: `Box::into_raw` always returns a properly aligned and dereferenceable pointer
+ // which points to an initialized instance of `T`.
+ unsafe { &mut *Box::into_raw(b) }
+ }
+}
+
+impl<T, A> Box<MaybeUninit<T>, A>
+where
+ A: Allocator,
+{
+ /// Converts a `Box<MaybeUninit<T>, A>` to a `Box<T, A>`.
+ ///
+ /// It is undefined behavior to call this function while the value inside of `b` is not yet
+ /// fully initialized.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that the value inside of `b` is in an initialized state.
+ pub unsafe fn assume_init(self) -> Box<T, A> {
+ let raw = Self::into_raw(self);
+
+ // SAFETY: `raw` comes from a previous call to `Box::into_raw`. By the safety requirements
+ // of this function, the value inside the `Box` is in an initialized state. Hence, it is
+ // safe to reconstruct the `Box` as `Box<T, A>`.
+ unsafe { Box::from_raw(raw.cast()) }
+ }
+
+ /// Writes the value and converts to `Box<T, A>`.
+ pub fn write(mut self, value: T) -> Box<T, A> {
+ (*self).write(value);
+
+ // SAFETY: We've just initialized `self`'s value.
+ unsafe { self.assume_init() }
+ }
+}
+
+impl<T, A> Box<T, A>
+where
+ A: Allocator,
+{
+ /// Creates a new `Box<T, A>` and initializes its contents with `x`.
+ ///
+ /// New memory is allocated with `A`. The allocation may fail, in which case an error is
+ /// returned. For ZSTs no memory is allocated.
+ pub fn new(x: T, flags: Flags) -> Result<Self, AllocError> {
+ let b = Self::new_uninit(flags)?;
+ Ok(Box::write(b, x))
+ }
+
+ /// Creates a new `Box<T, A>` with uninitialized contents.
+ ///
+ /// New memory is allocated with `A`. The allocation may fail, in which case an error is
+ /// returned. For ZSTs no memory is allocated.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let b = KBox::<u64>::new_uninit(GFP_KERNEL)?;
+ /// let b = KBox::write(b, 24);
+ ///
+ /// assert_eq!(*b, 24_u64);
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub fn new_uninit(flags: Flags) -> Result<Box<MaybeUninit<T>, A>, AllocError> {
+ let layout = Layout::new::<MaybeUninit<T>>();
+ let ptr = A::alloc(layout, flags)?;
+
+ // INVARIANT: `ptr` is either a dangling pointer or points to memory allocated with `A`,
+ // which is sufficient in size and alignment for storing a `T`.
+ Ok(Box(ptr.cast(), PhantomData))
+ }
+
+ /// Constructs a new `Pin<Box<T, A>>`. If `T` does not implement [`Unpin`], then `x` will be
+ /// pinned in memory and can't be moved.
+ #[inline]
+ pub fn pin(x: T, flags: Flags) -> Result<Pin<Box<T, A>>, AllocError>
+ where
+ A: 'static,
+ {
+ Ok(Self::new(x, flags)?.into())
+ }
+
+ /// Forgets the contents (does not run the destructor), but keeps the allocation.
+ fn forget_contents(this: Self) -> Box<MaybeUninit<T>, A> {
+ let ptr = Self::into_raw(this);
+
+ // SAFETY: `ptr` is valid, because it came from `Box::into_raw`.
+ unsafe { Box::from_raw(ptr.cast()) }
+ }
+
+ /// Drops the contents, but keeps the allocation.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let value = KBox::new([0; 32], GFP_KERNEL)?;
+ /// assert_eq!(*value, [0; 32]);
+ /// let value = KBox::drop_contents(value);
+ /// // Now we can re-use `value`:
+ /// let value = KBox::write(value, [1; 32]);
+ /// assert_eq!(*value, [1; 32]);
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub fn drop_contents(this: Self) -> Box<MaybeUninit<T>, A> {
+ let ptr = this.0.as_ptr();
+
+ // SAFETY: `ptr` is valid, because it came from `this`. After this call we never access the
+ // value stored in `this` again.
+ unsafe { core::ptr::drop_in_place(ptr) };
+
+ Self::forget_contents(this)
+ }
+
+ /// Moves the `Box`'s value out of the `Box` and consumes the `Box`.
+ pub fn into_inner(b: Self) -> T {
+ // SAFETY: By the type invariant `&*b` is valid for `read`.
+ let value = unsafe { core::ptr::read(&*b) };
+ let _ = Self::forget_contents(b);
+ value
+ }
+}
+
+impl<T, A> From<Box<T, A>> for Pin<Box<T, A>>
+where
+ T: ?Sized,
+ A: Allocator,
+{
+ /// Converts a `Box<T, A>` into a `Pin<Box<T, A>>`. If `T` does not implement [`Unpin`], then
+ /// `*b` will be pinned in memory and can't be moved.
+ ///
+ /// This moves `b` into `Pin` without moving `*b` or allocating and copying any memory.
+ fn from(b: Box<T, A>) -> Self {
+ // SAFETY: The value wrapped inside a `Pin<Box<T, A>>` cannot be moved or replaced as long
+ // as `T` does not implement `Unpin`.
+ unsafe { Pin::new_unchecked(b) }
+ }
+}
+
+impl<T, A> InPlaceWrite<T> for Box<MaybeUninit<T>, A>
+where
+ A: Allocator + 'static,
+{
+ type Initialized = Box<T, A>;
+
+ fn write_init<E>(mut self, init: impl Init<T, E>) -> Result<Self::Initialized, E> {
+ let slot = self.as_mut_ptr();
+ // SAFETY: When init errors/panics, slot will get deallocated but not dropped,
+ // slot is valid.
+ unsafe { init.__init(slot)? };
+ // SAFETY: All fields have been initialized.
+ Ok(unsafe { Box::assume_init(self) })
+ }
+
+ fn write_pin_init<E>(mut self, init: impl PinInit<T, E>) -> Result<Pin<Self::Initialized>, E> {
+ let slot = self.as_mut_ptr();
+ // SAFETY: When init errors/panics, slot will get deallocated but not dropped,
+ // slot is valid and will not be moved, because we pin it later.
+ unsafe { init.__pinned_init(slot)? };
+ // SAFETY: All fields have been initialized.
+ Ok(unsafe { Box::assume_init(self) }.into())
+ }
+}
+
+impl<T, A> InPlaceInit<T> for Box<T, A>
+where
+ A: Allocator + 'static,
+{
+ type PinnedSelf = Pin<Self>;
+
+ #[inline]
+ fn try_pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> Result<Pin<Self>, E>
+ where
+ E: From<AllocError>,
+ {
+ Box::<_, A>::new_uninit(flags)?.write_pin_init(init)
+ }
+
+ #[inline]
+ fn try_init<E>(init: impl Init<T, E>, flags: Flags) -> Result<Self, E>
+ where
+ E: From<AllocError>,
+ {
+ Box::<_, A>::new_uninit(flags)?.write_init(init)
+ }
+}
+
+impl<T: 'static, A> ForeignOwnable for Box<T, A>
+where
+ A: Allocator,
+{
+ type Borrowed<'a> = &'a T;
+ type BorrowedMut<'a> = &'a mut T;
+
+ fn into_foreign(self) -> *mut crate::ffi::c_void {
+ Box::into_raw(self).cast()
+ }
+
+ unsafe fn from_foreign(ptr: *mut crate::ffi::c_void) -> Self {
+ // SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous
+ // call to `Self::into_foreign`.
+ unsafe { Box::from_raw(ptr.cast()) }
+ }
+
+ unsafe fn borrow<'a>(ptr: *mut crate::ffi::c_void) -> &'a T {
+ // SAFETY: The safety requirements of this method ensure that the object remains alive and
+ // immutable for the duration of 'a.
+ unsafe { &*ptr.cast() }
+ }
+
+ unsafe fn borrow_mut<'a>(ptr: *mut crate::ffi::c_void) -> &'a mut T {
+ let ptr = ptr.cast();
+ // SAFETY: The safety requirements of this method ensure that the pointer is valid and that
+ // nothing else will access the value for the duration of 'a.
+ unsafe { &mut *ptr }
+ }
+}
+
+impl<T: 'static, A> ForeignOwnable for Pin<Box<T, A>>
+where
+ A: Allocator,
+{
+ type Borrowed<'a> = Pin<&'a T>;
+ type BorrowedMut<'a> = Pin<&'a mut T>;
+
+ fn into_foreign(self) -> *mut crate::ffi::c_void {
+ // SAFETY: We are still treating the box as pinned.
+ Box::into_raw(unsafe { Pin::into_inner_unchecked(self) }).cast()
+ }
+
+ unsafe fn from_foreign(ptr: *mut crate::ffi::c_void) -> Self {
+ // SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous
+ // call to `Self::into_foreign`.
+ unsafe { Pin::new_unchecked(Box::from_raw(ptr.cast())) }
+ }
+
+ unsafe fn borrow<'a>(ptr: *mut crate::ffi::c_void) -> Pin<&'a T> {
+ // SAFETY: The safety requirements for this function ensure that the object is still alive,
+ // so it is safe to dereference the raw pointer.
+ // The safety requirements of `from_foreign` also ensure that the object remains alive for
+ // the lifetime of the returned value.
+ let r = unsafe { &*ptr.cast() };
+
+ // SAFETY: This pointer originates from a `Pin<Box<T>>`.
+ unsafe { Pin::new_unchecked(r) }
+ }
+
+ unsafe fn borrow_mut<'a>(ptr: *mut crate::ffi::c_void) -> Pin<&'a mut T> {
+ let ptr = ptr.cast();
+ // SAFETY: The safety requirements for this function ensure that the object is still alive,
+ // so it is safe to dereference the raw pointer.
+ // The safety requirements of `from_foreign` also ensure that the object remains alive for
+ // the lifetime of the returned value.
+ let r = unsafe { &mut *ptr };
+
+ // SAFETY: This pointer originates from a `Pin<Box<T>>`.
+ unsafe { Pin::new_unchecked(r) }
+ }
+}
+
+impl<T, A> Deref for Box<T, A>
+where
+ T: ?Sized,
+ A: Allocator,
+{
+ type Target = T;
+
+ fn deref(&self) -> &T {
+ // SAFETY: `self.0` is always properly aligned, dereferenceable and points to an initialized
+ // instance of `T`.
+ unsafe { self.0.as_ref() }
+ }
+}
+
+impl<T, A> DerefMut for Box<T, A>
+where
+ T: ?Sized,
+ A: Allocator,
+{
+ fn deref_mut(&mut self) -> &mut T {
+ // SAFETY: `self.0` is always properly aligned, dereferenceable and points to an initialized
+ // instance of `T`.
+ unsafe { self.0.as_mut() }
+ }
+}
+
+impl<T, A> fmt::Display for Box<T, A>
+where
+ T: ?Sized + fmt::Display,
+ A: Allocator,
+{
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ <T as fmt::Display>::fmt(&**self, f)
+ }
+}
+
+impl<T, A> fmt::Debug for Box<T, A>
+where
+ T: ?Sized + fmt::Debug,
+ A: Allocator,
+{
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ <T as fmt::Debug>::fmt(&**self, f)
+ }
+}
+
+impl<T, A> Drop for Box<T, A>
+where
+ T: ?Sized,
+ A: Allocator,
+{
+ fn drop(&mut self) {
+ let layout = Layout::for_value::<T>(self);
+
+ // SAFETY: The pointer in `self.0` is guaranteed to be valid by the type invariant.
+ unsafe { core::ptr::drop_in_place::<T>(self.deref_mut()) };
+
+ // SAFETY:
+ // - `self.0` was previously allocated with `A`.
+ // - `layout` is equal to the `Layout` `self.0` was allocated with.
+ unsafe { A::free(self.0.cast(), layout) };
+ }
+}
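The `ForeignOwnable` implementations above are what allow a `Box` to cross an FFI boundary as an opaque pointer and be reclaimed later. A minimal round-trip sketch (the function name is hypothetical; in a real driver the pointer would be stashed in a C structure in between):

```rust
use kernel::alloc::flags::GFP_KERNEL;
use kernel::alloc::{AllocError, KBox};
use kernel::types::ForeignOwnable;

fn round_trip() -> Result<(), AllocError> {
    let b = KBox::new(1u32, GFP_KERNEL)?;

    // Hand ownership to the C side as an opaque `*mut c_void`.
    let raw = b.into_foreign();

    // ... `raw` may now be stored in and later fetched from C data ...

    // SAFETY: `raw` came from `into_foreign` on a `KBox<u32>` above and is
    // not used again afterwards.
    let b = unsafe { <KBox<u32> as ForeignOwnable>::from_foreign(raw) };
    assert_eq!(*b, 1);
    Ok(())
}
```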
diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs
new file mode 100644
index 000000000000..ae9d072741ce
--- /dev/null
+++ b/rust/kernel/alloc/kvec.rs
@@ -0,0 +1,913 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Implementation of [`Vec`].
+
+use super::{
+ allocator::{KVmalloc, Kmalloc, Vmalloc},
+ layout::ArrayLayout,
+ AllocError, Allocator, Box, Flags,
+};
+use core::{
+ fmt,
+ marker::PhantomData,
+ mem::{ManuallyDrop, MaybeUninit},
+ ops::Deref,
+ ops::DerefMut,
+ ops::Index,
+ ops::IndexMut,
+ ptr,
+ ptr::NonNull,
+ slice,
+ slice::SliceIndex,
+};
+
+/// Create a [`KVec`] containing the arguments.
+///
+/// New memory is allocated with `GFP_KERNEL`.
+///
+/// # Examples
+///
+/// ```
+/// let mut v = kernel::kvec![];
+/// v.push(1, GFP_KERNEL)?;
+/// assert_eq!(v, [1]);
+///
+/// let mut v = kernel::kvec![1; 3]?;
+/// v.push(4, GFP_KERNEL)?;
+/// assert_eq!(v, [1, 1, 1, 4]);
+///
+/// let mut v = kernel::kvec![1, 2, 3]?;
+/// v.push(4, GFP_KERNEL)?;
+/// assert_eq!(v, [1, 2, 3, 4]);
+///
+/// # Ok::<(), Error>(())
+/// ```
+#[macro_export]
+macro_rules! kvec {
+ () => (
+ $crate::alloc::KVec::new()
+ );
+ ($elem:expr; $n:expr) => (
+ $crate::alloc::KVec::from_elem($elem, $n, GFP_KERNEL)
+ );
+ ($($x:expr),+ $(,)?) => (
+ match $crate::alloc::KBox::new_uninit(GFP_KERNEL) {
+ Ok(b) => Ok($crate::alloc::KVec::from($crate::alloc::KBox::write(b, [$($x),+]))),
+ Err(e) => Err(e),
+ }
+ );
+}
+
+/// The kernel's [`Vec`] type.
+///
+/// A contiguous growable array type with contents allocated with the kernel's allocators (e.g.
+/// [`Kmalloc`], [`Vmalloc`] or [`KVmalloc`]), written `Vec<T, A>`.
+///
+/// For non-zero-sized values, a [`Vec`] will use the given allocator `A` for its allocation. For
+/// the most common allocators the type aliases [`KVec`], [`VVec`] and [`KVVec`] exist.
+///
+/// For zero-sized types the [`Vec`]'s pointer must be `dangling_mut::<T>`; no memory is allocated.
+///
+/// Generally, [`Vec`] consists of a pointer that represents the vector's backing buffer, the
+/// capacity of the vector (the number of elements that currently fit into the vector), its length
+/// (the number of elements that are currently stored in the vector) and the `Allocator` type used
+/// to allocate (and free) the backing buffer.
+///
+/// A [`Vec`] can be deconstructed into and (re-)constructed from its previously named raw parts
+/// and manually modified.
+///
+/// [`Vec`]'s backing buffer is automatically increased (re-allocated) as required when elements
+/// are added to the vector.
+///
+/// # Invariants
+///
+/// - `self.ptr` is always properly aligned and either points to memory allocated with `A` or, for
+/// zero-sized types, is a dangling, well aligned pointer.
+///
+/// - `self.len` always represents the exact number of elements stored in the vector.
+///
+/// - `self.layout` represents the absolute number of elements that can be stored within the vector
+/// without re-allocation. For ZSTs `self.layout`'s capacity is zero. However, it is legal for the
+/// backing buffer to be larger than `layout`.
+///
+/// - The `Allocator` type `A` of the vector is the exact same `Allocator` type the backing buffer
+/// was allocated with (and must be freed with).
+pub struct Vec<T, A: Allocator> {
+ ptr: NonNull<T>,
+ /// Represents the actual buffer size as `cap` times `size_of::<T>` bytes.
+ ///
+ /// Note: This isn't quite the same as `Self::capacity`, which in contrast returns the number of
+ /// elements we can still store without reallocating.
+ layout: ArrayLayout<T>,
+ len: usize,
+ _p: PhantomData<A>,
+}
+
+/// Type alias for [`Vec`] with a [`Kmalloc`] allocator.
+///
+/// # Examples
+///
+/// ```
+/// let mut v = KVec::new();
+/// v.push(1, GFP_KERNEL)?;
+/// assert_eq!(&v, &[1]);
+///
+/// # Ok::<(), Error>(())
+/// ```
+pub type KVec<T> = Vec<T, Kmalloc>;
+
+/// Type alias for [`Vec`] with a [`Vmalloc`] allocator.
+///
+/// # Examples
+///
+/// ```
+/// let mut v = VVec::new();
+/// v.push(1, GFP_KERNEL)?;
+/// assert_eq!(&v, &[1]);
+///
+/// # Ok::<(), Error>(())
+/// ```
+pub type VVec<T> = Vec<T, Vmalloc>;
+
+/// Type alias for [`Vec`] with a [`KVmalloc`] allocator.
+///
+/// # Examples
+///
+/// ```
+/// let mut v = KVVec::new();
+/// v.push(1, GFP_KERNEL)?;
+/// assert_eq!(&v, &[1]);
+///
+/// # Ok::<(), Error>(())
+/// ```
+pub type KVVec<T> = Vec<T, KVmalloc>;
+
+// SAFETY: `Vec` is `Send` if `T` is `Send` because `Vec` owns its elements.
+unsafe impl<T, A> Send for Vec<T, A>
+where
+ T: Send,
+ A: Allocator,
+{
+}
+
+// SAFETY: `Vec` is `Sync` if `T` is `Sync` because `Vec` owns its elements.
+unsafe impl<T, A> Sync for Vec<T, A>
+where
+ T: Sync,
+ A: Allocator,
+{
+}
+
+impl<T, A> Vec<T, A>
+where
+ A: Allocator,
+{
+ #[inline]
+ const fn is_zst() -> bool {
+ core::mem::size_of::<T>() == 0
+ }
+
+ /// Returns the number of elements that can be stored within the vector without allocating
+ /// additional memory.
+ pub fn capacity(&self) -> usize {
+ if const { Self::is_zst() } {
+ usize::MAX
+ } else {
+ self.layout.len()
+ }
+ }
+
+ /// Returns the number of elements stored within the vector.
+ #[inline]
+ pub fn len(&self) -> usize {
+ self.len
+ }
+
+ /// Forcefully sets `self.len` to `new_len`.
+ ///
+ /// # Safety
+ ///
+ /// - `new_len` must be less than or equal to [`Self::capacity`].
+ /// - If `new_len` is greater than `self.len`, all elements within the interval
+ /// [`self.len`, `new_len`) must be initialized.
+ #[inline]
+ pub unsafe fn set_len(&mut self, new_len: usize) {
+ debug_assert!(new_len <= self.capacity());
+ self.len = new_len;
+ }
+
+ /// Returns a slice of the entire vector.
+ #[inline]
+ pub fn as_slice(&self) -> &[T] {
+ self
+ }
+
+ /// Returns a mutable slice of the entire vector.
+ #[inline]
+ pub fn as_mut_slice(&mut self) -> &mut [T] {
+ self
+ }
+
+ /// Returns a mutable raw pointer to the vector's backing buffer, or, if `T` is a ZST, a
+ /// dangling raw pointer.
+ #[inline]
+ pub fn as_mut_ptr(&mut self) -> *mut T {
+ self.ptr.as_ptr()
+ }
+
+ /// Returns a raw pointer to the vector's backing buffer, or, if `T` is a ZST, a dangling raw
+ /// pointer.
+ #[inline]
+ pub fn as_ptr(&self) -> *const T {
+ self.ptr.as_ptr()
+ }
+
+ /// Returns `true` if the vector contains no elements, `false` otherwise.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut v = KVec::new();
+ /// assert!(v.is_empty());
+ ///
+ /// v.push(1, GFP_KERNEL)?;
+ /// assert!(!v.is_empty());
+ /// # Ok::<(), Error>(())
+ /// ```
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+
+ /// Creates a new, empty `Vec<T, A>`.
+ ///
+ /// This method does not allocate by itself.
+ #[inline]
+ pub const fn new() -> Self {
+ // INVARIANT: Since this is a new, empty `Vec` with no backing memory yet,
+ // - `ptr` is a properly aligned dangling pointer for type `T`,
+ // - `layout` is an empty `ArrayLayout` (zero capacity)
+ // - `len` is zero, since no elements can be or have been stored,
+ // - `A` is always valid.
+ Self {
+ ptr: NonNull::dangling(),
+ layout: ArrayLayout::empty(),
+ len: 0,
+ _p: PhantomData::<A>,
+ }
+ }
+
+ /// Returns a slice of `MaybeUninit<T>` for the remaining spare capacity of the vector.
+ pub fn spare_capacity_mut(&mut self) -> &mut [MaybeUninit<T>] {
+ // SAFETY:
+ // - `self.len` is smaller than or equal to `self.capacity` and hence, the resulting pointer is
+ // guaranteed to be part of the same allocated object.
+ // - `self.len` can not overflow `isize`.
+ let ptr = unsafe { self.as_mut_ptr().add(self.len) } as *mut MaybeUninit<T>;
+
+ // SAFETY: The memory between `self.len` and `self.capacity` is guaranteed to be allocated
+ // and valid, but uninitialized.
+ unsafe { slice::from_raw_parts_mut(ptr, self.capacity() - self.len) }
+ }
+
+ /// Appends an element to the back of the [`Vec`] instance.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut v = KVec::new();
+ /// v.push(1, GFP_KERNEL)?;
+ /// assert_eq!(&v, &[1]);
+ ///
+ /// v.push(2, GFP_KERNEL)?;
+ /// assert_eq!(&v, &[1, 2]);
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub fn push(&mut self, v: T, flags: Flags) -> Result<(), AllocError> {
+ self.reserve(1, flags)?;
+
+ // SAFETY:
+ // - `self.len` is smaller than `self.capacity` and hence, the resulting pointer is
+ // guaranteed to be part of the same allocated object.
+ // - `self.len` can not overflow `isize`.
+ let ptr = unsafe { self.as_mut_ptr().add(self.len) };
+
+ // SAFETY:
+ // - `ptr` is properly aligned and valid for writes.
+ unsafe { core::ptr::write(ptr, v) };
+
+ // SAFETY: We just initialised the first spare entry, so it is safe to increase the length
+ // by 1. We also know that the new length is <= capacity because of the previous call to
+ // `reserve` above.
+ unsafe { self.set_len(self.len() + 1) };
+ Ok(())
+ }
+
+ /// Creates a new [`Vec`] instance with at least the given capacity.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let v = KVec::<u32>::with_capacity(20, GFP_KERNEL)?;
+ ///
+ /// assert!(v.capacity() >= 20);
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub fn with_capacity(capacity: usize, flags: Flags) -> Result<Self, AllocError> {
+ let mut v = Vec::new();
+
+ v.reserve(capacity, flags)?;
+
+ Ok(v)
+ }
+
+ /// Creates a `Vec<T, A>` from a pointer, a length and a capacity using the allocator `A`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut v = kernel::kvec![1, 2, 3]?;
+ /// v.reserve(1, GFP_KERNEL)?;
+ ///
+ /// let (mut ptr, mut len, cap) = v.into_raw_parts();
+ ///
+ /// // SAFETY: We've just reserved memory for another element.
+ /// unsafe { ptr.add(len).write(4) };
+ /// len += 1;
+ ///
+ /// // SAFETY: We only wrote an additional element at the end of the `KVec`'s buffer and
+ /// // correspondingly increased the length of the `KVec` by one. Otherwise, we construct it
+ /// // from the exact same raw parts.
+ /// let v = unsafe { KVec::from_raw_parts(ptr, len, cap) };
+ ///
+ /// assert_eq!(v, [1, 2, 3, 4]);
+ ///
+ /// # Ok::<(), Error>(())
+ /// ```
+ ///
+ /// # Safety
+ ///
+ /// If `T` is a ZST:
+ ///
+ /// - `ptr` must be a dangling, well aligned pointer.
+ ///
+ /// Otherwise:
+ ///
+ /// - `ptr` must have been allocated with the allocator `A`.
+ /// - `ptr` must satisfy or exceed the alignment requirements of `T`.
+ /// - `ptr` must point to memory with a size of at least `size_of::<T>() * capacity` bytes.
+ /// - The allocated size in bytes must not be larger than `isize::MAX`.
+ /// - `length` must be less than or equal to `capacity`.
+ /// - The first `length` elements must be initialized values of type `T`.
+ ///
+ /// It is also valid to create an empty `Vec` passing a dangling pointer for `ptr` and zero for
+ /// `cap` and `len`.
+ pub unsafe fn from_raw_parts(ptr: *mut T, length: usize, capacity: usize) -> Self {
+ let layout = if Self::is_zst() {
+ ArrayLayout::empty()
+ } else {
+ // SAFETY: By the safety requirements of this function, `capacity * size_of::<T>()` is
+ // smaller than `isize::MAX`.
+ unsafe { ArrayLayout::new_unchecked(capacity) }
+ };
+
+ // INVARIANT: For ZSTs, we store an empty `ArrayLayout`, all other type invariants are
+ // covered by the safety requirements of this function.
+ Self {
+ // SAFETY: By the safety requirements, `ptr` is either dangling or pointing to a valid
+ // memory allocation, allocated with `A`.
+ ptr: unsafe { NonNull::new_unchecked(ptr) },
+ layout,
+ len: length,
+ _p: PhantomData::<A>,
+ }
+ }
+
+ /// Consumes the `Vec<T, A>` and returns its raw components `pointer`, `length` and `capacity`.
+ ///
+ /// This will not run the destructor of the contained elements and for non-ZSTs the allocation
+ /// will stay alive indefinitely. Use [`Vec::from_raw_parts`] to recover the [`Vec`], drop the
+ /// elements and free the allocation, if any.
+ pub fn into_raw_parts(self) -> (*mut T, usize, usize) {
+ let mut me = ManuallyDrop::new(self);
+ let len = me.len();
+ let capacity = me.capacity();
+ let ptr = me.as_mut_ptr();
+ (ptr, len, capacity)
+ }
+
+ /// Ensures that the capacity exceeds the length by at least `additional` elements.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut v = KVec::new();
+ /// v.push(1, GFP_KERNEL)?;
+ ///
+ /// v.reserve(10, GFP_KERNEL)?;
+ /// let cap = v.capacity();
+ /// assert!(cap >= 10);
+ ///
+ /// v.reserve(10, GFP_KERNEL)?;
+ /// let new_cap = v.capacity();
+ /// assert_eq!(new_cap, cap);
+ ///
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub fn reserve(&mut self, additional: usize, flags: Flags) -> Result<(), AllocError> {
+ let len = self.len();
+ let cap = self.capacity();
+
+ if cap - len >= additional {
+ return Ok(());
+ }
+
+ if Self::is_zst() {
+ // The capacity is already `usize::MAX` for ZSTs, we can't go higher.
+ return Err(AllocError);
+ }
+
+ // We know that `cap <= isize::MAX` because of the type invariants of `Self`. So the
+ // multiplication by two won't overflow.
+ let new_cap = core::cmp::max(cap * 2, len.checked_add(additional).ok_or(AllocError)?);
+ let layout = ArrayLayout::new(new_cap).map_err(|_| AllocError)?;
+
+ // SAFETY:
+ // - `ptr` is valid because it's either `None` or comes from a previous call to
+ // `A::realloc`.
+ // - `self.layout` matches the `ArrayLayout` of the preceding allocation.
+ let ptr = unsafe {
+ A::realloc(
+ Some(self.ptr.cast()),
+ layout.into(),
+ self.layout.into(),
+ flags,
+ )?
+ };
+
+ // INVARIANT:
+ // - `layout` is some `ArrayLayout::<T>`,
+ // - `ptr` has been created by `A::realloc` from `layout`.
+ self.ptr = ptr.cast();
+ self.layout = layout;
+
+ Ok(())
+ }
+}
+
+impl<T: Clone, A: Allocator> Vec<T, A> {
+ /// Extend the vector by `n` clones of `value`.
+ pub fn extend_with(&mut self, n: usize, value: T, flags: Flags) -> Result<(), AllocError> {
+ if n == 0 {
+ return Ok(());
+ }
+
+ self.reserve(n, flags)?;
+
+ let spare = self.spare_capacity_mut();
+
+ for item in spare.iter_mut().take(n - 1) {
+ item.write(value.clone());
+ }
+
+ // We can write the last element directly without cloning needlessly.
+ spare[n - 1].write(value);
+
+ // SAFETY:
+ // - `self.len() + n <= self.capacity()` due to the call to reserve above,
+ // - the loop and the line above initialized the next `n` elements.
+ unsafe { self.set_len(self.len() + n) };
+
+ Ok(())
+ }
+
+ /// Pushes clones of the elements of the given slice into the [`Vec`] instance.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut v = KVec::new();
+ /// v.push(1, GFP_KERNEL)?;
+ ///
+ /// v.extend_from_slice(&[20, 30, 40], GFP_KERNEL)?;
+ /// assert_eq!(&v, &[1, 20, 30, 40]);
+ ///
+ /// v.extend_from_slice(&[50, 60], GFP_KERNEL)?;
+ /// assert_eq!(&v, &[1, 20, 30, 40, 50, 60]);
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub fn extend_from_slice(&mut self, other: &[T], flags: Flags) -> Result<(), AllocError> {
+ self.reserve(other.len(), flags)?;
+ for (slot, item) in core::iter::zip(self.spare_capacity_mut(), other) {
+ slot.write(item.clone());
+ }
+
+ // SAFETY:
+ // - `other.len()` spare entries have just been initialized, so it is safe to increase
+ // the length by the same number.
+ // - `self.len() + other.len() <= self.capacity()` is guaranteed by the preceding `reserve`
+ // call.
+ unsafe { self.set_len(self.len() + other.len()) };
+ Ok(())
+ }
+
+ /// Create a new `Vec<T, A>` and extend it by `n` clones of `value`.
+ pub fn from_elem(value: T, n: usize, flags: Flags) -> Result<Self, AllocError> {
+ let mut v = Self::with_capacity(n, flags)?;
+
+ v.extend_with(n, value, flags)?;
+
+ Ok(v)
+ }
+}
+
+impl<T, A> Drop for Vec<T, A>
+where
+ A: Allocator,
+{
+ fn drop(&mut self) {
+ // SAFETY: `self.as_mut_ptr` is guaranteed to be valid by the type invariant.
+ unsafe {
+ ptr::drop_in_place(core::ptr::slice_from_raw_parts_mut(
+ self.as_mut_ptr(),
+ self.len,
+ ))
+ };
+
+ // SAFETY:
+ // - `self.ptr` was previously allocated with `A`.
+ // - `self.layout` matches the `ArrayLayout` of the preceding allocation.
+ unsafe { A::free(self.ptr.cast(), self.layout.into()) };
+ }
+}
+
+impl<T, A, const N: usize> From<Box<[T; N], A>> for Vec<T, A>
+where
+ A: Allocator,
+{
+ fn from(b: Box<[T; N], A>) -> Vec<T, A> {
+ let len = b.len();
+ let ptr = Box::into_raw(b);
+
+ // SAFETY:
+ // - `b` has been allocated with `A`,
+ // - `ptr` fulfills the alignment requirements for `T`,
+ // - `ptr` points to memory with at least a size of `size_of::<T>() * len`,
+ // - all elements within `b` are initialized values of `T`,
+ // - `len` does not exceed `isize::MAX`.
+ unsafe { Vec::from_raw_parts(ptr as _, len, len) }
+ }
+}
+
+impl<T> Default for KVec<T> {
+ #[inline]
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl<T: fmt::Debug, A: Allocator> fmt::Debug for Vec<T, A> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt::Debug::fmt(&**self, f)
+ }
+}
+
+impl<T, A> Deref for Vec<T, A>
+where
+ A: Allocator,
+{
+ type Target = [T];
+
+ #[inline]
+ fn deref(&self) -> &[T] {
+ // SAFETY: The memory behind `self.as_ptr()` is guaranteed to contain `self.len`
+ // initialized elements of type `T`.
+ unsafe { slice::from_raw_parts(self.as_ptr(), self.len) }
+ }
+}
+
+impl<T, A> DerefMut for Vec<T, A>
+where
+ A: Allocator,
+{
+ #[inline]
+ fn deref_mut(&mut self) -> &mut [T] {
+ // SAFETY: The memory behind `self.as_ptr()` is guaranteed to contain `self.len`
+ // initialized elements of type `T`.
+ unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), self.len) }
+ }
+}
+
+impl<T: Eq, A> Eq for Vec<T, A> where A: Allocator {}
+
+impl<T, I: SliceIndex<[T]>, A> Index<I> for Vec<T, A>
+where
+ A: Allocator,
+{
+ type Output = I::Output;
+
+ #[inline]
+ fn index(&self, index: I) -> &Self::Output {
+ Index::index(&**self, index)
+ }
+}
+
+impl<T, I: SliceIndex<[T]>, A> IndexMut<I> for Vec<T, A>
+where
+ A: Allocator,
+{
+ #[inline]
+ fn index_mut(&mut self, index: I) -> &mut Self::Output {
+ IndexMut::index_mut(&mut **self, index)
+ }
+}
+
+macro_rules! impl_slice_eq {
+ ($([$($vars:tt)*] $lhs:ty, $rhs:ty,)*) => {
+ $(
+ impl<T, U, $($vars)*> PartialEq<$rhs> for $lhs
+ where
+ T: PartialEq<U>,
+ {
+ #[inline]
+ fn eq(&self, other: &$rhs) -> bool { self[..] == other[..] }
+ }
+ )*
+ }
+}
+
+impl_slice_eq! {
+ [A1: Allocator, A2: Allocator] Vec<T, A1>, Vec<U, A2>,
+ [A: Allocator] Vec<T, A>, &[U],
+ [A: Allocator] Vec<T, A>, &mut [U],
+ [A: Allocator] &[T], Vec<U, A>,
+ [A: Allocator] &mut [T], Vec<U, A>,
+ [A: Allocator] Vec<T, A>, [U],
+ [A: Allocator] [T], Vec<U, A>,
+ [A: Allocator, const N: usize] Vec<T, A>, [U; N],
+ [A: Allocator, const N: usize] Vec<T, A>, &[U; N],
+}
+
+impl<'a, T, A> IntoIterator for &'a Vec<T, A>
+where
+ A: Allocator,
+{
+ type Item = &'a T;
+ type IntoIter = slice::Iter<'a, T>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.iter()
+ }
+}
+
+impl<'a, T, A: Allocator> IntoIterator for &'a mut Vec<T, A>
+where
+ A: Allocator,
+{
+ type Item = &'a mut T;
+ type IntoIter = slice::IterMut<'a, T>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.iter_mut()
+ }
+}
+
+/// An [`Iterator`] implementation for [`Vec`] that moves elements out of a vector.
+///
+/// This structure is created by the [`Vec::into_iter`] method on [`Vec`] (provided by the
+/// [`IntoIterator`] trait).
+///
+/// # Examples
+///
+/// ```
+/// let v = kernel::kvec![0, 1, 2]?;
+/// let iter = v.into_iter();
+///
+/// # Ok::<(), Error>(())
+/// ```
+pub struct IntoIter<T, A: Allocator> {
+ ptr: *mut T,
+ buf: NonNull<T>,
+ len: usize,
+ layout: ArrayLayout<T>,
+ _p: PhantomData<A>,
+}
+
+impl<T, A> IntoIter<T, A>
+where
+ A: Allocator,
+{
+ fn into_raw_parts(self) -> (*mut T, NonNull<T>, usize, usize) {
+ let me = ManuallyDrop::new(self);
+ let ptr = me.ptr;
+ let buf = me.buf;
+ let len = me.len;
+ let cap = me.layout.len();
+ (ptr, buf, len, cap)
+ }
+
+ /// Same as `Iterator::collect` but specialized for `Vec`'s `IntoIter`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let v = kernel::kvec![1, 2, 3]?;
+ /// let mut it = v.into_iter();
+ ///
+ /// assert_eq!(it.next(), Some(1));
+ ///
+ /// let v = it.collect(GFP_KERNEL);
+ /// assert_eq!(v, [2, 3]);
+ ///
+ /// # Ok::<(), Error>(())
+ /// ```
+ ///
+ /// # Implementation details
+ ///
+ /// Currently, we can't implement `FromIterator`. There are a couple of issues with this trait
+ /// in the kernel, namely:
+ ///
+ /// - Rust's specialization feature is unstable. This prevents us from optimizing for the
+ ///   special case where `I::IntoIter` equals `Vec`'s `IntoIter` type.
+ /// - We can't use `I::IntoIter`'s type ID to work around this either, since `FromIterator`
+ ///   doesn't require this type to be `'static`.
+ /// - `FromIterator::from_iter` returns `Self` instead of `Result<Self, AllocError>`, hence
+ ///   we can't properly handle allocation failures.
+ /// - Neither `Iterator::collect` nor `FromIterator::from_iter` can handle additional allocation
+ /// flags.
+ ///
+ /// Instead, we provide `IntoIter::collect`, such that we can at least convert an `IntoIter`
+ /// back into a `Vec`.
+ ///
+ /// Note that `IntoIter::collect` never allocates a new backing buffer; it re-uses the
+ /// existing one. However, the backing buffer may be shrunk to the actual count of elements,
+ /// using the given `Flags`.
+ pub fn collect(self, flags: Flags) -> Vec<T, A> {
+ let old_layout = self.layout;
+ let (mut ptr, buf, len, mut cap) = self.into_raw_parts();
+ let has_advanced = ptr != buf.as_ptr();
+
+ if has_advanced {
+ // Copy the contents we have advanced to at the beginning of the buffer.
+ //
+ // SAFETY:
+ // - `ptr` is valid for reads of `len * size_of::<T>()` bytes,
+ // - `buf.as_ptr()` is valid for writes of `len * size_of::<T>()` bytes,
+ // - `ptr` and `buf.as_ptr()` are not subject to aliasing restrictions relative to
+ //   each other,
+ // - both `ptr` and `buf.as_ptr()` are properly aligned.
+ unsafe { ptr::copy(ptr, buf.as_ptr(), len) };
+ ptr = buf.as_ptr();
+
+ // SAFETY: `len` is guaranteed to be smaller than `self.layout.len()`.
+ let layout = unsafe { ArrayLayout::<T>::new_unchecked(len) };
+
+ // SAFETY: `buf` points to the start of the backing buffer and `len` is guaranteed to be
+ // smaller than `cap`. Depending on `A`, this operation may shrink the buffer or leave
+ // it as it is.
+ ptr = match unsafe {
+ A::realloc(Some(buf.cast()), layout.into(), old_layout.into(), flags)
+ } {
+ // If shrinking fails, which should not happen in practice, continue with the
+ // existing buffer.
+ Err(_) => ptr,
+ Ok(ptr) => {
+ cap = len;
+ ptr.as_ptr().cast()
+ }
+ };
+ }
+
+ // SAFETY: If the iterator has been advanced, the advanced elements have been copied to
+ // the beginning of the buffer and `len` has been adjusted accordingly.
+ //
+ // - `ptr` is guaranteed to point to the start of the backing buffer.
+ // - `cap` is either the original capacity or, after shrinking the buffer, equal to `len`.
+ // - `alloc` is guaranteed to be unchanged since `into_iter` has been called on the original
+ // `Vec`.
+ unsafe { Vec::from_raw_parts(ptr, len, cap) }
+ }
+}
+
+impl<T, A> Iterator for IntoIter<T, A>
+where
+ A: Allocator,
+{
+ type Item = T;
+
+ /// # Examples
+ ///
+ /// ```
+ /// let v = kernel::kvec![1, 2, 3]?;
+ /// let mut it = v.into_iter();
+ ///
+ /// assert_eq!(it.next(), Some(1));
+ /// assert_eq!(it.next(), Some(2));
+ /// assert_eq!(it.next(), Some(3));
+ /// assert_eq!(it.next(), None);
+ ///
+ /// # Ok::<(), Error>(())
+ /// ```
+ fn next(&mut self) -> Option<T> {
+ if self.len == 0 {
+ return None;
+ }
+
+ let current = self.ptr;
+
+ // SAFETY: We can't overflow; decreasing `self.len` by one every time we advance `self.ptr`
+ // by one guarantees that.
+ unsafe { self.ptr = self.ptr.add(1) };
+
+ self.len -= 1;
+
+ // SAFETY: `current` is guaranteed to point at a valid element within the buffer.
+ Some(unsafe { current.read() })
+ }
+
+ /// # Examples
+ ///
+ /// ```
+ /// let v: KVec<u32> = kernel::kvec![1, 2, 3]?;
+ /// let mut iter = v.into_iter();
+ /// let size = iter.size_hint().0;
+ ///
+ /// iter.next();
+ /// assert_eq!(iter.size_hint().0, size - 1);
+ ///
+ /// iter.next();
+ /// assert_eq!(iter.size_hint().0, size - 2);
+ ///
+ /// iter.next();
+ /// assert_eq!(iter.size_hint().0, size - 3);
+ ///
+ /// # Ok::<(), Error>(())
+ /// ```
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (self.len, Some(self.len))
+ }
+}
+
+impl<T, A> Drop for IntoIter<T, A>
+where
+ A: Allocator,
+{
+ fn drop(&mut self) {
+ // SAFETY: `self.ptr` is guaranteed to be valid by the type invariant.
+ unsafe { ptr::drop_in_place(ptr::slice_from_raw_parts_mut(self.ptr, self.len)) };
+
+ // SAFETY:
+ // - `self.buf` was previously allocated with `A`.
+ // - `self.layout` matches the `ArrayLayout` of the preceding allocation.
+ unsafe { A::free(self.buf.cast(), self.layout.into()) };
+ }
+}
+
+impl<T, A> IntoIterator for Vec<T, A>
+where
+ A: Allocator,
+{
+ type Item = T;
+ type IntoIter = IntoIter<T, A>;
+
+ /// Consumes the `Vec<T, A>` and creates an `Iterator`, which moves each value out of the
+ /// vector (from start to end).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let v = kernel::kvec![1, 2]?;
+ /// let mut v_iter = v.into_iter();
+ ///
+ /// let first_element: Option<u32> = v_iter.next();
+ ///
+ /// assert_eq!(first_element, Some(1));
+ /// assert_eq!(v_iter.next(), Some(2));
+ /// assert_eq!(v_iter.next(), None);
+ ///
+ /// # Ok::<(), Error>(())
+ /// ```
+ ///
+ /// ```
+ /// let v = kernel::kvec![];
+ /// let mut v_iter = v.into_iter();
+ ///
+ /// let first_element: Option<u32> = v_iter.next();
+ ///
+ /// assert_eq!(first_element, None);
+ ///
+ /// # Ok::<(), Error>(())
+ /// ```
+ #[inline]
+ fn into_iter(self) -> Self::IntoIter {
+ let buf = self.ptr;
+ let layout = self.layout;
+ let (ptr, len, _) = self.into_raw_parts();
+
+ IntoIter {
+ ptr,
+ buf,
+ len,
+ layout,
+ _p: PhantomData::<A>,
+ }
+ }
+}
diff --git a/rust/kernel/alloc/layout.rs b/rust/kernel/alloc/layout.rs
new file mode 100644
index 000000000000..93ed514f7cc7
--- /dev/null
+++ b/rust/kernel/alloc/layout.rs
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Memory layout.
+//!
+//! Custom layout types extending or improving [`Layout`].
+
+use core::{alloc::Layout, marker::PhantomData};
+
+/// Error when constructing an [`ArrayLayout`].
+pub struct LayoutError;
+
+/// A layout for an array `[T; n]`.
+///
+/// # Invariants
+///
+/// - `len * size_of::<T>() <= isize::MAX`.
+pub struct ArrayLayout<T> {
+ len: usize,
+ _phantom: PhantomData<fn() -> T>,
+}
+
+impl<T> Clone for ArrayLayout<T> {
+ fn clone(&self) -> Self {
+ *self
+ }
+}
+impl<T> Copy for ArrayLayout<T> {}
+
+const ISIZE_MAX: usize = isize::MAX as usize;
+
+impl<T> ArrayLayout<T> {
+ /// Creates a new layout for `[T; 0]`.
+ pub const fn empty() -> Self {
+ // INVARIANT: `0 * size_of::<T>() <= isize::MAX`.
+ Self {
+ len: 0,
+ _phantom: PhantomData,
+ }
+ }
+
+ /// Creates a new layout for `[T; len]`.
+ ///
+ /// # Errors
+ ///
+ /// When `len * size_of::<T>()` overflows or when `len * size_of::<T>() > isize::MAX`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use kernel::alloc::layout::{ArrayLayout, LayoutError};
+ /// let layout = ArrayLayout::<i32>::new(15)?;
+ /// assert_eq!(layout.len(), 15);
+ ///
+ /// // Errors because `len * size_of::<T>()` overflows.
+ /// let layout = ArrayLayout::<i32>::new(isize::MAX as usize);
+ /// assert!(layout.is_err());
+ ///
+ /// // Errors because `len * size_of::<i32>() > isize::MAX`,
+ /// // even though `len < isize::MAX`.
+ /// let layout = ArrayLayout::<i32>::new(isize::MAX as usize / 2);
+ /// assert!(layout.is_err());
+ ///
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub const fn new(len: usize) -> Result<Self, LayoutError> {
+ match len.checked_mul(core::mem::size_of::<T>()) {
+ Some(size) if size <= ISIZE_MAX => {
+ // INVARIANT: We checked above that `len * size_of::<T>() <= isize::MAX`.
+ Ok(Self {
+ len,
+ _phantom: PhantomData,
+ })
+ }
+ _ => Err(LayoutError),
+ }
+ }
+
+ /// Creates a new layout for `[T; len]`.
+ ///
+ /// # Safety
+ ///
+ /// `len` must be a value for which `len * size_of::<T>() <= isize::MAX` holds.
+ pub unsafe fn new_unchecked(len: usize) -> Self {
+ // INVARIANT: By the safety requirements of this function
+ // `len * size_of::<T>() <= isize::MAX`.
+ Self {
+ len,
+ _phantom: PhantomData,
+ }
+ }
+
+ /// Returns the number of array elements represented by this layout.
+ pub const fn len(&self) -> usize {
+ self.len
+ }
+
+ /// Returns `true` when no array elements are represented by this layout.
+ pub const fn is_empty(&self) -> bool {
+ self.len == 0
+ }
+}
+
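+/// # Examples
+///
+/// A minimal sketch of the infallible conversion (illustrative, mirroring the
+/// example style above):
+///
+/// ```
+/// # use kernel::alloc::layout::ArrayLayout;
+/// # use core::alloc::Layout;
+/// let array_layout = ArrayLayout::<u64>::new(8)?;
+/// let layout: Layout = array_layout.into();
+/// assert_eq!(layout.size(), 8 * core::mem::size_of::<u64>());
+///
+/// # Ok::<(), Error>(())
+/// ```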
+impl<T> From<ArrayLayout<T>> for Layout {
+ fn from(value: ArrayLayout<T>) -> Self {
+ let res = Layout::array::<T>(value.len);
+ // SAFETY: By the type invariant of `ArrayLayout` we have
+ // `len * size_of::<T>() <= isize::MAX` and thus the result must be `Ok`.
+ unsafe { res.unwrap_unchecked() }
+ }
+}
diff --git a/rust/kernel/allocator.rs b/rust/kernel/allocator.rs
deleted file mode 100644
index 4b057e837358..000000000000
--- a/rust/kernel/allocator.rs
+++ /dev/null
@@ -1,88 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-//! Allocator support.
-
-use core::alloc::{GlobalAlloc, Layout};
-use core::ptr;
-
-use crate::bindings;
-
-struct KernelAllocator;
-
-/// Calls `krealloc` with a proper size to alloc a new object aligned to `new_layout`'s alignment.
-///
-/// # Safety
-///
-/// - `ptr` can be either null or a pointer which has been allocated by this allocator.
-/// - `new_layout` must have a non-zero size.
-unsafe fn krealloc_aligned(ptr: *mut u8, new_layout: Layout, flags: bindings::gfp_t) -> *mut u8 {
- // Customized layouts from `Layout::from_size_align()` can have size < align, so pad first.
- let layout = new_layout.pad_to_align();
-
- let mut size = layout.size();
-
- if layout.align() > bindings::ARCH_SLAB_MINALIGN {
- // The alignment requirement exceeds the slab guarantee, thus try to enlarge the size
- // to use the "power-of-two" size/alignment guarantee (see comments in `kmalloc()` for
- // more information).
- //
- // Note that `layout.size()` (after padding) is guaranteed to be a multiple of
- // `layout.align()`, so `next_power_of_two` gives enough alignment guarantee.
- size = size.next_power_of_two();
- }
-
- // SAFETY:
- // - `ptr` is either null or a pointer returned from a previous `k{re}alloc()` by the
- // function safety requirement.
- // - `size` is greater than 0 since it's either a `layout.size()` (which cannot be zero
- // according to the function safety requirement) or a result from `next_power_of_two()`.
- unsafe { bindings::krealloc(ptr as *const core::ffi::c_void, size, flags) as *mut u8 }
-}
-
-unsafe impl GlobalAlloc for KernelAllocator {
- unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
- // SAFETY: `ptr::null_mut()` is null and `layout` has a non-zero size by the function safety
- // requirement.
- unsafe { krealloc_aligned(ptr::null_mut(), layout, bindings::GFP_KERNEL) }
- }
-
- unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) {
- unsafe {
- bindings::kfree(ptr as *const core::ffi::c_void);
- }
- }
-
- unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
- // SAFETY:
- // - `new_size`, when rounded up to the nearest multiple of `layout.align()`, will not
- // overflow `isize` by the function safety requirement.
- // - `layout.align()` is a proper alignment (i.e. not zero and must be a power of two).
- let layout = unsafe { Layout::from_size_align_unchecked(new_size, layout.align()) };
-
- // SAFETY:
- // - `ptr` is either null or a pointer allocated by this allocator by the function safety
- // requirement.
- // - the size of `layout` is not zero because `new_size` is not zero by the function safety
- // requirement.
- unsafe { krealloc_aligned(ptr, layout, bindings::GFP_KERNEL) }
- }
-
- unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
- // SAFETY: `ptr::null_mut()` is null and `layout` has a non-zero size by the function safety
- // requirement.
- unsafe {
- krealloc_aligned(
- ptr::null_mut(),
- layout,
- bindings::GFP_KERNEL | bindings::__GFP_ZERO,
- )
- }
- }
-}
-
-#[global_allocator]
-static ALLOCATOR: KernelAllocator = KernelAllocator;
-
-// See <https://github.com/rust-lang/rust/pull/86844>.
-#[no_mangle]
-static __rust_no_alloc_shim_is_unstable: u8 = 0;
diff --git a/rust/kernel/block.rs b/rust/kernel/block.rs
new file mode 100644
index 000000000000..150f710efe5b
--- /dev/null
+++ b/rust/kernel/block.rs
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Types for working with the block layer.
+
+pub mod mq;
diff --git a/rust/kernel/block/mq.rs b/rust/kernel/block/mq.rs
new file mode 100644
index 000000000000..fb0f393c1cea
--- /dev/null
+++ b/rust/kernel/block/mq.rs
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! This module provides types for implementing block drivers that interface
+//! with the blk-mq subsystem.
+//!
+//! To implement a block device driver, a Rust module must do the following:
+//!
+//! - Implement [`Operations`] for a type `T`.
+//! - Create a [`TagSet<T>`].
+//! - Create a [`GenDisk<T>`], via the [`GenDiskBuilder`].
+//! - Add the disk to the system by calling [`GenDiskBuilder::build`] passing in
+//! the `TagSet` reference.
+//!
+//! The types available in this module that have direct C counterparts are:
+//!
+//! - The [`TagSet`] type that abstracts the C type `struct blk_mq_tag_set`.
+//! - The [`GenDisk`] type that abstracts the C type `struct gendisk`.
+//! - The [`Request`] type that abstracts the C type `struct request`.
+//!
+//! The kernel will interface with the block device driver by calling the method
+//! implementations of the `Operations` trait.
+//!
+//! IO requests are passed to the driver as [`kernel::types::ARef<Request>`]
+//! instances. The `Request` type is a wrapper around the C `struct request`.
+//! The driver must mark end of processing by calling one of the
+//! `Request::end` methods. Failure to do so can lead to deadlock or timeout
+//! errors. Please note that the C function `blk_mq_start_request` is implicitly
+//! called when the request is queued with the driver.
+//!
+//! The `TagSet` is responsible for creating and maintaining a mapping between
+//! `Request`s and integer ids as well as carrying a pointer to the vtable
+//! generated by `Operations`. This mapping is useful for associating
+//! completions from hardware with the correct `Request` instance. The `TagSet`
+//! determines the maximum queue depth by setting the number of `Request`
+//! instances available to the driver, and it determines the number of queues to
+//! instantiate for the driver. If possible, a driver should allocate one queue
+//! per core, to keep queue data local to a core.
+//!
+//! One `TagSet` instance can be shared between multiple `GenDisk` instances.
+//! This can be useful when implementing drivers where one piece of hardware
+//! with one set of IO resources is represented to the user as multiple disks.
+//!
+//! One significant difference between block device drivers implemented with
+//! these Rust abstractions and drivers implemented in C is that the Rust
+//! drivers have to own a reference count on the `Request` type when the IO is
+//! in flight. This is to ensure that the C `struct request` instances backing
+//! the Rust `Request` instances are live while the Rust driver holds a
+//! reference to the `Request`. In addition, the conversion of an integer tag to
+//! a `Request` via the `TagSet` would not be sound without this bookkeeping.
+//!
+//! [`GenDisk`]: gen_disk::GenDisk
+//! [`GenDisk<T>`]: gen_disk::GenDisk
+//! [`GenDiskBuilder`]: gen_disk::GenDiskBuilder
+//! [`GenDiskBuilder::build`]: gen_disk::GenDiskBuilder::build
+//!
+//! # Example
+//!
+//! ```rust
+//! use kernel::{
+//! alloc::flags,
+//! block::mq::*,
+//! new_mutex,
+//! prelude::*,
+//! sync::{Arc, Mutex},
+//! types::{ARef, ForeignOwnable},
+//! };
+//!
+//! struct MyBlkDevice;
+//!
+//! #[vtable]
+//! impl Operations for MyBlkDevice {
+//!
+//! fn queue_rq(rq: ARef<Request<Self>>, _is_last: bool) -> Result {
+//! Request::end_ok(rq);
+//! Ok(())
+//! }
+//!
+//! fn commit_rqs() {}
+//! }
+//!
+//! let tagset: Arc<TagSet<MyBlkDevice>> =
+//! Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?;
+//! let mut disk = gen_disk::GenDiskBuilder::new()
+//! .capacity_sectors(4096)
+//! .build(format_args!("myblk"), tagset)?;
+//!
+//! # Ok::<(), kernel::error::Error>(())
+//! ```
+
+pub mod gen_disk;
+mod operations;
+mod raw_writer;
+mod request;
+mod tag_set;
+
+pub use operations::Operations;
+pub use request::Request;
+pub use tag_set::TagSet;
diff --git a/rust/kernel/block/mq/gen_disk.rs b/rust/kernel/block/mq/gen_disk.rs
new file mode 100644
index 000000000000..14806e1997fd
--- /dev/null
+++ b/rust/kernel/block/mq/gen_disk.rs
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Generic disk abstraction.
+//!
+//! C header: [`include/linux/blkdev.h`](srctree/include/linux/blkdev.h)
+//! C header: [`include/linux/blk_mq.h`](srctree/include/linux/blk_mq.h)
+
+use crate::block::mq::{raw_writer::RawWriter, Operations, TagSet};
+use crate::{bindings, error::from_err_ptr, error::Result, sync::Arc};
+use crate::{error, static_lock_class};
+use core::fmt::{self, Write};
+
+/// A builder for [`GenDisk`].
+///
+/// Use this struct to configure and add a new [`GenDisk`] to the VFS.
+pub struct GenDiskBuilder {
+ rotational: bool,
+ logical_block_size: u32,
+ physical_block_size: u32,
+ capacity_sectors: u64,
+}
+
+impl Default for GenDiskBuilder {
+ fn default() -> Self {
+ Self {
+ rotational: false,
+ logical_block_size: bindings::PAGE_SIZE as u32,
+ physical_block_size: bindings::PAGE_SIZE as u32,
+ capacity_sectors: 0,
+ }
+ }
+}
+
+impl GenDiskBuilder {
+ /// Create a new instance.
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Set the rotational media attribute for the device to be built.
+ pub fn rotational(mut self, rotational: bool) -> Self {
+ self.rotational = rotational;
+ self
+ }
+
+ /// Validate block size by verifying that it is between 512 and `PAGE_SIZE`,
+ /// and that it is a power of two.
+ fn validate_block_size(size: u32) -> Result {
+ if !(512..=bindings::PAGE_SIZE as u32).contains(&size) || !size.is_power_of_two() {
+ Err(error::code::EINVAL)
+ } else {
+ Ok(())
+ }
+ }
+
+ /// Set the logical block size of the device to be built.
+ ///
+ /// This method will check that the block size is a power of two and between
+ /// 512 and `PAGE_SIZE`. If not, an error is returned and the block size is not set.
+ ///
+ /// This is the smallest unit the storage device can address. It is
+ /// typically 4096 bytes.
+ pub fn logical_block_size(mut self, block_size: u32) -> Result<Self> {
+ Self::validate_block_size(block_size)?;
+ self.logical_block_size = block_size;
+ Ok(self)
+ }
+
+ /// Set the physical block size of the device to be built.
+ ///
+ /// This method will check that the block size is a power of two and between
+ /// 512 and `PAGE_SIZE`. If not, an error is returned and the block size is not set.
+ ///
+ /// This is the smallest unit a physical storage device can write
+ /// atomically. It is usually the same as the logical block size but may be
+ /// bigger. One example is SATA drives with 4096 byte physical block size
+ /// that expose a 512 byte logical block size to the operating system.
+ pub fn physical_block_size(mut self, block_size: u32) -> Result<Self> {
+ Self::validate_block_size(block_size)?;
+ self.physical_block_size = block_size;
+ Ok(self)
+ }
+
+ /// Set the capacity of the device to be built, in sectors (512 bytes).
+ pub fn capacity_sectors(mut self, capacity: u64) -> Self {
+ self.capacity_sectors = capacity;
+ self
+ }
+
+ /// Build a new `GenDisk` and add it to the VFS.
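+ ///
+ /// # Examples
+ ///
+ /// A minimal sketch, assuming an `Operations` implementation `MyBlkDevice`
+ /// and a `tagset: Arc<TagSet<MyBlkDevice>>` as in the module level example:
+ ///
+ /// ```ignore
+ /// let disk = GenDiskBuilder::new()
+ ///     .rotational(false)
+ ///     .logical_block_size(512)?
+ ///     .physical_block_size(4096)?
+ ///     .capacity_sectors(4096)
+ ///     .build(format_args!("myblk"), tagset)?;
+ /// ```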
+ pub fn build<T: Operations>(
+ self,
+ name: fmt::Arguments<'_>,
+ tagset: Arc<TagSet<T>>,
+ ) -> Result<GenDisk<T>> {
+ // SAFETY: `bindings::queue_limits` contain only fields that are valid when zeroed.
+ let mut lim: bindings::queue_limits = unsafe { core::mem::zeroed() };
+
+ lim.logical_block_size = self.logical_block_size;
+ lim.physical_block_size = self.physical_block_size;
+ if self.rotational {
+ lim.features = bindings::BLK_FEAT_ROTATIONAL;
+ }
+
+ // SAFETY: `tagset.raw_tag_set()` points to a valid and initialized tag set.
+ let gendisk = from_err_ptr(unsafe {
+ bindings::__blk_mq_alloc_disk(
+ tagset.raw_tag_set(),
+ &mut lim,
+ core::ptr::null_mut(),
+ static_lock_class!().as_ptr(),
+ )
+ })?;
+
+ const TABLE: bindings::block_device_operations = bindings::block_device_operations {
+ submit_bio: None,
+ open: None,
+ release: None,
+ ioctl: None,
+ compat_ioctl: None,
+ check_events: None,
+ unlock_native_capacity: None,
+ getgeo: None,
+ set_read_only: None,
+ swap_slot_free_notify: None,
+ report_zones: None,
+ devnode: None,
+ alternative_gpt_sector: None,
+ get_unique_id: None,
+ // TODO: Set to THIS_MODULE. Waiting for const_refs_to_static feature to
+ // be merged (unstable in rustc 1.78 which is staged for linux 6.10)
+ // https://github.com/rust-lang/rust/issues/119618
+ owner: core::ptr::null_mut(),
+ pr_ops: core::ptr::null_mut(),
+ free_disk: None,
+ poll_bio: None,
+ };
+
+ // SAFETY: `gendisk` is a valid pointer as we initialized it above
+ unsafe { (*gendisk).fops = &TABLE };
+
+ let mut raw_writer = RawWriter::from_array(
+ // SAFETY: `gendisk` points to a valid and initialized instance. We
+ // have exclusive access, since the disk is not added to the VFS
+ // yet.
+ unsafe { &mut (*gendisk).disk_name },
+ )?;
+ raw_writer.write_fmt(name)?;
+ raw_writer.write_char('\0')?;
+
+ // SAFETY: `gendisk` points to a valid and initialized instance of
+ // `struct gendisk`. `set_capacity` takes a lock to synchronize this
+ // operation, so we will not race.
+ unsafe { bindings::set_capacity(gendisk, self.capacity_sectors) };
+
+ crate::error::to_result(
+ // SAFETY: `gendisk` points to a valid and initialized instance of
+ // `struct gendisk`.
+ unsafe {
+ bindings::device_add_disk(core::ptr::null_mut(), gendisk, core::ptr::null_mut())
+ },
+ )?;
+
+ // INVARIANT: `gendisk` was initialized above.
+ // INVARIANT: `gendisk` was added to the VFS via `device_add_disk` above.
+ Ok(GenDisk {
+ _tagset: tagset,
+ gendisk,
+ })
+ }
+}
+
+/// A generic block device.
+///
+/// # Invariants
+///
+/// - `gendisk` must always point to an initialized and valid `struct gendisk`.
+/// - `gendisk` was added to the VFS through a call to
+/// `bindings::device_add_disk`.
+pub struct GenDisk<T: Operations> {
+ _tagset: Arc<TagSet<T>>,
+ gendisk: *mut bindings::gendisk,
+}
+
+// SAFETY: `GenDisk` is an owned pointer to a `struct gendisk` and an `Arc` to a
+// `TagSet`. It is safe to send this to other threads as long as `T` is `Send`.
+unsafe impl<T: Operations + Send> Send for GenDisk<T> {}
+
+impl<T: Operations> Drop for GenDisk<T> {
+ fn drop(&mut self) {
+ // SAFETY: By type invariant, `self.gendisk` points to a valid and
+ // initialized instance of `struct gendisk`, and it was previously added
+ // to the VFS.
+ unsafe { bindings::del_gendisk(self.gendisk) };
+ }
+}
diff --git a/rust/kernel/block/mq/operations.rs b/rust/kernel/block/mq/operations.rs
new file mode 100644
index 000000000000..864ff379dc91
--- /dev/null
+++ b/rust/kernel/block/mq/operations.rs
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! This module provides an interface for blk-mq drivers to implement.
+//!
+//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h)
+
+use crate::{
+ bindings,
+ block::mq::request::RequestDataWrapper,
+ block::mq::Request,
+ error::{from_result, Result},
+ prelude::*,
+ types::ARef,
+};
+use core::{marker::PhantomData, sync::atomic::AtomicU64, sync::atomic::Ordering};
+
+/// Implement this trait to interface with blk-mq as a block device driver.
+///
+/// To implement a block device driver, implement this trait as described in the
+/// [module level documentation]. The kernel will use the implementation of the
+/// functions defined in this trait to interface with a block device driver. Note:
+/// there is no need for an `exit_request()` implementation, because the `drop`
+/// implementation of the [`Request`] type will be invoked automatically by
+/// the C/Rust glue logic.
+///
+/// [module level documentation]: kernel::block::mq
+#[macros::vtable]
+pub trait Operations: Sized {
+ /// Called by the kernel to queue a request with the driver. If `is_last` is
+ /// `false`, the driver is allowed to defer committing the request.
+ fn queue_rq(rq: ARef<Request<Self>>, is_last: bool) -> Result;
+
+ /// Called by the kernel to indicate that queued requests should be submitted.
+ fn commit_rqs();
+
+ /// Called by the kernel to poll the device for completed requests. Only
+ /// used for poll queues.
+ fn poll() -> bool {
+ build_error!(crate::error::VTABLE_DEFAULT_ERROR)
+ }
+}
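+
+// A descriptive note: the `#[macros::vtable]` attribute above generates
+// boolean associated constants such as `HAS_POLL`, which `OperationsVTable`
+// below uses to install `poll_callback` only when the driver actually
+// implements `poll`.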
+
+/// A vtable for blk-mq to interact with a block device driver.
+///
+/// A `bindings::blk_mq_ops` vtable is constructed from pointers to the `extern
+/// "C"` functions of this struct, exposed through the `OperationsVTable::VTABLE`.
+///
+/// For general documentation of these methods, see the kernel source
+/// documentation related to `struct blk_mq_ops` in
+/// [`include/linux/blk-mq.h`].
+///
+/// [`include/linux/blk-mq.h`]: srctree/include/linux/blk-mq.h
+pub(crate) struct OperationsVTable<T: Operations>(PhantomData<T>);
+
+impl<T: Operations> OperationsVTable<T> {
+ /// This function is called by the C kernel. A pointer to this function is
+ /// installed in the `blk_mq_ops` vtable for the driver.
+ ///
+ /// # Safety
+ ///
+ /// - The caller of this function must ensure that the pointee of `bd` is
+ /// valid for reads for the duration of this function.
+ /// - This function must be called for an initialized and live `hctx`. That
+ /// is, `Self::init_hctx_callback` was called and
+ /// `Self::exit_hctx_callback()` was not yet called.
+/// - `(*bd).rq` must point to an initialized and live `bindings::request`.
+ /// That is, `Self::init_request_callback` was called but
+ /// `Self::exit_request_callback` was not yet called for the request.
+ /// - `(*bd).rq` must be owned by the driver. That is, the block layer must
+ /// promise to not access the request until the driver calls
+ /// `bindings::blk_mq_end_request` for the request.
+ unsafe extern "C" fn queue_rq_callback(
+ _hctx: *mut bindings::blk_mq_hw_ctx,
+ bd: *const bindings::blk_mq_queue_data,
+ ) -> bindings::blk_status_t {
+ // SAFETY: `bd.rq` is valid as required by the safety requirement for
+ // this function.
+ let request = unsafe { &*(*bd).rq.cast::<Request<T>>() };
+
+ // One refcount for the ARef, one for being in flight
+ request.wrapper_ref().refcount().store(2, Ordering::Relaxed);
+
+ // SAFETY:
+ // - We own a refcount that we took above. We pass that to `ARef`.
+ // - By the safety requirements of this function, `request` is a valid
+ // `struct request` and the private data is properly initialized.
+ // - `rq` will be alive until `blk_mq_end_request` is called and is
+ // reference counted by `ARef` until then.
+ let rq = unsafe { Request::aref_from_raw((*bd).rq) };
+
+ // SAFETY: We have exclusive access and we just set the refcount above.
+ unsafe { Request::start_unchecked(&rq) };
+
+ let ret = T::queue_rq(
+ rq,
+ // SAFETY: `bd` is valid as required by the safety requirement for
+ // this function.
+ unsafe { (*bd).last },
+ );
+
+ if let Err(e) = ret {
+ e.to_blk_status()
+ } else {
+ bindings::BLK_STS_OK as _
+ }
+ }
+
+ /// This function is called by the C kernel. A pointer to this function is
+ /// installed in the `blk_mq_ops` vtable for the driver.
+ ///
+ /// # Safety
+ ///
+ /// This function may only be called by blk-mq C infrastructure.
+ unsafe extern "C" fn commit_rqs_callback(_hctx: *mut bindings::blk_mq_hw_ctx) {
+ T::commit_rqs()
+ }
+
+ /// This function is called by the C kernel. It is not currently
+ /// implemented, and there is no way to exercise this code path.
+ ///
+ /// # Safety
+ ///
+ /// This function may only be called by blk-mq C infrastructure.
+ unsafe extern "C" fn complete_callback(_rq: *mut bindings::request) {}
+
+ /// This function is called by the C kernel. A pointer to this function is
+ /// installed in the `blk_mq_ops` vtable for the driver.
+ ///
+ /// # Safety
+ ///
+ /// This function may only be called by blk-mq C infrastructure.
+ unsafe extern "C" fn poll_callback(
+ _hctx: *mut bindings::blk_mq_hw_ctx,
+ _iob: *mut bindings::io_comp_batch,
+ ) -> crate::ffi::c_int {
+ T::poll().into()
+ }
+
+ /// This function is called by the C kernel. A pointer to this function is
+ /// installed in the `blk_mq_ops` vtable for the driver.
+ ///
+ /// # Safety
+ ///
+ /// This function may only be called by blk-mq C infrastructure. This
+ /// function may only be called once before `exit_hctx_callback` is called
+ /// for the same context.
+ unsafe extern "C" fn init_hctx_callback(
+ _hctx: *mut bindings::blk_mq_hw_ctx,
+ _tagset_data: *mut crate::ffi::c_void,
+ _hctx_idx: crate::ffi::c_uint,
+ ) -> crate::ffi::c_int {
+ from_result(|| Ok(0))
+ }
+
+ /// This function is called by the C kernel. A pointer to this function is
+ /// installed in the `blk_mq_ops` vtable for the driver.
+ ///
+ /// # Safety
+ ///
+ /// This function may only be called by blk-mq C infrastructure.
+ unsafe extern "C" fn exit_hctx_callback(
+ _hctx: *mut bindings::blk_mq_hw_ctx,
+ _hctx_idx: crate::ffi::c_uint,
+ ) {
+ }
+
+ /// This function is called by the C kernel. A pointer to this function is
+ /// installed in the `blk_mq_ops` vtable for the driver.
+ ///
+ /// # Safety
+ ///
+ /// - This function may only be called by blk-mq C infrastructure.
+ /// - `_set` must point to an initialized `TagSet<T>`.
+ /// - `rq` must point to an initialized `bindings::request`.
+ /// - The allocation pointed to by `rq` must be at the size of `Request`
+ /// plus the size of `RequestDataWrapper`.
+ unsafe extern "C" fn init_request_callback(
+ _set: *mut bindings::blk_mq_tag_set,
+ rq: *mut bindings::request,
+ _hctx_idx: crate::ffi::c_uint,
+ _numa_node: crate::ffi::c_uint,
+ ) -> crate::ffi::c_int {
+ from_result(|| {
+ // SAFETY: By the safety requirements of this function, `rq` points
+ // to a valid allocation.
+ let pdu = unsafe { Request::wrapper_ptr(rq.cast::<Request<T>>()) };
+
+ // SAFETY: The refcount field is allocated but not initialized, so
+ // it is valid for writes.
+ unsafe { RequestDataWrapper::refcount_ptr(pdu.as_ptr()).write(AtomicU64::new(0)) };
+
+ Ok(0)
+ })
+ }
+
+ /// This function is called by the C kernel. A pointer to this function is
+ /// installed in the `blk_mq_ops` vtable for the driver.
+ ///
+ /// # Safety
+ ///
+ /// - This function may only be called by blk-mq C infrastructure.
+ /// - `_set` must point to an initialized `TagSet<T>`.
+ /// - `rq` must point to an initialized and valid `Request`.
+ unsafe extern "C" fn exit_request_callback(
+ _set: *mut bindings::blk_mq_tag_set,
+ rq: *mut bindings::request,
+ _hctx_idx: crate::ffi::c_uint,
+ ) {
+ // SAFETY: The tagset invariants guarantee that all requests are allocated with extra memory
+ // for the request data.
+ let pdu = unsafe { bindings::blk_mq_rq_to_pdu(rq) }.cast::<RequestDataWrapper>();
+
+ // SAFETY: `pdu` is valid for read and write and is properly initialised.
+ unsafe { core::ptr::drop_in_place(pdu) };
+ }
+
+ const VTABLE: bindings::blk_mq_ops = bindings::blk_mq_ops {
+ queue_rq: Some(Self::queue_rq_callback),
+ queue_rqs: None,
+ commit_rqs: Some(Self::commit_rqs_callback),
+ get_budget: None,
+ put_budget: None,
+ set_rq_budget_token: None,
+ get_rq_budget_token: None,
+ timeout: None,
+ poll: if T::HAS_POLL {
+ Some(Self::poll_callback)
+ } else {
+ None
+ },
+ complete: Some(Self::complete_callback),
+ init_hctx: Some(Self::init_hctx_callback),
+ exit_hctx: Some(Self::exit_hctx_callback),
+ init_request: Some(Self::init_request_callback),
+ exit_request: Some(Self::exit_request_callback),
+ cleanup_rq: None,
+ busy: None,
+ map_queues: None,
+ #[cfg(CONFIG_BLK_DEBUG_FS)]
+ show_rq: None,
+ };
+
+ pub(crate) const fn build() -> &'static bindings::blk_mq_ops {
+ &Self::VTABLE
+ }
+}
diff --git a/rust/kernel/block/mq/raw_writer.rs b/rust/kernel/block/mq/raw_writer.rs
new file mode 100644
index 000000000000..7e2159e4f6a6
--- /dev/null
+++ b/rust/kernel/block/mq/raw_writer.rs
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use core::fmt::{self, Write};
+
+use crate::error::Result;
+use crate::prelude::EINVAL;
+
+/// A mutable reference to a byte buffer into which a string can be written.
+///
+/// # Invariants
+///
+/// `buffer` is always null terminated.
+pub(crate) struct RawWriter<'a> {
+ buffer: &'a mut [u8],
+ pos: usize,
+}
+
+impl<'a> RawWriter<'a> {
+ /// Create a new `RawWriter` instance.
+ fn new(buffer: &'a mut [u8]) -> Result<RawWriter<'a>> {
+ *(buffer.last_mut().ok_or(EINVAL)?) = 0;
+
+ // INVARIANT: We null terminated the buffer above.
+ Ok(Self { buffer, pos: 0 })
+ }
+
+ pub(crate) fn from_array<const N: usize>(
+ a: &'a mut [crate::ffi::c_char; N],
+ ) -> Result<RawWriter<'a>> {
+ Self::new(
+ // SAFETY: the buffer of `a` is valid for read and write as `u8` for
+ // at least `N` bytes.
+ unsafe { core::slice::from_raw_parts_mut(a.as_mut_ptr().cast::<u8>(), N) },
+ )
+ }
+}
+
+impl Write for RawWriter<'_> {
+ fn write_str(&mut self, s: &str) -> fmt::Result {
+ let bytes = s.as_bytes();
+ let len = bytes.len();
+
+ // We do not want to overwrite our null terminator
+ if self.pos + len > self.buffer.len() - 1 {
+ return Err(fmt::Error);
+ }
+
+ // INVARIANT: We are not overwriting the last byte
+ self.buffer[self.pos..self.pos + len].copy_from_slice(bytes);
+
+ self.pos += len;
+
+ Ok(())
+ }
+}
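+
+// A minimal usage sketch (illustrative, abbreviated from `GenDiskBuilder::build`
+// in gen_disk.rs): format a disk name into a fixed C char array while keeping
+// the invariant that the final byte is a NUL terminator:
+//
+//     let mut writer = RawWriter::from_array(&mut disk_name)?;
+//     writer.write_fmt(name)?;
+//     writer.write_char('\0')?;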
diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs
new file mode 100644
index 000000000000..7943f43b9575
--- /dev/null
+++ b/rust/kernel/block/mq/request.rs
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! This module provides a wrapper for the C `struct request` type.
+//!
+//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h)
+
+use crate::{
+ bindings,
+ block::mq::Operations,
+ error::Result,
+ types::{ARef, AlwaysRefCounted, Opaque},
+};
+use core::{
+ marker::PhantomData,
+ ptr::{addr_of_mut, NonNull},
+ sync::atomic::{AtomicU64, Ordering},
+};
+
+/// A wrapper around a blk-mq [`struct request`]. This represents an IO request.
+///
+/// # Implementation details
+///
+/// There are four states for a request that the Rust bindings care about:
+///
+/// 1. Request is owned by block layer (refcount 0).
+/// 2. Request is owned by driver but with zero [`ARef`]s in existence
+/// (refcount 1).
+/// 3. Request is owned by driver with exactly one [`ARef`] in existence
+/// (refcount 2).
+/// 4. Request is owned by driver with more than one [`ARef`] in existence
+/// (refcount > 2).
+///
+/// We need to track 1 and 2 to ensure we fail tag to request conversions for
+/// requests that are not owned by the driver.
+///
+/// We need to track 3 and 4 to ensure that it is safe to end the request and hand
+/// back ownership to the block layer.
+///
+/// The states are tracked through the private `refcount` field of
+/// `RequestDataWrapper`. This structure lives in the private data area of the C
+/// [`struct request`].
+///
+/// # Invariants
+///
+/// * `self.0` is a valid [`struct request`] created by the C portion of the
+/// kernel.
+/// * The private data area associated with this request must be an initialized
+/// and valid `RequestDataWrapper<T>`.
+/// * `self` is reference counted by atomic modification of
+/// `self.wrapper_ref().refcount()`.
+///
+/// [`struct request`]: srctree/include/linux/blk-mq.h
+///
+#[repr(transparent)]
+pub struct Request<T: Operations>(Opaque<bindings::request>, PhantomData<T>);
+
+impl<T: Operations> Request<T> {
+ /// Create an [`ARef<Request>`] from a [`struct request`] pointer.
+ ///
+ /// # Safety
+ ///
+ /// * The caller must own a refcount on `ptr` that is transferred to the
+ /// returned [`ARef`].
+ /// * The type invariants for [`Request`] must hold for the pointee of `ptr`.
+ ///
+ /// [`struct request`]: srctree/include/linux/blk-mq.h
+ pub(crate) unsafe fn aref_from_raw(ptr: *mut bindings::request) -> ARef<Self> {
+ // INVARIANT: By the safety requirements of this function, invariants are upheld.
+ // SAFETY: By the safety requirement of this function, we own a
+ // reference count that we can pass to `ARef`.
+ unsafe { ARef::from_raw(NonNull::new_unchecked(ptr as *const Self as *mut Self)) }
+ }
+
+ /// Notify the block layer that a request is going to be processed now.
+ ///
+ /// The block layer uses this hook to do proper initializations such as
+ /// starting the timeout timer. It is a requirement that block device
+ /// drivers call this function when starting to process a request.
+ ///
+ /// # Safety
+ ///
+ /// The caller must have exclusive ownership of `self`, that is
+ /// `self.wrapper_ref().refcount() == 2`.
+ pub(crate) unsafe fn start_unchecked(this: &ARef<Self>) {
+ // SAFETY: By type invariant, `self.0` is a valid `struct request` and
+ // we have exclusive access.
+ unsafe { bindings::blk_mq_start_request(this.0.get()) };
+ }
+
+ /// Try to take exclusive ownership of `this` by dropping the refcount to 0.
+ /// This fails if `this` is not the only [`ARef`] pointing to the underlying
+ /// [`Request`].
+ ///
+ /// If the operation is successful, [`Ok`] is returned with a pointer to the
+ /// C [`struct request`]. If the operation fails, `this` is returned in the
+ /// [`Err`] variant.
+ ///
+ /// [`struct request`]: srctree/include/linux/blk-mq.h
+ fn try_set_end(this: ARef<Self>) -> Result<*mut bindings::request, ARef<Self>> {
+ // We can race with `TagSet::tag_to_rq`
+ if let Err(_old) = this.wrapper_ref().refcount().compare_exchange(
+ 2,
+ 0,
+ Ordering::Relaxed,
+ Ordering::Relaxed,
+ ) {
+ return Err(this);
+ }
+
+ let request_ptr = this.0.get();
+ core::mem::forget(this);
+
+ Ok(request_ptr)
+ }
+
+ /// Notify the block layer that the request has been completed without errors.
+ ///
+ /// This function will return [`Err`] if `this` is not the only [`ARef`]
+ /// referencing the request.
+ pub fn end_ok(this: ARef<Self>) -> Result<(), ARef<Self>> {
+ let request_ptr = Self::try_set_end(this)?;
+
+ // SAFETY: By type invariant, `this.0` was a valid `struct request`. The
+ // success of the call to `try_set_end` guarantees that there are no
+ // `ARef`s pointing to this request. Therefore it is safe to hand it
+ // back to the block layer.
+ unsafe { bindings::blk_mq_end_request(request_ptr, bindings::BLK_STS_OK as _) };
+
+ Ok(())
+ }
+
+ /// Return a pointer to the [`RequestDataWrapper`] stored in the private area
+ /// of the request structure.
+ ///
+ /// # Safety
+ ///
+ /// - `this` must point to a valid allocation of size at least size of
+ /// [`Self`] plus size of [`RequestDataWrapper`].
+ pub(crate) unsafe fn wrapper_ptr(this: *mut Self) -> NonNull<RequestDataWrapper> {
+ let request_ptr = this.cast::<bindings::request>();
+ // SAFETY: By safety requirements for this function, `this` is a
+ // valid allocation.
+ let wrapper_ptr =
+ unsafe { bindings::blk_mq_rq_to_pdu(request_ptr).cast::<RequestDataWrapper>() };
+ // SAFETY: By C API contract, wrapper_ptr points to a valid allocation
+ // and is not null.
+ unsafe { NonNull::new_unchecked(wrapper_ptr) }
+ }
+
+ /// Return a reference to the [`RequestDataWrapper`] stored in the private
+ /// area of the request structure.
+ pub(crate) fn wrapper_ref(&self) -> &RequestDataWrapper {
+ // SAFETY: By type invariant, `self.0` is a valid allocation. Further,
+ // the private data associated with this request is initialized and
+ // valid. The existence of `&self` guarantees that the private data is
+ // valid as a shared reference.
+ unsafe { Self::wrapper_ptr(self as *const Self as *mut Self).as_ref() }
+ }
+}
+
+/// A wrapper around data stored in the private area of the C [`struct request`].
+///
+/// [`struct request`]: srctree/include/linux/blk-mq.h
+pub(crate) struct RequestDataWrapper {
+ /// The Rust request refcount has the following states:
+ ///
+ /// - 0: The request is owned by C block layer.
+ /// - 1: The request is owned by Rust abstractions but there are no [`ARef`] references to it.
+ /// - 2+: There are [`ARef`] references to the request.
+ refcount: AtomicU64,
+}
+
+impl RequestDataWrapper {
+ /// Return a reference to the refcount of the request that is embedding
+ /// `self`.
+ pub(crate) fn refcount(&self) -> &AtomicU64 {
+ &self.refcount
+ }
+
+ /// Return a pointer to the refcount of the request that is embedding the
+ /// pointee of `this`.
+ ///
+ /// # Safety
+ ///
+ /// - `this` must point to a live allocation of at least the size of `Self`.
+ pub(crate) unsafe fn refcount_ptr(this: *mut Self) -> *mut AtomicU64 {
+ // SAFETY: Because of the safety requirements of this function, the
+ // field projection is safe.
+ unsafe { addr_of_mut!((*this).refcount) }
+ }
+}
+
+// SAFETY: Exclusive access is thread-safe for `Request`. `Request` has no `&mut
+// self` methods and `&self` methods that mutate `self` are internally
+// synchronized.
+unsafe impl<T: Operations> Send for Request<T> {}
+
+// SAFETY: Shared access is thread-safe for `Request`. `&self` methods that
+// mutate `self` are internally synchronized.
+unsafe impl<T: Operations> Sync for Request<T> {}
+
+/// Store the result of `op(target.load())` in `target`, returning the new value of
+/// `target`.
+fn atomic_relaxed_op_return(target: &AtomicU64, op: impl Fn(u64) -> u64) -> u64 {
+ let old = target.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| Some(op(x)));
+
+ // SAFETY: Because the operation passed to `fetch_update` above always
+ // returns `Some`, `old` will always be `Ok`.
+ let old = unsafe { old.unwrap_unchecked() };
+
+ op(old)
+}
+
+/// Store the result of `op(target.load())` in `target` if `target.load() !=
+/// pred`, returning [`true`] if the target was updated.
+fn atomic_relaxed_op_unless(target: &AtomicU64, op: impl Fn(u64) -> u64, pred: u64) -> bool {
+ target
+ .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| {
+ if x == pred {
+ None
+ } else {
+ Some(op(x))
+ }
+ })
+ .is_ok()
+}
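+
+// For example, `atomic_relaxed_op_unless(refcount, |x| x + 1, 0)` increments
+// the refcount unless it is zero, which is how `inc_ref` below avoids
+// resurrecting a request that has already been handed back to the block
+// layer.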
+
+// SAFETY: All instances of `Request<T>` are reference counted. This
+// implementation of `AlwaysRefCounted` ensures that increments to the ref count
+// keep the object alive in memory at least until a matching reference count
+// decrement is executed.
+unsafe impl<T: Operations> AlwaysRefCounted for Request<T> {
+ fn inc_ref(&self) {
+ let refcount = &self.wrapper_ref().refcount();
+
+ #[cfg_attr(not(CONFIG_DEBUG_MISC), allow(unused_variables))]
+ let updated = atomic_relaxed_op_unless(refcount, |x| x + 1, 0);
+
+ #[cfg(CONFIG_DEBUG_MISC)]
+ if !updated {
+ panic!("Request refcount zero on clone")
+ }
+ }
+
+ unsafe fn dec_ref(obj: core::ptr::NonNull<Self>) {
+ // SAFETY: The type invariants of `ARef` guarantee that `obj` is valid
+ // for read.
+ let wrapper_ptr = unsafe { Self::wrapper_ptr(obj.as_ptr()).as_ptr() };
+ // SAFETY: The type invariant of `Request` guarantees that the private
+ // data area is initialized and valid.
+ let refcount = unsafe { &*RequestDataWrapper::refcount_ptr(wrapper_ptr) };
+
+ #[cfg_attr(not(CONFIG_DEBUG_MISC), allow(unused_variables))]
+ let new_refcount = atomic_relaxed_op_return(refcount, |x| x - 1);
+
+ #[cfg(CONFIG_DEBUG_MISC)]
+ if new_refcount == 0 {
+ panic!("Request reached refcount zero in Rust abstractions");
+ }
+ }
+}
diff --git a/rust/kernel/block/mq/tag_set.rs b/rust/kernel/block/mq/tag_set.rs
new file mode 100644
index 000000000000..00ddcc71dfa2
--- /dev/null
+++ b/rust/kernel/block/mq/tag_set.rs
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! This module provides the `TagSet` struct to wrap the C `struct blk_mq_tag_set`.
+//!
+//! C header: [`include/linux/blk-mq.h`](srctree/include/linux/blk-mq.h)
+
+use core::pin::Pin;
+
+use crate::{
+ bindings,
+ block::mq::{operations::OperationsVTable, request::RequestDataWrapper, Operations},
+ error,
+ prelude::PinInit,
+ try_pin_init,
+ types::Opaque,
+};
+use core::{convert::TryInto, marker::PhantomData};
+use macros::{pin_data, pinned_drop};
+
+/// A wrapper for the C `struct blk_mq_tag_set`.
+///
+/// `struct blk_mq_tag_set` contains a `struct list_head` and so must be pinned.
+///
+/// # Invariants
+///
+/// - `inner` is initialized and valid.
+#[pin_data(PinnedDrop)]
+#[repr(transparent)]
+pub struct TagSet<T: Operations> {
+ #[pin]
+ inner: Opaque<bindings::blk_mq_tag_set>,
+ _p: PhantomData<T>,
+}
+
+impl<T: Operations> TagSet<T> {
+ /// Try to create a new tag set.
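+ ///
+ /// # Examples
+ ///
+ /// A minimal sketch mirroring the module level example, assuming an
+ /// `Operations` implementation `MyBlkDevice`:
+ ///
+ /// ```ignore
+ /// let tagset: Arc<TagSet<MyBlkDevice>> =
+ ///     Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?;
+ /// ```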
+ pub fn new(
+ nr_hw_queues: u32,
+ num_tags: u32,
+ num_maps: u32,
+ ) -> impl PinInit<Self, error::Error> {
+ // SAFETY: `blk_mq_tag_set` only contains integers and pointers, which
+ // are all allowed to be 0.
+ let tag_set: bindings::blk_mq_tag_set = unsafe { core::mem::zeroed() };
+ let tag_set = core::mem::size_of::<RequestDataWrapper>()
+ .try_into()
+ .map(|cmd_size| {
+ bindings::blk_mq_tag_set {
+ ops: OperationsVTable::<T>::build(),
+ nr_hw_queues,
+ timeout: 0, // 0 means default, which is 30 * HZ (30 seconds) in C
+ numa_node: bindings::NUMA_NO_NODE,
+ queue_depth: num_tags,
+ cmd_size,
+ flags: 0,
+ driver_data: core::ptr::null_mut::<crate::ffi::c_void>(),
+ nr_maps: num_maps,
+ ..tag_set
+ }
+ });
+
+ try_pin_init!(TagSet {
+ inner <- PinInit::<_, error::Error>::pin_chain(Opaque::new(tag_set?), |tag_set| {
+ // SAFETY: we do not move out of `tag_set`.
+ let tag_set = unsafe { Pin::get_unchecked_mut(tag_set) };
+ // SAFETY: `tag_set` is a reference to an initialized `blk_mq_tag_set`.
+ error::to_result(unsafe { bindings::blk_mq_alloc_tag_set(tag_set.get()) })
+ }),
+ _p: PhantomData,
+ })
+ }
+
+ /// Return the pointer to the wrapped `struct blk_mq_tag_set`
+ pub(crate) fn raw_tag_set(&self) -> *mut bindings::blk_mq_tag_set {
+ self.inner.get()
+ }
+}
+
+#[pinned_drop]
+impl<T: Operations> PinnedDrop for TagSet<T> {
+ fn drop(self: Pin<&mut Self>) {
+ // SAFETY: By type invariant `inner` is valid and has been properly
+ // initialized during construction.
+ unsafe { bindings::blk_mq_free_tag_set(self.inner.get()) };
+ }
+}
diff --git a/rust/kernel/build_assert.rs b/rust/kernel/build_assert.rs
index 9e37120bc69c..6331b15d7c4d 100644
--- a/rust/kernel/build_assert.rs
+++ b/rust/kernel/build_assert.rs
@@ -2,6 +2,9 @@
//! Build-time assert.
+#[doc(hidden)]
+pub use build_error::build_error;
+
/// Fails the build if the code path calling `build_error!` can possibly be executed.
///
/// If the macro is executed in const context, `build_error!` will panic.
@@ -11,7 +14,6 @@
/// # Examples
///
/// ```
-/// # use kernel::build_error;
/// #[inline]
/// fn foo(a: usize) -> usize {
/// a.checked_add(1).unwrap_or_else(|| build_error!("overflow"))
@@ -23,10 +25,10 @@
#[macro_export]
macro_rules! build_error {
() => {{
- $crate::build_error("")
+ $crate::build_assert::build_error("")
}};
($msg:expr) => {{
- $crate::build_error($msg)
+ $crate::build_assert::build_error($msg)
}};
}
@@ -73,12 +75,12 @@ macro_rules! build_error {
macro_rules! build_assert {
($cond:expr $(,)?) => {{
if !$cond {
- $crate::build_error(concat!("assertion failed: ", stringify!($cond)));
+ $crate::build_assert::build_error(concat!("assertion failed: ", stringify!($cond)));
}
}};
($cond:expr, $msg:expr) => {{
if !$cond {
- $crate::build_error($msg);
+ $crate::build_assert::build_error($msg);
}
}};
}
diff --git a/rust/kernel/cred.rs b/rust/kernel/cred.rs
new file mode 100644
index 000000000000..81d67789b16f
--- /dev/null
+++ b/rust/kernel/cred.rs
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 Google LLC.
+
+//! Credentials management.
+//!
+//! C header: [`include/linux/cred.h`](srctree/include/linux/cred.h).
+//!
+//! Reference: <https://www.kernel.org/doc/html/latest/security/credentials.html>
+
+use crate::{
+ bindings,
+ task::Kuid,
+ types::{AlwaysRefCounted, Opaque},
+};
+
+/// Wraps the kernel's `struct cred`.
+///
+/// Credentials are used for various security checks in the kernel.
+///
+/// Most fields of credentials are immutable. When a task's credentials change, that
+/// happens by replacing the credential instead of changing an existing credential. See the [kernel
+/// documentation][ref] for more info on this.
+///
+/// # Invariants
+///
+/// Instances of this type are always ref-counted, that is, a call to `get_cred` ensures that the
+/// allocation remains valid at least until the matching call to `put_cred`.
+///
+/// [ref]: https://www.kernel.org/doc/html/latest/security/credentials.html
+#[repr(transparent)]
+pub struct Credential(Opaque<bindings::cred>);
+
+// SAFETY:
+// - `Credential::dec_ref` can be called from any thread.
+// - It is okay to send ownership of `Credential` across thread boundaries.
+unsafe impl Send for Credential {}
+
+// SAFETY: It's OK to access `Credential` through shared references from other threads because
+// we're either accessing properties that don't change or that are properly synchronised by C code.
+unsafe impl Sync for Credential {}
+
+impl Credential {
+ /// Creates a reference to a [`Credential`] from a valid pointer.
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure that `ptr` is valid and remains valid for the lifetime of the
+ /// returned [`Credential`] reference.
+ pub unsafe fn from_ptr<'a>(ptr: *const bindings::cred) -> &'a Credential {
+ // SAFETY: The safety requirements guarantee the validity of the dereference, while the
+ // `Credential` type being transparent makes the cast ok.
+ unsafe { &*ptr.cast() }
+ }
+
+ /// Get the id for this security context.
+ pub fn get_secid(&self) -> u32 {
+ let mut secid = 0;
+ // SAFETY: The invariants of this type ensures that the pointer is valid.
+ unsafe { bindings::security_cred_getsecid(self.0.get(), &mut secid) };
+ secid
+ }
+
+ /// Returns the effective UID of the given credential.
+ pub fn euid(&self) -> Kuid {
+ // SAFETY: By the type invariant, we know that `self.0` is valid. Furthermore, the `euid`
+ // field of a credential is never changed after initialization, so there is no potential
+ // for data races.
+ Kuid::from_raw(unsafe { (*self.0.get()).euid })
+ }
+}
+
+// SAFETY: The type invariants guarantee that `Credential` is always ref-counted.
+unsafe impl AlwaysRefCounted for Credential {
+ fn inc_ref(&self) {
+ // SAFETY: The existence of a shared reference means that the refcount is nonzero.
+ unsafe { bindings::get_cred(self.0.get()) };
+ }
+
+ unsafe fn dec_ref(obj: core::ptr::NonNull<Credential>) {
+ // SAFETY: The safety requirements guarantee that the refcount is nonzero. The cast is okay
+ // because `Credential` has the same representation as `struct cred`.
+ unsafe { bindings::put_cred(obj.cast().as_ptr()) };
+ }
+}
diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs
new file mode 100644
index 000000000000..db2d9658ba47
--- /dev/null
+++ b/rust/kernel/device.rs
@@ -0,0 +1,420 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Generic devices that are part of the kernel's driver model.
+//!
+//! C header: [`include/linux/device.h`](srctree/include/linux/device.h)
+
+use crate::{
+ bindings,
+ str::CStr,
+ types::{ARef, Opaque},
+};
+use core::{fmt, ptr};
+
+#[cfg(CONFIG_PRINTK)]
+use crate::c_str;
+
+/// A reference-counted device.
+///
+/// This structure represents the Rust abstraction for a C `struct device`. This implementation
+/// abstracts the usage of an already existing C `struct device` within Rust code that we get
+/// passed from the C side.
+///
+/// An instance of this abstraction can be obtained temporarily or permanently.
+///
+/// A temporary one is bound to the lifetime of the C `struct device` pointer used for creation.
+/// A permanent instance is always reference-counted and hence not restricted by any lifetime
+/// boundaries.
+///
+/// For subsystems it is recommended to create a permanent instance to wrap into a subsystem
+/// specific device structure (e.g. `pci::Device`). This is useful for passing it to drivers in
+/// `T::probe()`, such that a driver can store the `ARef<Device>` (equivalent to storing a
+/// `struct device` pointer in a C driver) for arbitrary purposes, e.g. allocating DMA coherent
+/// memory.
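+///
+/// A minimal sketch of that pattern (names are illustrative, not part of this
+/// patch):
+///
+/// ```ignore
+/// struct MyDriverData {
+///     dev: ARef<Device>,
+/// }
+///
+/// fn probe(dev: &Device) -> MyDriverData {
+///     // Take a refcount so the device outlives this scope.
+///     MyDriverData { dev: dev.into() }
+/// }
+/// ```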
+///
+/// # Invariants
+///
+/// A `Device` instance represents a valid `struct device` created by the C portion of the kernel.
+///
+/// Instances of this type are always reference-counted, that is, a call to `get_device` ensures
+/// that the allocation remains valid at least until the matching call to `put_device`.
+///
+/// `bindings::device::release` is valid to be called from any thread, hence `ARef<Device>` can be
+/// dropped from any thread.
+#[repr(transparent)]
+pub struct Device(Opaque<bindings::device>);
+
+impl Device {
+ /// Creates a new reference-counted abstraction instance from an existing `struct device` pointer.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that `ptr` is valid, non-null, and has a non-zero reference count,
+ /// i.e. it must be ensured that the reference count of the C `struct device` `ptr` points to
+ /// can't drop to zero, for the duration of this function call.
+ ///
+ /// It must also be ensured that `bindings::device::release` can be called from any thread.
+ /// While not officially documented, this should be the case for any `struct device`.
+ pub unsafe fn get_device(ptr: *mut bindings::device) -> ARef<Self> {
+ // SAFETY: By the safety requirements, `ptr` is valid.
+ unsafe { Self::as_ref(ptr) }.into()
+ }
+
+ /// Obtain the raw `struct device *`.
+ pub(crate) fn as_raw(&self) -> *mut bindings::device {
+ self.0.get()
+ }
+
+ /// Convert a raw C `struct device` pointer to a `&'a Device`.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that `ptr` is valid, non-null, and has a non-zero reference count,
+ /// i.e. it must be ensured that the reference count of the C `struct device` `ptr` points to
+ /// can't drop to zero, for the duration of this function call and the entire duration when the
+ /// returned reference exists.
+ pub unsafe fn as_ref<'a>(ptr: *mut bindings::device) -> &'a Self {
+ // SAFETY: Guaranteed by the safety requirements of the function.
+ unsafe { &*ptr.cast() }
+ }
+
+ /// Prints an emergency-level message (level 0) prefixed with device information.
+ ///
+ /// More details are available from [`dev_emerg`].
+ ///
+ /// [`dev_emerg`]: crate::dev_emerg
+ pub fn pr_emerg(&self, args: fmt::Arguments<'_>) {
+ // SAFETY: `klevel` is null-terminated, uses one of the kernel constants.
+ unsafe { self.printk(bindings::KERN_EMERG, args) };
+ }
+
+ /// Prints an alert-level message (level 1) prefixed with device information.
+ ///
+ /// More details are available from [`dev_alert`].
+ ///
+ /// [`dev_alert`]: crate::dev_alert
+ pub fn pr_alert(&self, args: fmt::Arguments<'_>) {
+ // SAFETY: `klevel` is null-terminated, uses one of the kernel constants.
+ unsafe { self.printk(bindings::KERN_ALERT, args) };
+ }
+
+ /// Prints a critical-level message (level 2) prefixed with device information.
+ ///
+ /// More details are available from [`dev_crit`].
+ ///
+ /// [`dev_crit`]: crate::dev_crit
+ pub fn pr_crit(&self, args: fmt::Arguments<'_>) {
+ // SAFETY: `klevel` is null-terminated, uses one of the kernel constants.
+ unsafe { self.printk(bindings::KERN_CRIT, args) };
+ }
+
+ /// Prints an error-level message (level 3) prefixed with device information.
+ ///
+ /// More details are available from [`dev_err`].
+ ///
+ /// [`dev_err`]: crate::dev_err
+ pub fn pr_err(&self, args: fmt::Arguments<'_>) {
+ // SAFETY: `klevel` is null-terminated, uses one of the kernel constants.
+ unsafe { self.printk(bindings::KERN_ERR, args) };
+ }
+
+ /// Prints a warning-level message (level 4) prefixed with device information.
+ ///
+ /// More details are available from [`dev_warn`].
+ ///
+ /// [`dev_warn`]: crate::dev_warn
+ pub fn pr_warn(&self, args: fmt::Arguments<'_>) {
+ // SAFETY: `klevel` is null-terminated, uses one of the kernel constants.
+ unsafe { self.printk(bindings::KERN_WARNING, args) };
+ }
+
+ /// Prints a notice-level message (level 5) prefixed with device information.
+ ///
+ /// More details are available from [`dev_notice`].
+ ///
+ /// [`dev_notice`]: crate::dev_notice
+ pub fn pr_notice(&self, args: fmt::Arguments<'_>) {
+ // SAFETY: `klevel` is null-terminated, uses one of the kernel constants.
+ unsafe { self.printk(bindings::KERN_NOTICE, args) };
+ }
+
+ /// Prints an info-level message (level 6) prefixed with device information.
+ ///
+ /// More details are available from [`dev_info`].
+ ///
+ /// [`dev_info`]: crate::dev_info
+ pub fn pr_info(&self, args: fmt::Arguments<'_>) {
+ // SAFETY: `klevel` is null-terminated, uses one of the kernel constants.
+ unsafe { self.printk(bindings::KERN_INFO, args) };
+ }
+
+ /// Prints a debug-level message (level 7) prefixed with device information.
+ ///
+ /// More details are available from [`dev_dbg`].
+ ///
+ /// [`dev_dbg`]: crate::dev_dbg
+ pub fn pr_dbg(&self, args: fmt::Arguments<'_>) {
+ if cfg!(debug_assertions) {
+ // SAFETY: `klevel` is null-terminated, uses one of the kernel constants.
+ unsafe { self.printk(bindings::KERN_DEBUG, args) };
+ }
+ }
+
+ /// Prints the provided message to the console.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that `klevel` is null-terminated; in particular, it must be one of the
+ /// `KERN_*` constants, for example, `KERN_CRIT`, `KERN_ALERT`, etc.
+ #[cfg_attr(not(CONFIG_PRINTK), allow(unused_variables))]
+ unsafe fn printk(&self, klevel: &[u8], msg: fmt::Arguments<'_>) {
+ // SAFETY: `klevel` is null-terminated and one of the kernel constants. `self.as_raw`
+ // is valid because `self` is valid. The "%pA" format string expects a pointer to
+ // `fmt::Arguments`, which is what we're passing as the last argument.
+ #[cfg(CONFIG_PRINTK)]
+ unsafe {
+ bindings::_dev_printk(
+ klevel as *const _ as *const crate::ffi::c_char,
+ self.as_raw(),
+ c_str!("%pA").as_char_ptr(),
+ &msg as *const _ as *const crate::ffi::c_void,
+ )
+ };
+ }
+
+ /// Checks whether the device property `name` is present.
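+ ///
+ /// # Examples
+ ///
+ /// A minimal usage sketch; the property name is illustrative only.
+ ///
+ /// ```no_run
+ /// # use kernel::{c_str, device::Device};
+ /// fn has_some_property(dev: &Device) -> bool {
+ ///     dev.property_present(c_str!("some-property"))
+ /// }
+ /// ```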
+ pub fn property_present(&self, name: &CStr) -> bool {
+ // SAFETY: By the invariant of `CStr`, `name` is null-terminated.
+ unsafe { bindings::device_property_present(self.as_raw().cast_const(), name.as_char_ptr()) }
+ }
+}
+
+// SAFETY: Instances of `Device` are always reference-counted.
+unsafe impl crate::types::AlwaysRefCounted for Device {
+ fn inc_ref(&self) {
+ // SAFETY: The existence of a shared reference guarantees that the refcount is non-zero.
+ unsafe { bindings::get_device(self.as_raw()) };
+ }
+
+ unsafe fn dec_ref(obj: ptr::NonNull<Self>) {
+ // SAFETY: The safety requirements guarantee that the refcount is non-zero.
+ unsafe { bindings::put_device(obj.cast().as_ptr()) }
+ }
+}
+
+// SAFETY: By the type invariant, `Device` can be sent to any thread.
+unsafe impl Send for Device {}
+
+// SAFETY: `Device` can be shared among threads because all immutable methods are protected by the
+// synchronization in `struct device`.
+unsafe impl Sync for Device {}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! dev_printk {
+ ($method:ident, $dev:expr, $($f:tt)*) => {
+ {
+ ($dev).$method(core::format_args!($($f)*));
+ }
+ }
+}
+
+/// Prints an emergency-level message (level 0) prefixed with device information.
+///
+/// This level should be used if the system is unusable.
+///
+/// Equivalent to the kernel's `dev_emerg` macro.
+///
+/// Mimics the interface of [`std::print!`]. More information about the syntax is available from
+/// [`core::fmt`] and `alloc::format!`.
+///
+/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::device::Device;
+///
+/// fn example(dev: &Device) {
+/// dev_emerg!(dev, "hello {}\n", "there");
+/// }
+/// ```
+#[macro_export]
+macro_rules! dev_emerg {
+ ($($f:tt)*) => { $crate::dev_printk!(pr_emerg, $($f)*); }
+}
+
+/// Prints an alert-level message (level 1) prefixed with device information.
+///
+/// This level should be used if action must be taken immediately.
+///
+/// Equivalent to the kernel's `dev_alert` macro.
+///
+/// Mimics the interface of [`std::print!`]. More information about the syntax is available from
+/// [`core::fmt`] and `alloc::format!`.
+///
+/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::device::Device;
+///
+/// fn example(dev: &Device) {
+/// dev_alert!(dev, "hello {}\n", "there");
+/// }
+/// ```
+#[macro_export]
+macro_rules! dev_alert {
+ ($($f:tt)*) => { $crate::dev_printk!(pr_alert, $($f)*); }
+}
+
+/// Prints a critical-level message (level 2) prefixed with device information.
+///
+/// This level should be used in critical conditions.
+///
+/// Equivalent to the kernel's `dev_crit` macro.
+///
+/// Mimics the interface of [`std::print!`]. More information about the syntax is available from
+/// [`core::fmt`] and `alloc::format!`.
+///
+/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::device::Device;
+///
+/// fn example(dev: &Device) {
+/// dev_crit!(dev, "hello {}\n", "there");
+/// }
+/// ```
+#[macro_export]
+macro_rules! dev_crit {
+ ($($f:tt)*) => { $crate::dev_printk!(pr_crit, $($f)*); }
+}
+
+/// Prints an error-level message (level 3) prefixed with device information.
+///
+/// This level should be used in error conditions.
+///
+/// Equivalent to the kernel's `dev_err` macro.
+///
+/// Mimics the interface of [`std::print!`]. More information about the syntax is available from
+/// [`core::fmt`] and `alloc::format!`.
+///
+/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::device::Device;
+///
+/// fn example(dev: &Device) {
+/// dev_err!(dev, "hello {}\n", "there");
+/// }
+/// ```
+#[macro_export]
+macro_rules! dev_err {
+ ($($f:tt)*) => { $crate::dev_printk!(pr_err, $($f)*); }
+}
+
+/// Prints a warning-level message (level 4) prefixed with device information.
+///
+/// This level should be used in warning conditions.
+///
+/// Equivalent to the kernel's `dev_warn` macro.
+///
+/// Mimics the interface of [`std::print!`]. More information about the syntax is available from
+/// [`core::fmt`] and `alloc::format!`.
+///
+/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::device::Device;
+///
+/// fn example(dev: &Device) {
+/// dev_warn!(dev, "hello {}\n", "there");
+/// }
+/// ```
+#[macro_export]
+macro_rules! dev_warn {
+ ($($f:tt)*) => { $crate::dev_printk!(pr_warn, $($f)*); }
+}
+
+/// Prints a notice-level message (level 5) prefixed with device information.
+///
+/// This level should be used in normal but significant conditions.
+///
+/// Equivalent to the kernel's `dev_notice` macro.
+///
+/// Mimics the interface of [`std::print!`]. More information about the syntax is available from
+/// [`core::fmt`] and `alloc::format!`.
+///
+/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::device::Device;
+///
+/// fn example(dev: &Device) {
+/// dev_notice!(dev, "hello {}\n", "there");
+/// }
+/// ```
+#[macro_export]
+macro_rules! dev_notice {
+ ($($f:tt)*) => { $crate::dev_printk!(pr_notice, $($f)*); }
+}
+
+/// Prints an info-level message (level 6) prefixed with device information.
+///
+/// This level should be used for informational messages.
+///
+/// Equivalent to the kernel's `dev_info` macro.
+///
+/// Mimics the interface of [`std::print!`]. More information about the syntax is available from
+/// [`core::fmt`] and `alloc::format!`.
+///
+/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::device::Device;
+///
+/// fn example(dev: &Device) {
+/// dev_info!(dev, "hello {}\n", "there");
+/// }
+/// ```
+#[macro_export]
+macro_rules! dev_info {
+ ($($f:tt)*) => { $crate::dev_printk!(pr_info, $($f)*); }
+}
+
+/// Prints a debug-level message (level 7) prefixed with device information.
+///
+/// This level should be used for debug messages.
+///
+/// Equivalent to the kernel's `dev_dbg` macro, except that it doesn't support dynamic debug yet.
+///
+/// Mimics the interface of [`std::print!`]. More information about the syntax is available from
+/// [`core::fmt`] and `alloc::format!`.
+///
+/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::device::Device;
+///
+/// fn example(dev: &Device) {
+/// dev_dbg!(dev, "hello {}\n", "there");
+/// }
+/// ```
+#[macro_export]
+macro_rules! dev_dbg {
+ ($($f:tt)*) => { $crate::dev_printk!(pr_dbg, $($f)*); }
+}
diff --git a/rust/kernel/device_id.rs b/rust/kernel/device_id.rs
new file mode 100644
index 000000000000..e5859217a579
--- /dev/null
+++ b/rust/kernel/device_id.rs
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Generic implementation of device IDs.
+//!
+//! Each bus / subsystem that matches devices and drivers through a bus / subsystem specific ID is
+//! expected to implement [`RawDeviceId`].
+
+use core::mem::MaybeUninit;
+
+/// Marker trait to indicate a Rust device ID type represents a corresponding C device ID type.
+///
+/// This is meant to be implemented by buses/subsystems so that they can use [`IdTable`] to
+/// guarantee (at compile-time) zero-termination of device id tables provided by drivers.
+///
+/// # Safety
+///
+/// Implementers must ensure that:
+/// - `Self` is layout-compatible with [`RawDeviceId::RawType`]; i.e. it's safe to transmute
+/// `Self` to [`RawDeviceId::RawType`].
+///
+/// This requirement is needed so `IdArray::new` can convert `Self` to `RawType` when building
+/// the ID table.
+///
+/// Ideally, this should be achieved using a const function that does the conversion instead of
+/// a transmute; however, const trait functions rely on the `const_trait_impl` unstable feature,
+/// which is broken/gone in Rust 1.73.
+///
+/// - `DRIVER_DATA_OFFSET` is the offset of the context/data field of the device ID (usually
+/// named `driver_data`), and the field is suitably sized to write a `usize` value.
+///
+/// Similar to the previous requirement, the data should ideally be added during `Self` to
+/// `RawType` conversion, but there's currently no way to do it when using traits in const.
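+///
+/// # Examples
+///
+/// A sketch of how a hypothetical bus could implement this trait; `bindings::sample_device_id`
+/// and its `driver_data` field are illustrative assumptions, not a real kernel API.
+///
+/// ```ignore
+/// use kernel::device_id::RawDeviceId;
+///
+/// #[repr(transparent)]
+/// struct DeviceId(bindings::sample_device_id);
+///
+/// // SAFETY: `DeviceId` is a `#[repr(transparent)]` wrapper of `sample_device_id`, so the two
+/// // are layout-compatible, and `DRIVER_DATA_OFFSET` points at the `usize`-sized `driver_data`
+/// // field (both assumed for this sketch).
+/// unsafe impl RawDeviceId for DeviceId {
+///     type RawType = bindings::sample_device_id;
+///
+///     const DRIVER_DATA_OFFSET: usize =
+///         core::mem::offset_of!(bindings::sample_device_id, driver_data);
+///
+///     fn index(&self) -> usize {
+///         self.0.driver_data
+///     }
+/// }
+/// ```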
+pub unsafe trait RawDeviceId {
+ /// The raw type that holds the device id.
+ ///
+ /// Id tables created from [`Self`] are going to hold this type in their zero-terminated array.
+ type RawType: Copy;
+
+ /// The offset to the context/data field.
+ const DRIVER_DATA_OFFSET: usize;
+
+ /// The index stored at `DRIVER_DATA_OFFSET` of the implementor of the [`RawDeviceId`] trait.
+ fn index(&self) -> usize;
+}
+
+/// A zero-terminated device id array.
+#[repr(C)]
+pub struct RawIdArray<T: RawDeviceId, const N: usize> {
+ ids: [T::RawType; N],
+ sentinel: MaybeUninit<T::RawType>,
+}
+
+impl<T: RawDeviceId, const N: usize> RawIdArray<T, N> {
+ #[doc(hidden)]
+ pub const fn size(&self) -> usize {
+ core::mem::size_of::<Self>()
+ }
+}
+
+/// A zero-terminated device id array, followed by context data.
+#[repr(C)]
+pub struct IdArray<T: RawDeviceId, U, const N: usize> {
+ raw_ids: RawIdArray<T, N>,
+ id_infos: [U; N],
+}
+
+impl<T: RawDeviceId, U, const N: usize> IdArray<T, U, N> {
+ /// Creates a new instance of the array.
+ ///
+ /// The contents are derived from the given identifiers and context information.
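+ ///
+ /// # Examples
+ ///
+ /// A sketch of building a small table, reusing the hypothetical `DeviceId` from the
+ /// [`RawDeviceId`] example; `ID_A`, `ID_B` and the `u32` info values are illustrative.
+ ///
+ /// ```ignore
+ /// use kernel::device_id::IdArray;
+ ///
+ /// const TABLE: IdArray<DeviceId, u32, 2> = IdArray::new([(ID_A, 10), (ID_B, 20)]);
+ /// ```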
+ pub const fn new(ids: [(T, U); N]) -> Self {
+ let mut raw_ids = [const { MaybeUninit::<T::RawType>::uninit() }; N];
+ let mut infos = [const { MaybeUninit::uninit() }; N];
+
+ let mut i = 0usize;
+ while i < N {
+ // SAFETY: by the safety requirement of `RawDeviceId`, we're guaranteed that `T` is
+ // layout-wise compatible with `RawType`.
+ raw_ids[i] = unsafe { core::mem::transmute_copy(&ids[i].0) };
+ // SAFETY: by the safety requirement of `RawDeviceId`, this would be effectively
+ // `raw_ids[i].driver_data = i;`.
+ unsafe {
+ raw_ids[i]
+ .as_mut_ptr()
+ .byte_offset(T::DRIVER_DATA_OFFSET as _)
+ .cast::<usize>()
+ .write(i);
+ }
+
+ // SAFETY: this is effectively a move: `infos[i] = ids[i].1`. We make a copy here but
+ // later forget `ids`.
+ infos[i] = MaybeUninit::new(unsafe { core::ptr::read(&ids[i].1) });
+ i += 1;
+ }
+
+ core::mem::forget(ids);
+
+ Self {
+ raw_ids: RawIdArray {
+ // SAFETY: this is effectively `array_assume_init`, which is unstable, so we use
+ // `transmute_copy` instead. We have initialized all elements of `raw_ids` so this
+ // `array_assume_init` is safe.
+ ids: unsafe { core::mem::transmute_copy(&raw_ids) },
+ sentinel: MaybeUninit::zeroed(),
+ },
+ // SAFETY: We have initialized all elements of `infos` so this `array_assume_init` is
+ // safe.
+ id_infos: unsafe { core::mem::transmute_copy(&infos) },
+ }
+ }
+
+ /// Reference to the contained [`RawIdArray`].
+ pub const fn raw_ids(&self) -> &RawIdArray<T, N> {
+ &self.raw_ids
+ }
+}
+
+/// A device id table.
+///
+/// This trait is only implemented by `IdArray`.
+///
+/// The purpose of this trait is to allow a `&'static dyn IdTable<T, U>` to be used in contexts
+/// where the `N` of the underlying `IdArray` doesn't matter.
+pub trait IdTable<T: RawDeviceId, U> {
+ /// Obtain the pointer to the ID table.
+ fn as_ptr(&self) -> *const T::RawType;
+
+ /// Obtain the bus specific device ID at the given index.
+ fn id(&self, index: usize) -> &T::RawType;
+
+ /// Obtain the driver-specific information at the given index.
+ fn info(&self, index: usize) -> &U;
+}
+
+impl<T: RawDeviceId, U, const N: usize> IdTable<T, U> for IdArray<T, U, N> {
+ fn as_ptr(&self) -> *const T::RawType {
+ // This cannot be `self.raw_ids.ids.as_ptr()`, as the returned pointer must have correct
+ // provenance to access the sentinel.
+ (self as *const Self).cast()
+ }
+
+ fn id(&self, index: usize) -> &T::RawType {
+ &self.raw_ids.ids[index]
+ }
+
+ fn info(&self, index: usize) -> &U {
+ &self.id_infos[index]
+ }
+}
+
+/// Creates a device table alias for modpost.
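+///
+/// # Examples
+///
+/// A hypothetical invocation for an `of` table; `MODULE_OF_TABLE` and `OF_TABLE` are
+/// illustrative names for the exported alias and a driver's ID table.
+///
+/// ```ignore
+/// kernel::module_device_table!("of", MODULE_OF_TABLE, OF_TABLE);
+/// ```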
+#[macro_export]
+macro_rules! module_device_table {
+ ($table_type: literal, $module_table_name:ident, $table_name:ident) => {
+ #[rustfmt::skip]
+ #[export_name =
+ concat!("__mod_device_table__", $table_type,
+ "__", module_path!(),
+ "_", line!(),
+ "_", stringify!($table_name))
+ ]
+ static $module_table_name: [core::mem::MaybeUninit<u8>; $table_name.raw_ids().size()] =
+ unsafe { core::mem::transmute_copy($table_name.raw_ids()) };
+ };
+}
diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs
new file mode 100644
index 000000000000..942376f6f3af
--- /dev/null
+++ b/rust/kernel/devres.rs
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Devres abstraction
+//!
+//! [`Devres`] represents an abstraction for the kernel devres (device resource management)
+//! implementation.
+
+use crate::{
+ alloc::Flags,
+ bindings,
+ device::Device,
+ error::{Error, Result},
+ ffi::c_void,
+ prelude::*,
+ revocable::Revocable,
+ sync::Arc,
+ types::ARef,
+};
+
+use core::ops::Deref;
+
+#[pin_data]
+struct DevresInner<T> {
+ dev: ARef<Device>,
+ callback: unsafe extern "C" fn(*mut c_void),
+ #[pin]
+ data: Revocable<T>,
+}
+
+/// This abstraction is meant to be used by subsystems to containerize [`Device`] bound resources to
+/// manage their lifetime.
+///
+/// [`Device`] bound resources should be freed either when the resource goes out of scope or when
+/// the [`Device`] is unbound, whichever happens first.
+///
+/// To achieve that [`Devres`] registers a devres callback on creation, which is called once the
+/// [`Device`] is unbound, revoking access to the encapsulated resource (see also [`Revocable`]).
+///
+/// After the [`Device`] has been unbound it is no longer possible to access the encapsulated
+/// resource.
+///
+/// [`Devres`] users should make sure to simply free the corresponding backing resource in `T`'s
+/// [`Drop`] implementation.
+///
+/// # Example
+///
+/// ```no_run
+/// # use kernel::{bindings, c_str, device::Device, devres::Devres, io::{Io, IoRaw}};
+/// # use core::ops::Deref;
+///
+/// // See also [`pci::Bar`] for a real example.
+/// struct IoMem<const SIZE: usize>(IoRaw<SIZE>);
+///
+/// impl<const SIZE: usize> IoMem<SIZE> {
+/// /// # Safety
+/// ///
+/// /// [`paddr`, `paddr` + `SIZE`) must be a valid MMIO region that is mappable into the CPU's
+/// /// virtual address space.
+/// unsafe fn new(paddr: usize) -> Result<Self> {
+/// // SAFETY: By the safety requirements of this function [`paddr`, `paddr` + `SIZE`) is
+/// // valid for `ioremap`.
+/// let addr = unsafe { bindings::ioremap(paddr as _, SIZE as _) };
+/// if addr.is_null() {
+/// return Err(ENOMEM);
+/// }
+///
+/// Ok(IoMem(IoRaw::new(addr as _, SIZE)?))
+/// }
+/// }
+///
+/// impl<const SIZE: usize> Drop for IoMem<SIZE> {
+/// fn drop(&mut self) {
+/// // SAFETY: `self.0.addr()` is guaranteed to be properly mapped by `Self::new`.
+/// unsafe { bindings::iounmap(self.0.addr() as _); };
+/// }
+/// }
+///
+/// impl<const SIZE: usize> Deref for IoMem<SIZE> {
+/// type Target = Io<SIZE>;
+///
+/// fn deref(&self) -> &Self::Target {
+/// // SAFETY: The memory range stored in `self` has been properly mapped in `Self::new`.
+/// unsafe { Io::from_raw(&self.0) }
+/// }
+/// }
+/// # fn no_run() -> Result<(), Error> {
+/// # // SAFETY: Invalid usage; just for the example to get an `ARef<Device>` instance.
+/// # let dev = unsafe { Device::get_device(core::ptr::null_mut()) };
+///
+/// // SAFETY: Invalid usage for example purposes.
+/// let iomem = unsafe { IoMem::<{ core::mem::size_of::<u32>() }>::new(0xBAAAAAAD)? };
+/// let devres = Devres::new(&dev, iomem, GFP_KERNEL)?;
+///
+/// let res = devres.try_access().ok_or(ENXIO)?;
+/// res.writel(0x42, 0x0);
+/// # Ok(())
+/// # }
+/// ```
+pub struct Devres<T>(Arc<DevresInner<T>>);
+
+impl<T> DevresInner<T> {
+ fn new(dev: &Device, data: T, flags: Flags) -> Result<Arc<DevresInner<T>>> {
+ let inner = Arc::pin_init(
+ pin_init!( DevresInner {
+ dev: dev.into(),
+ callback: Self::devres_callback,
+ data <- Revocable::new(data),
+ }),
+ flags,
+ )?;
+
+ // Convert `Arc<DevresInner>` into a raw pointer and make devres own this reference until
+ // `Self::devres_callback` is called.
+ let data = inner.clone().into_raw();
+
+ // SAFETY: `devm_add_action` guarantees to call `Self::devres_callback` once `dev` is
+ // detached.
+ let ret =
+ unsafe { bindings::devm_add_action(dev.as_raw(), Some(inner.callback), data as _) };
+
+ if ret != 0 {
+ // SAFETY: We just created another reference to `inner` in order to pass it to
+ // `bindings::devm_add_action`. If `bindings::devm_add_action` fails, we have to drop
+ // this reference accordingly.
+ let _ = unsafe { Arc::from_raw(data) };
+ return Err(Error::from_errno(ret));
+ }
+
+ Ok(inner)
+ }
+
+ fn as_ptr(&self) -> *const Self {
+ self as _
+ }
+
+ fn remove_action(this: &Arc<Self>) {
+ // SAFETY:
+ // - `self.inner.dev` is a valid `Device`,
+ // - the `action` and `data` pointers are the exact same ones as given to devm_add_action()
+ // previously,
+ // - `self` is always valid, even if the action has been released already.
+ let ret = unsafe {
+ bindings::devm_remove_action_nowarn(
+ this.dev.as_raw(),
+ Some(this.callback),
+ this.as_ptr() as _,
+ )
+ };
+
+ if ret == 0 {
+ // SAFETY: We leaked an `Arc` reference to devm_add_action() in `DevresInner::new`; if
+ // devm_remove_action_nowarn() was successful we can (and have to) claim back ownership
+ // of this reference.
+ let _ = unsafe { Arc::from_raw(this.as_ptr()) };
+ }
+ }
+
+ #[allow(clippy::missing_safety_doc)]
+ unsafe extern "C" fn devres_callback(ptr: *mut kernel::ffi::c_void) {
+ let ptr = ptr as *mut DevresInner<T>;
+ // Devres owned this memory; now that we received the callback, drop the `Arc` and hence the
+ // reference.
+ // SAFETY: Safe, since we leaked an `Arc` reference to devm_add_action() in
+ // `DevresInner::new`.
+ let inner = unsafe { Arc::from_raw(ptr) };
+
+ inner.data.revoke();
+ }
+}
+
+impl<T> Devres<T> {
+ /// Creates a new [`Devres`] instance of the given `data`. The `data` encapsulated within the
+ /// returned `Devres` instance will be revoked once the device is detached.
+ pub fn new(dev: &Device, data: T, flags: Flags) -> Result<Self> {
+ let inner = DevresInner::new(dev, data, flags)?;
+
+ Ok(Devres(inner))
+ }
+
+ /// Same as [`Devres::new`], but does not return a `Devres` instance. Instead, the given `data`
+ /// is owned by devres and will be revoked / dropped once the device is detached.
+ pub fn new_foreign_owned(dev: &Device, data: T, flags: Flags) -> Result {
+ let _ = DevresInner::new(dev, data, flags)?;
+
+ Ok(())
+ }
+}
+
+impl<T> Deref for Devres<T> {
+ type Target = Revocable<T>;
+
+ fn deref(&self) -> &Self::Target {
+ &self.0.data
+ }
+}
+
+impl<T> Drop for Devres<T> {
+ fn drop(&mut self) {
+ DevresInner::remove_action(&self.0);
+ }
+}
diff --git a/rust/kernel/driver.rs b/rust/kernel/driver.rs
new file mode 100644
index 000000000000..2a16d5e64e6c
--- /dev/null
+++ b/rust/kernel/driver.rs
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Generic support for drivers of different buses (e.g., PCI, Platform, Amba, etc.).
+//!
+//! Each bus / subsystem is expected to implement [`RegistrationOps`], which allows drivers to
+//! register using the [`Registration`] class.
+
+use crate::error::{Error, Result};
+use crate::{device, init::PinInit, of, str::CStr, try_pin_init, types::Opaque, ThisModule};
+use core::pin::Pin;
+use macros::{pin_data, pinned_drop};
+
+/// The [`RegistrationOps`] trait serves as a generic interface for subsystems (e.g., PCI, Platform,
+/// Amba, etc.) to provide the corresponding subsystem specific implementation to register /
+/// unregister a driver of the particular type (`RegType`).
+///
+/// For instance, the PCI subsystem would set `RegType` to `bindings::pci_driver` and call
+/// `bindings::__pci_register_driver` from `RegistrationOps::register` and
+/// `bindings::pci_unregister_driver` from `RegistrationOps::unregister`.
+///
+/// # Safety
+///
+/// A call to [`RegistrationOps::unregister`] for a given instance of `RegType` is only valid if a
+/// preceding call to [`RegistrationOps::register`] has been successful.
+pub unsafe trait RegistrationOps {
+ /// The type that holds information about the registration. This is typically a struct defined
+ /// by the C portion of the kernel.
+ type RegType: Default;
+
+ /// Registers a driver.
+ ///
+ /// # Safety
+ ///
+ /// On success, `reg` must remain pinned and valid until the matching call to
+ /// [`RegistrationOps::unregister`].
+ unsafe fn register(
+ reg: &Opaque<Self::RegType>,
+ name: &'static CStr,
+ module: &'static ThisModule,
+ ) -> Result;
+
+ /// Unregisters a driver previously registered with [`RegistrationOps::register`].
+ ///
+ /// # Safety
+ ///
+ /// Must only be called after a preceding successful call to [`RegistrationOps::register`] for
+ /// the same `reg`.
+ unsafe fn unregister(reg: &Opaque<Self::RegType>);
+}
+
+/// A [`Registration`] is a generic type that represents the registration of some driver type (e.g.
+/// `bindings::pci_driver`). Therefore a [`Registration`] must be initialized with a type that
+/// implements the [`RegistrationOps`] trait, such that the generic `T::register` and
+/// `T::unregister` calls result in the subsystem specific registration calls.
+///
+/// Once the `Registration` structure is dropped, the driver is unregistered.
+#[pin_data(PinnedDrop)]
+pub struct Registration<T: RegistrationOps> {
+ #[pin]
+ reg: Opaque<T::RegType>,
+}
+
+// SAFETY: `Registration` has no fields or methods accessible via `&Registration`, so it is safe to
+// share references to it with multiple threads as nothing can be done.
+unsafe impl<T: RegistrationOps> Sync for Registration<T> {}
+
+// SAFETY: Both registration and unregistration are implemented in C and safe to be performed from
+// any thread, so `Registration` is `Send`.
+unsafe impl<T: RegistrationOps> Send for Registration<T> {}
+
+impl<T: RegistrationOps> Registration<T> {
+ /// Creates a new instance of the registration object.
+ pub fn new(name: &'static CStr, module: &'static ThisModule) -> impl PinInit<Self, Error> {
+ try_pin_init!(Self {
+ reg <- Opaque::try_ffi_init(|ptr: *mut T::RegType| {
+ // SAFETY: `try_ffi_init` guarantees that `ptr` is valid for write.
+ unsafe { ptr.write(T::RegType::default()) };
+
+ // SAFETY: `try_ffi_init` guarantees that `ptr` is valid for write, and it has
+ // just been initialised above, so it's also valid for read.
+ let drv = unsafe { &*(ptr as *const Opaque<T::RegType>) };
+
+ // SAFETY: `drv` is guaranteed to be pinned until `T::unregister`.
+ unsafe { T::register(drv, name, module) }
+ }),
+ })
+ }
+}
+
+#[pinned_drop]
+impl<T: RegistrationOps> PinnedDrop for Registration<T> {
+ fn drop(self: Pin<&mut Self>) {
+ // SAFETY: The existence of `self` guarantees that `self.reg` has previously been
+ // successfully registered with `T::register`.
+ unsafe { T::unregister(&self.reg) };
+ }
+}
+
+/// Declares a kernel module that exposes a single driver.
+///
+/// It is meant to be used as a helper by other subsystems so they can more easily expose their own
+/// macros.
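+///
+/// # Examples
+///
+/// A sketch of how a subsystem could wrap this macro into its own driver-registration macro, in
+/// the spirit of the platform subsystem; the exact adapter type is illustrative.
+///
+/// ```ignore
+/// #[macro_export]
+/// macro_rules! module_platform_driver {
+///     ($($f:tt)*) => {
+///         $crate::module_driver!(<T>, $crate::platform::Adapter<T>, { $($f)* });
+///     };
+/// }
+/// ```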
+#[macro_export]
+macro_rules! module_driver {
+ (<$gen_type:ident>, $driver_ops:ty, { type: $type:ty, $($f:tt)* }) => {
+ type Ops<$gen_type> = $driver_ops;
+
+ #[$crate::prelude::pin_data]
+ struct DriverModule {
+ #[pin]
+ _driver: $crate::driver::Registration<Ops<$type>>,
+ }
+
+ impl $crate::InPlaceModule for DriverModule {
+ fn init(
+ module: &'static $crate::ThisModule
+ ) -> impl $crate::init::PinInit<Self, $crate::error::Error> {
+ $crate::try_pin_init!(Self {
+ _driver <- $crate::driver::Registration::new(
+ <Self as $crate::ModuleMetadata>::NAME,
+ module,
+ ),
+ })
+ }
+ }
+
+ $crate::prelude::module! {
+ type: DriverModule,
+ $($f)*
+ }
+ }
+}
+
+/// The bus independent adapter to match drivers and devices.
+///
+/// This trait should be implemented by the bus specific adapter, which represents the connection
+/// of a device and a driver.
+///
+/// It provides bus independent functions for device / driver interactions.
+pub trait Adapter {
+ /// The type holding driver private data about each device id supported by the driver.
+ type IdInfo: 'static;
+
+ /// The [`of::IdTable`] of the corresponding driver.
+ fn of_id_table() -> Option<of::IdTable<Self::IdInfo>>;
+
+ /// Returns the driver's private data from the matching entry in the [`of::IdTable`], if any.
+ ///
+ /// If this returns `None`, it means there is no match with an entry in the [`of::IdTable`].
+ #[cfg(CONFIG_OF)]
+ fn of_id_info(dev: &device::Device) -> Option<&'static Self::IdInfo> {
+ let table = Self::of_id_table()?;
+
+ // SAFETY:
+ // - `table` has static lifetime, hence it's valid for read,
+ // - `dev` is guaranteed to be valid while it's alive, and so is `dev.as_raw()`.
+ let raw_id = unsafe { bindings::of_match_device(table.as_ptr(), dev.as_raw()) };
+
+ if raw_id.is_null() {
+ None
+ } else {
+ // SAFETY: `DeviceId` is a `#[repr(transparent)]` wrapper of `struct of_device_id` and
+ // does not add additional invariants, so it's safe to transmute.
+ let id = unsafe { &*raw_id.cast::<of::DeviceId>() };
+
+ Some(table.info(<of::DeviceId as crate::device_id::RawDeviceId>::index(id)))
+ }
+ }
+
+ #[cfg(not(CONFIG_OF))]
+ #[allow(missing_docs)]
+ fn of_id_info(_dev: &device::Device) -> Option<&'static Self::IdInfo> {
+ None
+ }
+
+ /// Returns the driver's private data from the matching entry of any of the ID tables, if any.
+ ///
+ /// If this returns `None`, it means that there is no match in any of the ID tables directly
+ /// associated with a [`device::Device`].
+ fn id_info(dev: &device::Device) -> Option<&'static Self::IdInfo> {
+ let id = Self::of_id_info(dev);
+ if id.is_some() {
+ return id;
+ }
+
+ None
+ }
+}
diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs
index 4f0c1edd63b7..a194d83e6835 100644
--- a/rust/kernel/error.rs
+++ b/rust/kernel/error.rs
@@ -4,15 +4,13 @@
//!
//! C header: [`include/uapi/asm-generic/errno-base.h`](srctree/include/uapi/asm-generic/errno-base.h)
-use crate::str::CStr;
-
-use alloc::{
- alloc::{AllocError, LayoutError},
- collections::TryReserveError,
+use crate::{
+ alloc::{layout::LayoutError, AllocError},
+ str::CStr,
};
-use core::convert::From;
use core::fmt;
+use core::num::NonZeroI32;
use core::num::TryFromIntError;
use core::str::Utf8Error;
@@ -24,7 +22,11 @@ pub mod code {
$(
#[doc = $doc]
)*
- pub const $err: super::Error = super::Error(-(crate::bindings::$err as i32));
+ pub const $err: super::Error =
+ match super::Error::try_from_errno(-(crate::bindings::$err as i32)) {
+ Some(err) => err,
+ None => panic!("Invalid errno in `declare_err!`"),
+ };
};
}
@@ -92,26 +94,36 @@ pub mod code {
///
/// The value is a valid `errno` (i.e. `>= -MAX_ERRNO && < 0`).
#[derive(Clone, Copy, PartialEq, Eq)]
-pub struct Error(core::ffi::c_int);
+pub struct Error(NonZeroI32);
impl Error {
/// Creates an [`Error`] from a kernel error code.
///
/// It is a bug to pass an out-of-range `errno`. `EINVAL` would
/// be returned in such a case.
- pub(crate) fn from_errno(errno: core::ffi::c_int) -> Error {
- if errno < -(bindings::MAX_ERRNO as i32) || errno >= 0 {
+ pub fn from_errno(errno: crate::ffi::c_int) -> Error {
+ if let Some(error) = Self::try_from_errno(errno) {
+ error
+ } else {
// TODO: Make it a `WARN_ONCE` once available.
crate::pr_warn!(
- "attempted to create `Error` with out of range `errno`: {}",
+ "attempted to create `Error` with out of range `errno`: {}\n",
errno
);
- return code::EINVAL;
+ code::EINVAL
}
+ }
- // INVARIANT: The check above ensures the type invariant
- // will hold.
- Error(errno)
+ /// Creates an [`Error`] from a kernel error code.
+ ///
+ /// Returns [`None`] if `errno` is out-of-range.
+ const fn try_from_errno(errno: crate::ffi::c_int) -> Option<Error> {
+ if errno < -(bindings::MAX_ERRNO as i32) || errno >= 0 {
+ return None;
+ }
+
+ // SAFETY: `errno` is checked above to be in a valid range.
+ Some(unsafe { Error::from_errno_unchecked(errno) })
}
/// Creates an [`Error`] from a kernel error code.
@@ -119,29 +131,35 @@ impl Error {
/// # Safety
///
/// `errno` must be within error code range (i.e. `>= -MAX_ERRNO && < 0`).
- unsafe fn from_errno_unchecked(errno: core::ffi::c_int) -> Error {
+ const unsafe fn from_errno_unchecked(errno: crate::ffi::c_int) -> Error {
// INVARIANT: The contract ensures the type invariant
// will hold.
- Error(errno)
+ // SAFETY: The caller guarantees `errno` is non-zero.
+ Error(unsafe { NonZeroI32::new_unchecked(errno) })
}
/// Returns the kernel error code.
- pub fn to_errno(self) -> core::ffi::c_int {
- self.0
+ pub fn to_errno(self) -> crate::ffi::c_int {
+ self.0.get()
+ }
+
+ #[cfg(CONFIG_BLOCK)]
+ pub(crate) fn to_blk_status(self) -> bindings::blk_status_t {
+ // SAFETY: `self.0` is a valid error due to its invariant.
+ unsafe { bindings::errno_to_blk_status(self.0.get()) }
}
/// Returns the error encoded as a pointer.
- #[allow(dead_code)]
- pub(crate) fn to_ptr<T>(self) -> *mut T {
+ pub fn to_ptr<T>(self) -> *mut T {
// SAFETY: `self.0` is a valid error due to its invariant.
- unsafe { bindings::ERR_PTR(self.0.into()) as *mut _ }
+ unsafe { bindings::ERR_PTR(self.0.get() as _) as *mut _ }
}
/// Returns a string representing the error, if one exists.
- #[cfg(not(testlib))]
+ #[cfg(not(any(test, testlib)))]
pub fn name(&self) -> Option<&'static CStr> {
// SAFETY: Just an FFI call, there are no extra safety requirements.
- let ptr = unsafe { bindings::errname(-self.0) };
+ let ptr = unsafe { bindings::errname(-self.0.get()) };
if ptr.is_null() {
None
} else {
@@ -155,7 +173,7 @@ impl Error {
/// When `testlib` is configured, this always returns `None` to avoid the dependency on a
/// kernel function so that tests that use this (e.g., by calling [`Result::unwrap`]) can still
/// run in userspace.
- #[cfg(testlib)]
+ #[cfg(any(test, testlib))]
pub fn name(&self) -> Option<&'static CStr> {
None
}
@@ -166,9 +184,11 @@ impl fmt::Debug for Error {
match self.name() {
// Print out number if no name can be found.
None => f.debug_tuple("Error").field(&-self.0).finish(),
- // SAFETY: These strings are ASCII-only.
Some(name) => f
- .debug_tuple(unsafe { core::str::from_utf8_unchecked(name) })
+ .debug_tuple(
+ // SAFETY: These strings are ASCII-only.
+ unsafe { core::str::from_utf8_unchecked(name) },
+ )
.finish(),
}
}
@@ -192,12 +212,6 @@ impl From<Utf8Error> for Error {
}
}
-impl From<TryReserveError> for Error {
- fn from(_: TryReserveError) -> Error {
- code::ENOMEM
- }
-}
-
impl From<LayoutError> for Error {
fn from(_: LayoutError) -> Error {
code::ENOMEM
@@ -240,7 +254,7 @@ pub type Result<T = (), E = Error> = core::result::Result<T, E>;
/// Converts an integer as returned by a C kernel function to an error if it's negative, and
/// `Ok(())` otherwise.
-pub fn to_result(err: core::ffi::c_int) -> Result {
+pub fn to_result(err: crate::ffi::c_int) -> Result {
if err < 0 {
Err(Error::from_errno(err))
} else {
@@ -263,25 +277,21 @@ pub fn to_result(err: core::ffi::c_int) -> Result {
/// fn devm_platform_ioremap_resource(
/// pdev: &mut PlatformDevice,
/// index: u32,
-/// ) -> Result<*mut core::ffi::c_void> {
-/// // SAFETY: FFI call.
-/// unsafe {
-/// from_err_ptr(bindings::devm_platform_ioremap_resource(
-/// pdev.to_ptr(),
-/// index,
-/// ))
-/// }
+/// ) -> Result<*mut kernel::ffi::c_void> {
+/// // SAFETY: `pdev` points to a valid platform device. There are no safety requirements
+/// // on `index`.
+/// from_err_ptr(unsafe { bindings::devm_platform_ioremap_resource(pdev.to_ptr(), index) })
/// }
/// ```
-// TODO: Remove `dead_code` marker once an in-kernel client is available.
-#[allow(dead_code)]
-pub(crate) fn from_err_ptr<T>(ptr: *mut T) -> Result<*mut T> {
- // CAST: Casting a pointer to `*const core::ffi::c_void` is always valid.
- let const_ptr: *const core::ffi::c_void = ptr.cast();
+pub fn from_err_ptr<T>(ptr: *mut T) -> Result<*mut T> {
+ // CAST: Casting a pointer to `*const crate::ffi::c_void` is always valid.
+ let const_ptr: *const crate::ffi::c_void = ptr.cast();
// SAFETY: The FFI function does not deref the pointer.
if unsafe { bindings::IS_ERR(const_ptr) } {
// SAFETY: The FFI function does not deref the pointer.
let err = unsafe { bindings::PTR_ERR(const_ptr) };
+
+ #[allow(clippy::unnecessary_cast)]
// CAST: If `IS_ERR()` returns `true`,
// then `PTR_ERR()` is guaranteed to return a
// negative value greater-or-equal to `-bindings::MAX_ERRNO`,
@@ -291,8 +301,7 @@ pub(crate) fn from_err_ptr<T>(ptr: *mut T) -> Result<*mut T> {
//
// SAFETY: `IS_ERR()` ensures `err` is a
// negative value greater-or-equal to `-bindings::MAX_ERRNO`.
- #[allow(clippy::unnecessary_cast)]
- return Err(unsafe { Error::from_errno_unchecked(err as core::ffi::c_int) });
+ return Err(unsafe { Error::from_errno_unchecked(err as crate::ffi::c_int) });
}
Ok(ptr)
}
@@ -312,7 +321,7 @@ pub(crate) fn from_err_ptr<T>(ptr: *mut T) -> Result<*mut T> {
/// # use kernel::bindings;
/// unsafe extern "C" fn probe_callback(
/// pdev: *mut bindings::platform_device,
-/// ) -> core::ffi::c_int {
+/// ) -> kernel::ffi::c_int {
/// from_result(|| {
/// let ptr = devm_alloc(pdev)?;
/// bindings::platform_set_drvdata(pdev, ptr);
@@ -320,9 +329,7 @@ pub(crate) fn from_err_ptr<T>(ptr: *mut T) -> Result<*mut T> {
/// })
/// }
/// ```
-// TODO: Remove `dead_code` marker once an in-kernel client is available.
-#[allow(dead_code)]
-pub(crate) fn from_result<T, F>(f: F) -> T
+pub fn from_result<T, F>(f: F) -> T
where
T: From<i16>,
F: FnOnce() -> Result<T>,
diff --git a/rust/kernel/faux.rs b/rust/kernel/faux.rs
new file mode 100644
index 000000000000..5acc0c02d451
--- /dev/null
+++ b/rust/kernel/faux.rs
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+//! Abstractions for the faux bus.
+//!
+//! This module provides bindings for working with faux devices in kernel modules.
+//!
+//! C header: [`include/linux/device/faux.h`](srctree/include/linux/device/faux.h)
+
+use crate::{bindings, device, error::code::*, prelude::*};
+use core::ptr::{addr_of_mut, null, null_mut, NonNull};
+
+/// The registration of a faux device.
+///
+/// This type represents the registration of a [`struct faux_device`]. When an instance of this type
+/// is dropped, its respective faux device will be unregistered from the system.
+///
+/// # Invariants
+///
+/// `self.0` always holds a valid pointer to an initialized and registered [`struct faux_device`].
+///
+/// [`struct faux_device`]: srctree/include/linux/device/faux.h
+#[repr(transparent)]
+pub struct Registration(NonNull<bindings::faux_device>);
+
+impl Registration {
+ /// Create and register a new faux device with the given name.
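+ ///
+ /// # Examples
+ ///
+ /// A minimal sketch; the device name is illustrative.
+ ///
+ /// ```no_run
+ /// # use kernel::{c_str, faux};
+ /// # fn no_run() -> Result<(), Error> {
+ /// let reg = faux::Registration::new(c_str!("my-faux-device"))?;
+ /// // The faux device is unregistered once `reg` is dropped.
+ /// # Ok(())
+ /// # }
+ /// ```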
+ pub fn new(name: &CStr) -> Result<Self> {
+ // SAFETY:
+ // - `name` is copied by this function into its own storage
+ // - `faux_ops` is safe to leave NULL according to the C API
+ let dev = unsafe { bindings::faux_device_create(name.as_char_ptr(), null_mut(), null()) };
+
+ // The above function returns either a valid device, or NULL on failure.
+ // INVARIANT: The device will remain registered until faux_device_destroy() is called, which
+ // happens in our Drop implementation.
+ Ok(Self(NonNull::new(dev).ok_or(ENODEV)?))
+ }
+
+ fn as_raw(&self) -> *mut bindings::faux_device {
+ self.0.as_ptr()
+ }
+}
+
+impl AsRef<device::Device> for Registration {
+ fn as_ref(&self) -> &device::Device {
+ // SAFETY: The underlying `device` in `faux_device` is guaranteed by the C API to be
+ // a valid initialized `device`.
+ unsafe { device::Device::as_ref(addr_of_mut!((*self.as_raw()).dev)) }
+ }
+}
+
+impl Drop for Registration {
+ fn drop(&mut self) {
+ // SAFETY: `self.0` is a valid registered faux_device via our type invariants.
+ unsafe { bindings::faux_device_destroy(self.as_raw()) }
+ }
+}
+
+// SAFETY: The faux device API is thread-safe as guaranteed by the device core, as long as
+// faux_device_destroy() is guaranteed to only be called once - which is guaranteed by our type not
+// having Copy/Clone.
+unsafe impl Send for Registration {}
+
+// SAFETY: The faux device API is thread-safe as guaranteed by the device core, as long as
+// faux_device_destroy() is guaranteed to only be called once - which is guaranteed by our type not
+// having Copy/Clone.
+unsafe impl Sync for Registration {}
diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs
new file mode 100644
index 000000000000..c5162fdc95ff
--- /dev/null
+++ b/rust/kernel/firmware.rs
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Firmware abstraction
+//!
+//! C header: [`include/linux/firmware.h`](srctree/include/linux/firmware.h)
+
+use crate::{bindings, device::Device, error::Error, error::Result, str::CStr};
+use core::ptr::NonNull;
+
+/// # Invariants
+///
+/// One of the following: `bindings::request_firmware`, `bindings::firmware_request_nowarn`,
+/// `bindings::firmware_request_platform`, `bindings::request_firmware_direct`.
+struct FwFunc(
+ unsafe extern "C" fn(*mut *const bindings::firmware, *const u8, *mut bindings::device) -> i32,
+);
+
+impl FwFunc {
+ fn request() -> Self {
+ Self(bindings::request_firmware)
+ }
+
+ fn request_nowarn() -> Self {
+ Self(bindings::firmware_request_nowarn)
+ }
+}
+
+/// Abstraction around a C `struct firmware`.
+///
+/// This is a simple abstraction around the C firmware API. Just like with the C API, firmware can
+/// be requested. Once requested, the abstraction provides direct access to the firmware buffer as
+/// `&[u8]`. The firmware is released once [`Firmware`] is dropped.
+///
+/// # Invariants
+///
+/// The pointer is valid, and has ownership over the instance of `struct firmware`.
+///
+/// The `Firmware`'s backing buffer is not modified.
+///
+/// # Examples
+///
+/// ```no_run
+/// # use kernel::{c_str, device::Device, firmware::Firmware};
+///
+/// # fn no_run() -> Result<(), Error> {
+/// # // SAFETY: *NOT* safe, just for the example to get an `ARef<Device>` instance
+/// # let dev = unsafe { Device::get_device(core::ptr::null_mut()) };
+///
+/// let fw = Firmware::request(c_str!("path/to/firmware.bin"), &dev)?;
+/// let blob = fw.data();
+///
+/// # Ok(())
+/// # }
+/// ```
+pub struct Firmware(NonNull<bindings::firmware>);
+
+impl Firmware {
+ fn request_internal(name: &CStr, dev: &Device, func: FwFunc) -> Result<Self> {
+ let mut fw: *mut bindings::firmware = core::ptr::null_mut();
+ let pfw: *mut *mut bindings::firmware = &mut fw;
+
+ // SAFETY: `pfw` is a valid pointer to a NULL initialized `bindings::firmware` pointer.
+ // `name` and `dev` are valid by their type invariants.
+ let ret = unsafe { func.0(pfw as _, name.as_char_ptr(), dev.as_raw()) };
+ if ret != 0 {
+ return Err(Error::from_errno(ret));
+ }
+
+ // SAFETY: `func` not bailing out with a non-zero error code guarantees that `fw` is a
+ // valid pointer to `bindings::firmware`.
+ Ok(Firmware(unsafe { NonNull::new_unchecked(fw) }))
+ }
+
+ /// Send a firmware request and wait for it. See also `bindings::request_firmware`.
+ pub fn request(name: &CStr, dev: &Device) -> Result<Self> {
+ Self::request_internal(name, dev, FwFunc::request())
+ }
+
+ /// Send a request for an optional firmware module. See also
+ /// `bindings::firmware_request_nowarn`.
+ pub fn request_nowarn(name: &CStr, dev: &Device) -> Result<Self> {
+ Self::request_internal(name, dev, FwFunc::request_nowarn())
+ }
+
+ fn as_raw(&self) -> *mut bindings::firmware {
+ self.0.as_ptr()
+ }
+
+ /// Returns the size of the requested firmware in bytes.
+ pub fn size(&self) -> usize {
+ // SAFETY: `self.as_raw()` is valid by the type invariant.
+ unsafe { (*self.as_raw()).size }
+ }
+
+ /// Returns the requested firmware as `&[u8]`.
+ pub fn data(&self) -> &[u8] {
+ // SAFETY: `self.as_raw()` is valid by the type invariant. Additionally,
+ // `bindings::firmware` guarantees, if successfully requested, that
+ // `bindings::firmware::data` has a size of `bindings::firmware::size` bytes.
+ unsafe { core::slice::from_raw_parts((*self.as_raw()).data, self.size()) }
+ }
+}
+
+impl Drop for Firmware {
+ fn drop(&mut self) {
+ // SAFETY: `self.as_raw()` is valid by the type invariant.
+ unsafe { bindings::release_firmware(self.as_raw()) };
+ }
+}
+
+// SAFETY: `Firmware` only holds a pointer to a C `struct firmware`, which is safe to be used from
+// any thread.
+unsafe impl Send for Firmware {}
+
+// SAFETY: `Firmware` only holds a pointer to a C `struct firmware`, references to which are safe to
+// be used from any thread.
+unsafe impl Sync for Firmware {}
diff --git a/rust/kernel/fs.rs b/rust/kernel/fs.rs
new file mode 100644
index 000000000000..0121b38c59e6
--- /dev/null
+++ b/rust/kernel/fs.rs
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Kernel file systems.
+//!
+//! C headers: [`include/linux/fs.h`](srctree/include/linux/fs.h)
+
+pub mod file;
+pub use self::file::{File, LocalFile};
diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs
new file mode 100644
index 000000000000..e03dbe14d62a
--- /dev/null
+++ b/rust/kernel/fs/file.rs
@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 Google LLC.
+
+//! Files and file descriptors.
+//!
+//! C headers: [`include/linux/fs.h`](srctree/include/linux/fs.h) and
+//! [`include/linux/file.h`](srctree/include/linux/file.h)
+
+use crate::{
+ bindings,
+ cred::Credential,
+ error::{code::*, Error, Result},
+ types::{ARef, AlwaysRefCounted, NotThreadSafe, Opaque},
+};
+use core::ptr;
+
+/// Flags associated with a [`File`].
+pub mod flags {
+ /// File is opened in append mode.
+ pub const O_APPEND: u32 = bindings::O_APPEND;
+
+ /// Signal-driven I/O is enabled.
+ pub const O_ASYNC: u32 = bindings::FASYNC;
+
+ /// Close-on-exec flag is set.
+ pub const O_CLOEXEC: u32 = bindings::O_CLOEXEC;
+
+ /// File was created if it didn't already exist.
+ pub const O_CREAT: u32 = bindings::O_CREAT;
+
+ /// Direct I/O is enabled for this file.
+ pub const O_DIRECT: u32 = bindings::O_DIRECT;
+
+ /// File must be a directory.
+ pub const O_DIRECTORY: u32 = bindings::O_DIRECTORY;
+
+ /// Like [`O_SYNC`] except metadata is not synced.
+ pub const O_DSYNC: u32 = bindings::O_DSYNC;
+
+ /// Ensure that this file is created with the `open(2)` call.
+ pub const O_EXCL: u32 = bindings::O_EXCL;
+
+ /// Large file size enabled (`off64_t` over `off_t`).
+ pub const O_LARGEFILE: u32 = bindings::O_LARGEFILE;
+
+ /// Do not update the file last access time.
+ pub const O_NOATIME: u32 = bindings::O_NOATIME;
+
+ /// File should not be used as process's controlling terminal.
+ pub const O_NOCTTY: u32 = bindings::O_NOCTTY;
+
+ /// If basename of path is a symbolic link, fail open.
+ pub const O_NOFOLLOW: u32 = bindings::O_NOFOLLOW;
+
+ /// File is using nonblocking I/O.
+ pub const O_NONBLOCK: u32 = bindings::O_NONBLOCK;
+
+ /// File is using nonblocking I/O.
+ ///
+ /// This is effectively the same flag as [`O_NONBLOCK`] on all architectures
+ /// except SPARC64.
+ pub const O_NDELAY: u32 = bindings::O_NDELAY;
+
+ /// Used to obtain a path file descriptor.
+ pub const O_PATH: u32 = bindings::O_PATH;
+
+ /// Write operations on this file will flush data and metadata.
+ pub const O_SYNC: u32 = bindings::O_SYNC;
+
+ /// This file is an unnamed temporary regular file.
+ pub const O_TMPFILE: u32 = bindings::O_TMPFILE;
+
+ /// File should be truncated to length 0.
+ pub const O_TRUNC: u32 = bindings::O_TRUNC;
+
+ /// Bitmask for access mode flags.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::fs::file;
+ /// # fn do_something() {}
+ /// # let flags = 0;
+ /// if (flags & file::flags::O_ACCMODE) == file::flags::O_RDONLY {
+ /// do_something();
+ /// }
+ /// ```
+ pub const O_ACCMODE: u32 = bindings::O_ACCMODE;
+
+ /// File is read only.
+ pub const O_RDONLY: u32 = bindings::O_RDONLY;
+
+ /// File is write only.
+ pub const O_WRONLY: u32 = bindings::O_WRONLY;
+
+ /// File can be both read and written.
+ pub const O_RDWR: u32 = bindings::O_RDWR;
+}
+
+/// Wraps the kernel's `struct file`. Thread safe.
+///
+/// This represents an open file rather than a file on a filesystem. Processes generally reference
+/// open files using file descriptors. However, file descriptors are not the same as files. A file
+/// descriptor is just an integer that corresponds to a file, and a single file may be referenced
+/// by multiple file descriptors.
+///
+/// # Refcounting
+///
+/// Instances of this type are reference-counted. The reference count is incremented by the
+/// `fget`/`get_file` functions and decremented by `fput`. The Rust type `ARef<File>` represents a
+/// pointer that owns a reference count on the file.
+///
+/// Whenever a process opens a file descriptor (fd), it stores a pointer to the file in its fd
+/// table (`struct files_struct`). This pointer owns a reference count to the file, ensuring the
+/// file isn't prematurely deleted while the file descriptor is open. In Rust terminology, the
+/// pointers in `struct files_struct` are `ARef<File>` pointers.
+///
+/// ## Light refcounts
+///
+/// Whenever a process has an fd to a file, it may use something called a "light refcount" as a
+/// performance optimization. Light refcounts are acquired by calling `fdget` and released with
+/// `fdput`. The idea behind light refcounts is that if the fd is not closed between the calls to
+/// `fdget` and `fdput`, then the refcount cannot hit zero during that time, as the `struct
+/// files_struct` holds a reference until the fd is closed. This means that it's safe to access the
+/// file even if `fdget` does not increment the refcount.
+///
+/// The requirement that the fd is not closed during a light refcount applies globally across all
+/// threads - not just on the thread using the light refcount. For this reason, light refcounts are
+/// only used when the `struct files_struct` is not shared with other threads, since this ensures
+/// that other unrelated threads cannot suddenly start using the fd and close it. Therefore,
+/// calling `fdget` on a shared `struct files_struct` creates a normal refcount instead of a light
+/// refcount.
+///
+/// Light reference counts must be released with `fdput` before the system call returns to
+/// userspace. This means that if you wait until the current system call returns to userspace, then
+/// all light refcounts that existed at the time have gone away.
+///
+/// ### The file position
+///
+/// Each `struct file` has a position integer, which is protected by the `f_pos_lock` mutex.
+/// However, if the `struct file` is not shared, then the kernel may avoid taking the lock as a
+/// performance optimization.
+///
+/// The condition for avoiding the `f_pos_lock` mutex is different from the condition for using
+/// `fdget`. With `fdget`, you may avoid incrementing the refcount as long as the current fd table
+/// is not shared; it is okay if there are other fd tables that also reference the same `struct
+/// file`. However, `fdget_pos` can only avoid taking the `f_pos_lock` if the entire `struct file`
+/// is not shared, as different processes with an fd to the same `struct file` share the same
+/// position.
+///
+/// To represent files that are not thread safe due to this optimization, the [`LocalFile`] type is
+/// used.
+///
+/// ## Rust references
+///
+/// The reference type `&File` is similar to light refcounts:
+///
+/// * `&File` references don't own a reference count. They can only exist as long as the reference
+/// count stays positive, and can only be created when there is some mechanism in place to ensure
+/// this.
+///
+/// * The Rust borrow-checker normally ensures this by enforcing that the `ARef<File>` from which
+/// a `&File` is created outlives the `&File`.
+///
+/// * Using the unsafe [`File::from_raw_file`] means that it is up to the caller to ensure that the
+/// `&File` only exists while the reference count is positive.
+///
+/// * You can think of `fdget` as using an fd to look up an `ARef<File>` in the `struct
+/// files_struct` and create an `&File` from it. The "fd cannot be closed" rule is like the Rust
+/// rule "the `ARef<File>` must outlive the `&File`".
+///
+/// # Invariants
+///
+/// * All instances of this type are refcounted using the `f_count` field.
+/// * There must not be any active calls to `fdget_pos` on this file that did not take the
+/// `f_pos_lock` mutex.
+#[repr(transparent)]
+pub struct File {
+ inner: Opaque<bindings::file>,
+}
+
+// SAFETY: This file is known to not have any active `fdget_pos` calls that did not take the
+// `f_pos_lock` mutex, so it is safe to transfer it between threads.
+unsafe impl Send for File {}
+
+// SAFETY: This file is known to not have any active `fdget_pos` calls that did not take the
+// `f_pos_lock` mutex, so it is safe to access its methods from several threads in parallel.
+unsafe impl Sync for File {}
+
+// SAFETY: The type invariants guarantee that `File` is always ref-counted. This implementation
+// makes `ARef<File>` own a normal refcount.
+unsafe impl AlwaysRefCounted for File {
+ #[inline]
+ fn inc_ref(&self) {
+ // SAFETY: The existence of a shared reference means that the refcount is nonzero.
+ unsafe { bindings::get_file(self.as_ptr()) };
+ }
+
+ #[inline]
+ unsafe fn dec_ref(obj: ptr::NonNull<File>) {
+ // SAFETY: To call this method, the caller passes us ownership of a normal refcount, so we
+ // may drop it. The cast is okay since `File` has the same representation as `struct file`.
+ unsafe { bindings::fput(obj.cast().as_ptr()) }
+ }
+}
+
+/// Wraps the kernel's `struct file`. Not thread safe.
+///
+/// This type represents a file that is not known to be safe to transfer across thread boundaries.
+/// To obtain a thread-safe [`File`], use the [`assume_no_fdget_pos`] conversion.
+///
+/// See the documentation for [`File`] for more information.
+///
+/// # Invariants
+///
+/// * All instances of this type are refcounted using the `f_count` field.
+/// * If there is an active call to `fdget_pos` that did not take the `f_pos_lock` mutex, then it
+/// must be on the same thread as this file.
+///
+/// [`assume_no_fdget_pos`]: LocalFile::assume_no_fdget_pos
+#[repr(transparent)]
+pub struct LocalFile {
+ inner: Opaque<bindings::file>,
+}
+
+// SAFETY: The type invariants guarantee that `LocalFile` is always ref-counted. This implementation
+// makes `ARef<LocalFile>` own a normal refcount.
+unsafe impl AlwaysRefCounted for LocalFile {
+ #[inline]
+ fn inc_ref(&self) {
+ // SAFETY: The existence of a shared reference means that the refcount is nonzero.
+ unsafe { bindings::get_file(self.as_ptr()) };
+ }
+
+ #[inline]
+ unsafe fn dec_ref(obj: ptr::NonNull<LocalFile>) {
+ // SAFETY: To call this method, the caller passes us ownership of a normal refcount, so we
+ // may drop it. The cast is okay since `File` has the same representation as `struct file`.
+ unsafe { bindings::fput(obj.cast().as_ptr()) }
+ }
+}
+
+impl LocalFile {
+ /// Constructs a new `struct file` wrapper from a file descriptor.
+ ///
+ /// The file descriptor belongs to the current process, and there might be active local calls
+ /// to `fdget_pos` on the same file.
+ ///
+ /// To obtain an `ARef<File>`, use the [`assume_no_fdget_pos`] function to convert.
+ ///
+ /// [`assume_no_fdget_pos`]: LocalFile::assume_no_fdget_pos
+ #[inline]
+ pub fn fget(fd: u32) -> Result<ARef<LocalFile>, BadFdError> {
+ // SAFETY: FFI call, there are no requirements on `fd`.
+ let ptr = ptr::NonNull::new(unsafe { bindings::fget(fd) }).ok_or(BadFdError)?;
+
+ // SAFETY: `bindings::fget` created a refcount, and we pass ownership of it to the `ARef`.
+ //
+ // INVARIANT: This file is in the fd table on this thread, so either all `fdget_pos` calls
+ // are on this thread, or the file is shared, in which case `fdget_pos` calls took the
+ // `f_pos_lock` mutex.
+ Ok(unsafe { ARef::from_raw(ptr.cast()) })
+ }
+
+ /// Creates a reference to a [`LocalFile`] from a valid pointer.
+ ///
+ /// # Safety
+ ///
+ /// * The caller must ensure that `ptr` points at a valid file and that the file's refcount is
+ /// positive for the duration of 'a.
+ /// * The caller must ensure that if there is an active call to `fdget_pos` that did not take
+ /// the `f_pos_lock` mutex, then that call is on the current thread.
+ #[inline]
+ pub unsafe fn from_raw_file<'a>(ptr: *const bindings::file) -> &'a LocalFile {
+ // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the
+ // duration of 'a. The cast is okay because `LocalFile` is `repr(transparent)`.
+ //
+ // INVARIANT: The caller guarantees that there are no problematic `fdget_pos` calls.
+ unsafe { &*ptr.cast() }
+ }
+
+ /// Assume that there are no active `fdget_pos` calls that prevent us from sharing this file.
+ ///
+ /// This makes it safe to transfer this file to other threads. No checks are performed, and
+ /// using it incorrectly may lead to a data race on the file position if the file is shared
+ /// with another thread.
+ ///
+ /// This method is intended to be used together with [`LocalFile::fget`] when the caller knows
+ /// statically that there are no `fdget_pos` calls on the current thread. For example, you
+ /// might use it when calling `fget` from an ioctl, since ioctls usually do not touch the file
+ /// position.
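+ ///
+ /// # Examples
+ ///
+ /// A sketch of the intended pairing with [`LocalFile::fget`] from an ioctl-style context that
+ /// is assumed not to call `fdget_pos` on the current thread.
+ ///
+ /// ```no_run
+ /// # use kernel::fs::file::{BadFdError, File, LocalFile};
+ /// # use kernel::types::ARef;
+ /// fn file_from_ioctl_fd(fd: u32) -> Result<ARef<File>, BadFdError> {
+ ///     let file = LocalFile::fget(fd)?;
+ ///     // SAFETY: This sketch assumes no `fdget_pos` calls are active on the current thread.
+ ///     Ok(unsafe { LocalFile::assume_no_fdget_pos(file) })
+ /// }
+ /// ```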
+ ///
+ /// # Safety
+ ///
+ /// There must not be any active `fdget_pos` calls on the current thread.
+ #[inline]
+ pub unsafe fn assume_no_fdget_pos(me: ARef<LocalFile>) -> ARef<File> {
+ // INVARIANT: There are no `fdget_pos` calls on the current thread, and by the type
+ // invariants, if there is a `fdget_pos` call on another thread, then it took the
+ // `f_pos_lock` mutex.
+ //
+ // SAFETY: `LocalFile` and `File` have the same layout.
+ unsafe { ARef::from_raw(ARef::into_raw(me).cast()) }
+ }
+
+ /// Returns a raw pointer to the inner C struct.
+ #[inline]
+ pub fn as_ptr(&self) -> *mut bindings::file {
+ self.inner.get()
+ }
+
+ /// Returns the credentials of the task that originally opened the file.
+ pub fn cred(&self) -> &Credential {
+ // SAFETY: It's okay to read the `f_cred` field without synchronization because `f_cred` is
+ // never changed after initialization of the file.
+ let ptr = unsafe { (*self.as_ptr()).f_cred };
+
+ // SAFETY: The signature of this function ensures that the caller will only access the
+ // returned credential while the file is still valid, and the C side ensures that the
+ // credential stays valid at least as long as the file.
+ unsafe { Credential::from_ptr(ptr) }
+ }
+
+ /// Returns the flags associated with the file.
+ ///
+ /// The flags are a combination of the constants in [`flags`].
+ #[inline]
+ pub fn flags(&self) -> u32 {
+ // This `read_volatile` is intended to correspond to a READ_ONCE call.
+ //
+ // SAFETY: The file is valid because the shared reference guarantees a nonzero refcount.
+ //
+ // FIXME(read_once): Replace with `read_once` when available on the Rust side.
+ unsafe { core::ptr::addr_of!((*self.as_ptr()).f_flags).read_volatile() }
+ }
+}
+
+impl File {
+ /// Creates a reference to a [`File`] from a valid pointer.
+ ///
+ /// # Safety
+ ///
+ /// * The caller must ensure that `ptr` points at a valid file and that the file's refcount is
+ /// positive for the duration of 'a.
+ /// * The caller must ensure that if there are active `fdget_pos` calls on this file, then they
+ /// took the `f_pos_lock` mutex.
+ #[inline]
+ pub unsafe fn from_raw_file<'a>(ptr: *const bindings::file) -> &'a File {
+ // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the
+ // duration of 'a. The cast is okay because `File` is `repr(transparent)`.
+ //
+ // INVARIANT: The caller guarantees that there are no problematic `fdget_pos` calls.
+ unsafe { &*ptr.cast() }
+ }
+}
+
+// Make LocalFile methods available on File.
+impl core::ops::Deref for File {
+ type Target = LocalFile;
+ #[inline]
+ fn deref(&self) -> &LocalFile {
+ // SAFETY: The caller provides a `&File`, and since it is a reference, it must point at a
+ // valid file for the desired duration.
+ //
+ // By the type invariants, there are no `fdget_pos` calls that did not take the
+ // `f_pos_lock` mutex.
+ unsafe { LocalFile::from_raw_file(self as *const File as *const bindings::file) }
+ }
+}
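Because of this `Deref` impl, the `LocalFile` accessors can be called on a `&File` directly; a minimal sketch:

    fn log_file(file: &File) {
        // `flags()` is defined on `LocalFile` but resolves here via `Deref`.
        pr_info!("f_flags = {:#x}\n", file.flags());
    }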
+
+/// A file descriptor reservation.
+///
+/// This allows the creation of a file descriptor in two steps: first, we reserve a slot for it,
+/// then we commit or drop the reservation. The first step may fail (e.g., the current process ran
+/// out of available slots), but commit and drop never fail (and are mutually exclusive).
+///
+/// Dropping the reservation happens in the destructor of this type.
+///
+/// # Invariants
+///
+/// The fd stored in this struct must correspond to a reserved file descriptor of the current task.
+pub struct FileDescriptorReservation {
+ fd: u32,
+ /// Prevent values of this type from being moved to a different task.
+ ///
+ /// The `fd_install` and `put_unused_fd` functions assume that the value of `current` is
+ /// unchanged since the call to `get_unused_fd_flags`. By adding this marker to this type, we
+ /// prevent it from being moved across task boundaries, which ensures that `current` does not
+ /// change while this value exists.
+ _not_send: NotThreadSafe,
+}
+
+impl FileDescriptorReservation {
+ /// Creates a new file descriptor reservation.
+ pub fn get_unused_fd_flags(flags: u32) -> Result<Self> {
+ // SAFETY: FFI call, there are no safety requirements on `flags`.
+ let fd: i32 = unsafe { bindings::get_unused_fd_flags(flags) };
+ if fd < 0 {
+ return Err(Error::from_errno(fd));
+ }
+ Ok(Self {
+ fd: fd as u32,
+ _not_send: NotThreadSafe,
+ })
+ }
+
+ /// Returns the file descriptor number that was reserved.
+ pub fn reserved_fd(&self) -> u32 {
+ self.fd
+ }
+
+ /// Commits the reservation.
+ ///
+ /// The previously reserved file descriptor is bound to `file`. This method consumes the
+ /// [`FileDescriptorReservation`], so it will not be usable after this call.
+ pub fn fd_install(self, file: ARef<File>) {
+ // SAFETY: `self.fd` was previously returned by `get_unused_fd_flags`. We have not yet used
+ // the fd, so it is still valid, and `current` still refers to the same task, as this type
+ // cannot be moved across task boundaries.
+ //
+ // Furthermore, the file pointer is guaranteed to own a refcount by its type invariants,
+ // and we take ownership of that refcount by not running the destructor below.
+ // Additionally, the file is known to not have any non-shared `fdget_pos` calls, so even if
+ // this process starts using the file position, this will not result in a data race on the
+ // file position.
+ unsafe { bindings::fd_install(self.fd, file.as_ptr()) };
+
+ // `fd_install` consumes both the file descriptor and the file reference, so we cannot run
+ // the destructors.
+ core::mem::forget(self);
+ core::mem::forget(file);
+ }
+}
+
+impl Drop for FileDescriptorReservation {
+ fn drop(&mut self) {
+ // SAFETY: By the type invariants of this type, `self.fd` was previously returned by
+ // `get_unused_fd_flags`. We have not yet used the fd, so it is still valid, and `current`
+ // still refers to the same task, as this type cannot be moved across task boundaries.
+ unsafe { bindings::put_unused_fd(self.fd) };
+ }
+}
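A sketch of the intended two-step flow (assuming `O_CLOEXEC` from the `flags` submodule; an early error return before `fd_install` releases the slot via `Drop`):

    fn install(file: ARef<File>) -> Result<u32> {
        // Step 1: reserve a slot; this is the only step that can fail.
        let reservation = FileDescriptorReservation::get_unused_fd_flags(flags::O_CLOEXEC)?;
        let fd = reservation.reserved_fd();
        // Step 2: commit; consumes the reservation and the file refcount.
        reservation.fd_install(file);
        Ok(fd)
    }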
+
+/// Represents the `EBADF` error code.
+///
+/// Used for methods that can only fail with `EBADF`.
+#[derive(Copy, Clone, Eq, PartialEq)]
+pub struct BadFdError;
+
+impl From<BadFdError> for Error {
+ #[inline]
+ fn from(_: BadFdError) -> Error {
+ EBADF
+ }
+}
+
+impl core::fmt::Debug for BadFdError {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ f.pad("EBADF")
+ }
+}
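For illustration, a hedged sketch of how a `BadFdError`-returning helper composes with the generic `Error` type via the `From` impl above (`lookup_fd` is hypothetical):

    fn lookup_fd(_fd: u32) -> core::result::Result<(), BadFdError> {
        // Hypothetical: pretend every fd is closed.
        Err(BadFdError)
    }

    fn caller(fd: u32) -> Result<()> {
        lookup_fd(fd)?; // `BadFdError` converts into `Error` as `EBADF` here.
        Ok(())
    }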
diff --git a/rust/kernel/generated_arch_static_branch_asm.rs.S b/rust/kernel/generated_arch_static_branch_asm.rs.S
new file mode 100644
index 000000000000..2afb638708db
--- /dev/null
+++ b/rust/kernel/generated_arch_static_branch_asm.rs.S
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/jump_label.h>
+
+// Cut here.
+
+::kernel::concat_literals!(ARCH_STATIC_BRANCH_ASM("{symb} + {off} + {branch}", "{l_yes}"))
diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs
index 65be9ae57b80..e25d047f3c82 100644
--- a/rust/kernel/init.rs
+++ b/rust/kernel/init.rs
@@ -13,7 +13,7 @@
//! To initialize a `struct` with an in-place constructor you will need two things:
//! - an in-place constructor,
//! - a memory location that can hold your `struct` (this can be the [stack], an [`Arc<T>`],
-//! [`UniqueArc<T>`], [`Box<T>`] or any other smart pointer that implements [`InPlaceInit`]).
+//! [`UniqueArc<T>`], [`KBox<T>`] or any other smart pointer that implements [`InPlaceInit`]).
//!
//! To get an in-place constructor there are generally three options:
//! - directly creating an in-place constructor using the [`pin_init!`] macro,
@@ -35,8 +35,8 @@
//! that you need to write `<-` instead of `:` for fields that you want to initialize in-place.
//!
//! ```rust
-//! # #![allow(clippy::disallowed_names)]
-//! use kernel::{prelude::*, sync::Mutex, new_mutex};
+//! # #![expect(clippy::disallowed_names)]
+//! use kernel::sync::{new_mutex, Mutex};
//! # use core::pin::Pin;
//! #[pin_data]
//! struct Foo {
@@ -55,8 +55,8 @@
//! (or just the stack) to actually initialize a `Foo`:
//!
//! ```rust
-//! # #![allow(clippy::disallowed_names)]
-//! # use kernel::{prelude::*, sync::Mutex, new_mutex};
+//! # #![expect(clippy::disallowed_names)]
+//! # use kernel::sync::{new_mutex, Mutex};
//! # use core::pin::Pin;
//! # #[pin_data]
//! # struct Foo {
@@ -68,7 +68,7 @@
//! # a <- new_mutex!(42, "Foo::a"),
//! # b: 24,
//! # });
-//! let foo: Result<Pin<Box<Foo>>> = Box::pin_init(foo);
+//! let foo: Result<Pin<KBox<Foo>>> = KBox::pin_init(foo, GFP_KERNEL);
//! ```
//!
//! For more information see the [`pin_init!`] macro.
@@ -79,27 +79,27 @@
//! above method only works for types where you can access the fields.
//!
//! ```rust
-//! # use kernel::{new_mutex, sync::{Arc, Mutex}};
-//! let mtx: Result<Arc<Mutex<usize>>> = Arc::pin_init(new_mutex!(42, "example::mtx"));
+//! # use kernel::sync::{new_mutex, Arc, Mutex};
+//! let mtx: Result<Arc<Mutex<usize>>> =
+//! Arc::pin_init(new_mutex!(42, "example::mtx"), GFP_KERNEL);
//! ```
//!
//! To declare an init macro/function you just return an [`impl PinInit<T, E>`]:
//!
//! ```rust
-//! # #![allow(clippy::disallowed_names)]
-//! # use kernel::{sync::Mutex, prelude::*, new_mutex, init::PinInit, try_pin_init};
+//! # use kernel::{sync::Mutex, new_mutex, init::PinInit, try_pin_init};
//! #[pin_data]
//! struct DriverData {
//! #[pin]
//! status: Mutex<i32>,
-//! buffer: Box<[u8; 1_000_000]>,
+//! buffer: KBox<[u8; 1_000_000]>,
//! }
//!
//! impl DriverData {
//! fn new() -> impl PinInit<Self, Error> {
//! try_pin_init!(Self {
//! status <- new_mutex!(0, "DriverData::status"),
-//! buffer: Box::init(kernel::init::zeroed())?,
+//! buffer: KBox::init(kernel::init::zeroed(), GFP_KERNEL)?,
//! })
//! }
//! }
@@ -120,11 +120,12 @@
//! `slot` gets called.
//!
//! ```rust
-//! # #![allow(unreachable_pub, clippy::disallowed_names)]
-//! use kernel::{prelude::*, init, types::Opaque};
+//! # #![expect(unreachable_pub, clippy::disallowed_names)]
+//! use kernel::{init, types::Opaque};
//! use core::{ptr::addr_of_mut, marker::PhantomPinned, pin::Pin};
//! # mod bindings {
-//! # #![allow(non_camel_case_types)]
+//! # #![expect(non_camel_case_types)]
+//! # #![expect(clippy::missing_safety_doc)]
//! # pub struct foo;
//! # pub unsafe fn init_foo(_ptr: *mut foo) {}
//! # pub unsafe fn destroy_foo(_ptr: *mut foo) {}
@@ -132,7 +133,7 @@
//! # }
//! # // `Error::from_errno` is `pub(crate)` in the `kernel` crate, thus provide a workaround.
//! # trait FromErrno {
-//! # fn from_errno(errno: core::ffi::c_int) -> Error {
+//! # fn from_errno(errno: kernel::ffi::c_int) -> Error {
//! # // Dummy error that can be constructed outside the `kernel` crate.
//! # Error::from(core::fmt::Error)
//! # }
@@ -210,13 +211,13 @@
//! [`pin_init!`]: crate::pin_init!
use crate::{
+ alloc::{AllocError, Flags, KBox},
error::{self, Error},
+ sync::Arc,
sync::UniqueArc,
types::{Opaque, ScopeGuard},
};
-use alloc::boxed::Box;
use core::{
- alloc::AllocError,
cell::UnsafeCell,
convert::Infallible,
marker::PhantomData,
@@ -236,7 +237,7 @@ pub mod macros;
/// # Examples
///
/// ```rust
-/// # #![allow(clippy::disallowed_names)]
+/// # #![expect(clippy::disallowed_names)]
/// # use kernel::{init, macros::pin_data, pin_init, stack_pin_init, init::*, sync::Mutex, new_mutex};
/// # use core::pin::Pin;
/// #[pin_data]
@@ -258,7 +259,7 @@ pub mod macros;
/// },
/// }));
/// let foo: Pin<&mut Foo> = foo;
-/// pr_info!("a: {}", &*foo.a.lock());
+/// pr_info!("a: {}\n", &*foo.a.lock());
/// ```
///
/// # Syntax
@@ -288,15 +289,23 @@ macro_rules! stack_pin_init {
/// # Examples
///
/// ```rust,ignore
-/// # #![allow(clippy::disallowed_names)]
-/// # use kernel::{init, pin_init, stack_try_pin_init, init::*, sync::Mutex, new_mutex};
+/// # #![expect(clippy::disallowed_names)]
+/// # use kernel::{
+/// # init,
+/// # pin_init,
+/// # stack_try_pin_init,
+/// # init::*,
+/// # sync::Mutex,
+/// # new_mutex,
+/// # alloc::AllocError,
+/// # };
/// # use macros::pin_data;
-/// # use core::{alloc::AllocError, pin::Pin};
+/// # use core::pin::Pin;
/// #[pin_data]
/// struct Foo {
/// #[pin]
/// a: Mutex<usize>,
-/// b: Box<Bar>,
+/// b: KBox<Bar>,
/// }
///
/// struct Bar {
@@ -305,24 +314,32 @@ macro_rules! stack_pin_init {
///
/// stack_try_pin_init!(let foo: Result<Pin<&mut Foo>, AllocError> = pin_init!(Foo {
/// a <- new_mutex!(42),
-/// b: Box::try_new(Bar {
+/// b: KBox::new(Bar {
/// x: 64,
-/// })?,
+/// }, GFP_KERNEL)?,
/// }));
/// let foo = foo.unwrap();
-/// pr_info!("a: {}", &*foo.a.lock());
+/// pr_info!("a: {}\n", &*foo.a.lock());
/// ```
///
/// ```rust,ignore
-/// # #![allow(clippy::disallowed_names)]
-/// # use kernel::{init, pin_init, stack_try_pin_init, init::*, sync::Mutex, new_mutex};
+/// # #![expect(clippy::disallowed_names)]
+/// # use kernel::{
+/// # init,
+/// # pin_init,
+/// # stack_try_pin_init,
+/// # init::*,
+/// # sync::Mutex,
+/// # new_mutex,
+/// # alloc::AllocError,
+/// # };
/// # use macros::pin_data;
-/// # use core::{alloc::AllocError, pin::Pin};
+/// # use core::pin::Pin;
/// #[pin_data]
/// struct Foo {
/// #[pin]
/// a: Mutex<usize>,
-/// b: Box<Bar>,
+/// b: KBox<Bar>,
/// }
///
/// struct Bar {
@@ -331,11 +348,11 @@ macro_rules! stack_pin_init {
///
/// stack_try_pin_init!(let foo: Pin<&mut Foo> =? pin_init!(Foo {
/// a <- new_mutex!(42),
-/// b: Box::try_new(Bar {
+/// b: KBox::new(Bar {
/// x: 64,
-/// })?,
+/// }, GFP_KERNEL)?,
/// }));
-/// pr_info!("a: {}", &*foo.a.lock());
+/// pr_info!("a: {}\n", &*foo.a.lock());
/// # Ok::<_, AllocError>(())
/// ```
///
@@ -366,7 +383,6 @@ macro_rules! stack_try_pin_init {
/// The syntax is almost identical to that of a normal `struct` initializer:
///
/// ```rust
-/// # #![allow(clippy::disallowed_names)]
/// # use kernel::{init, pin_init, macros::pin_data, init::*};
/// # use core::pin::Pin;
/// #[pin_data]
@@ -390,7 +406,7 @@ macro_rules! stack_try_pin_init {
/// },
/// });
/// # initializer }
-/// # Box::pin_init(demo()).unwrap();
+/// # KBox::pin_init(demo(), GFP_KERNEL).unwrap();
/// ```
///
/// Arbitrary Rust expressions can be used to set the value of a variable.
@@ -411,8 +427,7 @@ macro_rules! stack_try_pin_init {
/// To create an initializer function, simply declare it like this:
///
/// ```rust
-/// # #![allow(clippy::disallowed_names)]
-/// # use kernel::{init, pin_init, prelude::*, init::*};
+/// # use kernel::{init, pin_init, init::*};
/// # use core::pin::Pin;
/// # #[pin_data]
/// # struct Foo {
@@ -438,7 +453,7 @@ macro_rules! stack_try_pin_init {
/// Users of `Foo` can now create it like this:
///
/// ```rust
-/// # #![allow(clippy::disallowed_names)]
+/// # #![expect(clippy::disallowed_names)]
/// # use kernel::{init, pin_init, macros::pin_data, init::*};
/// # use core::pin::Pin;
/// # #[pin_data]
@@ -460,13 +475,12 @@ macro_rules! stack_try_pin_init {
/// # })
/// # }
/// # }
-/// let foo = Box::pin_init(Foo::new());
+/// let foo = KBox::pin_init(Foo::new(), GFP_KERNEL);
/// ```
///
/// They can also easily embed it into their own `struct`s:
///
/// ```rust
-/// # #![allow(clippy::disallowed_names)]
/// # use kernel::{init, pin_init, macros::pin_data, init::*};
/// # use core::pin::Pin;
/// # #[pin_data]
@@ -539,6 +553,7 @@ macro_rules! stack_try_pin_init {
/// }
/// pin_init!(&this in Buf {
/// buf: [0; 64],
+/// // SAFETY: TODO.
/// ptr: unsafe { addr_of_mut!((*this.as_ptr()).buf).cast() },
/// pin: PhantomPinned,
/// });
@@ -588,11 +603,10 @@ macro_rules! pin_init {
/// # Examples
///
/// ```rust
-/// # #![feature(new_uninit)]
/// use kernel::{init::{self, PinInit}, error::Error};
/// #[pin_data]
/// struct BigBuf {
-/// big: Box<[u8; 1024 * 1024 * 1024]>,
+/// big: KBox<[u8; 1024 * 1024 * 1024]>,
/// small: [u8; 1024 * 1024],
/// ptr: *mut u8,
/// }
@@ -600,7 +614,7 @@ macro_rules! pin_init {
/// impl BigBuf {
/// fn new() -> impl PinInit<Self, Error> {
/// try_pin_init!(Self {
-/// big: Box::init(init::zeroed())?,
+/// big: KBox::init(init::zeroed(), GFP_KERNEL)?,
/// small: [0; 1024 * 1024],
/// ptr: core::ptr::null_mut(),
/// }? Error)
@@ -692,16 +706,16 @@ macro_rules! init {
/// # Examples
///
/// ```rust
-/// use kernel::{init::{PinInit, zeroed}, error::Error};
+/// use kernel::{alloc::KBox, init::{PinInit, zeroed}, error::Error};
/// struct BigBuf {
-/// big: Box<[u8; 1024 * 1024 * 1024]>,
+/// big: KBox<[u8; 1024 * 1024 * 1024]>,
/// small: [u8; 1024 * 1024],
/// }
///
/// impl BigBuf {
/// fn new() -> impl Init<Self, Error> {
/// try_init!(Self {
-/// big: Box::init(zeroed())?,
+/// big: KBox::init(zeroed(), GFP_KERNEL)?,
/// small: [0; 1024 * 1024],
/// }? Error)
/// }
@@ -741,20 +755,88 @@ macro_rules! try_init {
};
}
+/// Asserts that a field on a struct using `#[pin_data]` is marked with `#[pin]`, i.e. that it is
+/// structurally pinned.
+///
+/// # Examples
+///
+/// This will succeed:
+/// ```
+/// use kernel::assert_pinned;
+/// #[pin_data]
+/// struct MyStruct {
+/// #[pin]
+/// some_field: u64,
+/// }
+///
+/// assert_pinned!(MyStruct, some_field, u64);
+/// ```
+///
+/// This will fail:
+// TODO: replace with `compile_fail` when supported.
+/// ```ignore
+/// use kernel::assert_pinned;
+/// #[pin_data]
+/// struct MyStruct {
+/// some_field: u64,
+/// }
+///
+/// assert_pinned!(MyStruct, some_field, u64);
+/// ```
+///
+/// Some uses of the macro may trigger the `can't use generic parameters from outer item` error. To
+/// work around this, you may pass the `inline` parameter to the macro. The `inline` parameter can
+/// only be used when the macro is invoked from a function body.
+/// ```
+/// use kernel::assert_pinned;
+/// #[pin_data]
+/// struct Foo<T> {
+/// #[pin]
+/// elem: T,
+/// }
+///
+/// impl<T> Foo<T> {
+/// fn project(self: Pin<&mut Self>) -> Pin<&mut T> {
+/// assert_pinned!(Foo<T>, elem, T, inline);
+///
+/// // SAFETY: The field is structurally pinned.
+/// unsafe { self.map_unchecked_mut(|me| &mut me.elem) }
+/// }
+/// }
+/// ```
+#[macro_export]
+macro_rules! assert_pinned {
+ ($ty:ty, $field:ident, $field_ty:ty, inline) => {
+ let _ = move |ptr: *mut $field_ty| {
+ // SAFETY: This code is unreachable.
+ let data = unsafe { <$ty as $crate::init::__internal::HasPinData>::__pin_data() };
+ let init = $crate::init::__internal::AlwaysFail::<$field_ty>::new();
+ // SAFETY: This code is unreachable.
+ unsafe { data.$field(ptr, init) }.ok();
+ };
+ };
+
+ ($ty:ty, $field:ident, $field_ty:ty) => {
+ const _: () = {
+ $crate::assert_pinned!($ty, $field, $field_ty, inline);
+ };
+ };
+}
+
/// A pin-initializer for the type `T`.
///
/// To use this initializer, you will need a suitable memory location that can hold a `T`. This can
-/// be [`Box<T>`], [`Arc<T>`], [`UniqueArc<T>`] or even the stack (see [`stack_pin_init!`]). Use the
-/// [`InPlaceInit::pin_init`] function of a smart pointer like [`Arc<T>`] on this.
+/// be [`KBox<T>`], [`Arc<T>`], [`UniqueArc<T>`] or even the stack (see [`stack_pin_init!`]). Use
+/// the [`InPlaceInit::pin_init`] function of a smart pointer like [`Arc<T>`] on this.
///
/// Also see the [module description](self).
///
/// # Safety
///
-/// When implementing this type you will need to take great care. Also there are probably very few
+/// When implementing this trait you will need to take great care. Also there are probably very few
/// cases where a manual implementation is necessary. Use [`pin_init_from_closure`] where possible.
///
-/// The [`PinInit::__pinned_init`] function
+/// The [`PinInit::__pinned_init`] function:
/// - returns `Ok(())` if it initialized every field of `slot`,
/// - returns `Err(err)` if it encountered an error and then cleaned `slot`, this means:
/// - `slot` can be deallocated without UB occurring,
@@ -784,11 +866,11 @@ pub unsafe trait PinInit<T: ?Sized, E = Infallible>: Sized {
/// # Examples
///
/// ```rust
- /// # #![allow(clippy::disallowed_names)]
+ /// # #![expect(clippy::disallowed_names)]
/// use kernel::{types::Opaque, init::pin_init_from_closure};
/// #[repr(C)]
/// struct RawFoo([u8; 16]);
- /// extern {
+ /// extern "C" {
/// fn init_foo(_: *mut RawFoo);
/// }
///
@@ -800,11 +882,12 @@ pub unsafe trait PinInit<T: ?Sized, E = Infallible>: Sized {
///
/// impl Foo {
/// fn setup(self: Pin<&mut Self>) {
- /// pr_info!("Setting up foo");
+ /// pr_info!("Setting up foo\n");
/// }
/// }
///
/// let foo = pin_init!(Foo {
+ /// // SAFETY: TODO.
/// raw <- unsafe {
/// Opaque::ffi_init(|s| {
/// init_foo(s);
@@ -824,7 +907,7 @@ pub unsafe trait PinInit<T: ?Sized, E = Infallible>: Sized {
}
/// An initializer returned by [`PinInit::pin_chain`].
-pub struct ChainPinInit<I, F, T: ?Sized, E>(I, F, __internal::Invariant<(E, Box<T>)>);
+pub struct ChainPinInit<I, F, T: ?Sized, E>(I, F, __internal::Invariant<(E, KBox<T>)>);
// SAFETY: The `__pinned_init` function is implemented such that it
// - returns `Ok(())` on successful initialization,
@@ -842,29 +925,26 @@ where
let val = unsafe { &mut *slot };
// SAFETY: `slot` is considered pinned.
let val = unsafe { Pin::new_unchecked(val) };
- (self.1)(val).map_err(|e| {
- // SAFETY: `slot` was initialized above.
- unsafe { core::ptr::drop_in_place(slot) };
- e
- })
+ // SAFETY: `slot` was initialized above.
+ (self.1)(val).inspect_err(|_| unsafe { core::ptr::drop_in_place(slot) })
}
}
/// An initializer for `T`.
///
/// To use this initializer, you will need a suitable memory location that can hold a `T`. This can
-/// be [`Box<T>`], [`Arc<T>`], [`UniqueArc<T>`] or even the stack (see [`stack_pin_init!`]). Use the
-/// [`InPlaceInit::init`] function of a smart pointer like [`Arc<T>`] on this. Because
+/// be [`KBox<T>`], [`Arc<T>`], [`UniqueArc<T>`] or even the stack (see [`stack_pin_init!`]). Use
+/// the [`InPlaceInit::init`] function of a smart pointer like [`Arc<T>`] on this. Because
/// [`PinInit<T, E>`] is a super trait, you can use every function that takes it as well.
///
/// Also see the [module description](self).
///
/// # Safety
///
-/// When implementing this type you will need to take great care. Also there are probably very few
+/// When implementing this trait you will need to take great care. Also there are probably very few
/// cases where a manual implementation is necessary. Use [`init_from_closure`] where possible.
///
-/// The [`Init::__init`] function
+/// The [`Init::__init`] function:
/// - returns `Ok(())` if it initialized every field of `slot`,
/// - returns `Err(err)` if it encountered an error and then cleaned `slot`, this means:
/// - `slot` can be deallocated without UB occurring,
@@ -898,7 +978,7 @@ pub unsafe trait Init<T: ?Sized, E = Infallible>: PinInit<T, E> {
/// # Examples
///
/// ```rust
- /// # #![allow(clippy::disallowed_names)]
+ /// # #![expect(clippy::disallowed_names)]
/// use kernel::{types::Opaque, init::{self, init_from_closure}};
/// struct Foo {
/// buf: [u8; 1_000_000],
@@ -906,7 +986,7 @@ pub unsafe trait Init<T: ?Sized, E = Infallible>: PinInit<T, E> {
///
/// impl Foo {
/// fn setup(&mut self) {
- /// pr_info!("Setting up foo");
+ /// pr_info!("Setting up foo\n");
/// }
/// }
///
@@ -926,7 +1006,7 @@ pub unsafe trait Init<T: ?Sized, E = Infallible>: PinInit<T, E> {
}
/// An initializer returned by [`Init::chain`].
-pub struct ChainInit<I, F, T: ?Sized, E>(I, F, __internal::Invariant<(E, Box<T>)>);
+pub struct ChainInit<I, F, T: ?Sized, E>(I, F, __internal::Invariant<(E, KBox<T>)>);
// SAFETY: The `__init` function is implemented such that it
// - returns `Ok(())` on successful initialization,
@@ -940,11 +1020,9 @@ where
// SAFETY: All requirements fulfilled since this function is `__init`.
unsafe { self.0.__pinned_init(slot)? };
// SAFETY: The above call initialized `slot` and we still have unique access.
- (self.1)(unsafe { &mut *slot }).map_err(|e| {
+ (self.1)(unsafe { &mut *slot }).inspect_err(|_|
// SAFETY: `slot` was initialized above.
- unsafe { core::ptr::drop_in_place(slot) };
- e
- })
+ unsafe { core::ptr::drop_in_place(slot) })
}
}
@@ -1012,9 +1090,11 @@ pub fn uninit<T, E>() -> impl Init<MaybeUninit<T>, E> {
/// # Examples
///
/// ```rust
-/// use kernel::{error::Error, init::init_array_from_fn};
-/// let array: Box<[usize; 1_000]>= Box::init::<Error>(init_array_from_fn(|i| i)).unwrap();
+/// use kernel::{alloc::KBox, error::Error, init::init_array_from_fn};
+/// let array: KBox<[usize; 1_000]> =
+/// KBox::init::<Error>(init_array_from_fn(|i| i), GFP_KERNEL)?;
/// assert_eq!(array.len(), 1_000);
+/// # Ok::<(), Error>(())
/// ```
pub fn init_array_from_fn<I, const N: usize, T, E>(
mut make_init: impl FnMut(usize) -> I,
@@ -1027,7 +1107,7 @@ where
// Counts the number of initialized elements and when dropped drops that many elements from
// `slot`.
let mut init_count = ScopeGuard::new_with_data(0, |i| {
- // We now free every element that has been initialized before:
+ // We now free every element that has been initialized before.
// SAFETY: The loop initialized exactly the values from 0..i and since we
// return `Err` below, the caller will consider the memory at `slot` as
// uninitialized.
@@ -1056,9 +1136,10 @@ where
///
/// ```rust
/// use kernel::{sync::{Arc, Mutex}, init::pin_init_array_from_fn, new_mutex};
-/// let array: Arc<[Mutex<usize>; 1_000]>=
-/// Arc::pin_init(pin_init_array_from_fn(|i| new_mutex!(i))).unwrap();
+/// let array: Arc<[Mutex<usize>; 1_000]> =
+/// Arc::pin_init(pin_init_array_from_fn(|i| new_mutex!(i)), GFP_KERNEL)?;
/// assert_eq!(array.len(), 1_000);
+/// # Ok::<(), Error>(())
/// ```
pub fn pin_init_array_from_fn<I, const N: usize, T, E>(
mut make_init: impl FnMut(usize) -> I,
@@ -1071,7 +1152,7 @@ where
// Counts the number of initialized elements and when dropped drops that many elements from
// `slot`.
let mut init_count = ScopeGuard::new_with_data(0, |i| {
- // We now free every element that has been initialized before:
+ // We now free every element that has been initialized before.
// SAFETY: The loop initialized exactly the values from 0..i and since we
// return `Err` below, the caller will consider the memory at `slot` as
// uninitialized.
@@ -1097,6 +1178,7 @@ where
// SAFETY: Every type can be initialized by-value.
unsafe impl<T, E> Init<T, E> for T {
unsafe fn __init(self, slot: *mut T) -> Result<(), E> {
+ // SAFETY: TODO.
unsafe { slot.write(self) };
Ok(())
}
@@ -1105,17 +1187,24 @@ unsafe impl<T, E> Init<T, E> for T {
// SAFETY: Every type can be initialized by-value. `__pinned_init` calls `__init`.
unsafe impl<T, E> PinInit<T, E> for T {
unsafe fn __pinned_init(self, slot: *mut T) -> Result<(), E> {
+ // SAFETY: TODO.
unsafe { self.__init(slot) }
}
}
/// Smart pointer that can initialize memory in-place.
pub trait InPlaceInit<T>: Sized {
+ /// Pinned version of `Self`.
+ ///
+ /// If a type already implicitly pins its pointee, `Pin<Self>` is unnecessary. In this case use
+ /// `Self`, otherwise just use `Pin<Self>`.
+ type PinnedSelf;
+
/// Use the given pin-initializer to pin-initialize a `T` inside of a new smart pointer of this
/// type.
///
/// If `T: !Unpin` it will not be able to move afterwards.
- fn try_pin_init<E>(init: impl PinInit<T, E>) -> Result<Pin<Self>, E>
+ fn try_pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> Result<Self::PinnedSelf, E>
where
E: From<AllocError>;
@@ -1123,7 +1212,7 @@ pub trait InPlaceInit<T>: Sized {
/// type.
///
/// If `T: !Unpin` it will not be able to move afterwards.
- fn pin_init<E>(init: impl PinInit<T, E>) -> error::Result<Pin<Self>>
+ fn pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> error::Result<Self::PinnedSelf>
where
Error: From<E>,
{
@@ -1131,16 +1220,16 @@ pub trait InPlaceInit<T>: Sized {
let init = unsafe {
pin_init_from_closure(|slot| init.__pinned_init(slot).map_err(|e| Error::from(e)))
};
- Self::try_pin_init(init)
+ Self::try_pin_init(init, flags)
}
/// Use the given initializer to in-place initialize a `T`.
- fn try_init<E>(init: impl Init<T, E>) -> Result<Self, E>
+ fn try_init<E>(init: impl Init<T, E>, flags: Flags) -> Result<Self, E>
where
E: From<AllocError>;
/// Use the given initializer to in-place initialize a `T`.
- fn init<E>(init: impl Init<T, E>) -> error::Result<Self>
+ fn init<E>(init: impl Init<T, E>, flags: Flags) -> error::Result<Self>
where
Error: From<E>,
{
@@ -1148,67 +1237,85 @@ pub trait InPlaceInit<T>: Sized {
let init = unsafe {
init_from_closure(|slot| init.__pinned_init(slot).map_err(|e| Error::from(e)))
};
- Self::try_init(init)
+ Self::try_init(init, flags)
}
}
-impl<T> InPlaceInit<T> for Box<T> {
+impl<T> InPlaceInit<T> for Arc<T> {
+ type PinnedSelf = Self;
+
#[inline]
- fn try_pin_init<E>(init: impl PinInit<T, E>) -> Result<Pin<Self>, E>
+ fn try_pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> Result<Self::PinnedSelf, E>
where
E: From<AllocError>,
{
- let mut this = Box::try_new_uninit()?;
- let slot = this.as_mut_ptr();
- // SAFETY: When init errors/panics, slot will get deallocated but not dropped,
- // slot is valid and will not be moved, because we pin it later.
- unsafe { init.__pinned_init(slot)? };
- // SAFETY: All fields have been initialized.
- Ok(unsafe { this.assume_init() }.into())
+ UniqueArc::try_pin_init(init, flags).map(|u| u.into())
}
#[inline]
- fn try_init<E>(init: impl Init<T, E>) -> Result<Self, E>
+ fn try_init<E>(init: impl Init<T, E>, flags: Flags) -> Result<Self, E>
where
E: From<AllocError>,
{
- let mut this = Box::try_new_uninit()?;
- let slot = this.as_mut_ptr();
- // SAFETY: When init errors/panics, slot will get deallocated but not dropped,
- // slot is valid.
- unsafe { init.__init(slot)? };
- // SAFETY: All fields have been initialized.
- Ok(unsafe { this.assume_init() })
+ UniqueArc::try_init(init, flags).map(|u| u.into())
}
}
impl<T> InPlaceInit<T> for UniqueArc<T> {
+ type PinnedSelf = Pin<Self>;
+
#[inline]
- fn try_pin_init<E>(init: impl PinInit<T, E>) -> Result<Pin<Self>, E>
+ fn try_pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> Result<Self::PinnedSelf, E>
where
E: From<AllocError>,
{
- let mut this = UniqueArc::try_new_uninit()?;
- let slot = this.as_mut_ptr();
- // SAFETY: When init errors/panics, slot will get deallocated but not dropped,
- // slot is valid and will not be moved, because we pin it later.
- unsafe { init.__pinned_init(slot)? };
- // SAFETY: All fields have been initialized.
- Ok(unsafe { this.assume_init() }.into())
+ UniqueArc::new_uninit(flags)?.write_pin_init(init)
}
#[inline]
- fn try_init<E>(init: impl Init<T, E>) -> Result<Self, E>
+ fn try_init<E>(init: impl Init<T, E>, flags: Flags) -> Result<Self, E>
where
E: From<AllocError>,
{
- let mut this = UniqueArc::try_new_uninit()?;
- let slot = this.as_mut_ptr();
+ UniqueArc::new_uninit(flags)?.write_init(init)
+ }
+}
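With the `flags` parameter threaded through, fallible pin-initialization directly into an `Arc` reads as follows (a sketch reusing the `DriverData::new()` constructor from the module docs above):

    let data: Arc<DriverData> = Arc::try_pin_init(DriverData::new(), GFP_KERNEL)?;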
+
+/// Smart pointer containing uninitialized memory and that can write a value.
+pub trait InPlaceWrite<T> {
+ /// The type `Self` turns into when the contents are initialized.
+ type Initialized;
+
+ /// Use the given initializer to write a value into `self`.
+ ///
+ /// Does not drop the current value and considers it as uninitialized memory.
+ fn write_init<E>(self, init: impl Init<T, E>) -> Result<Self::Initialized, E>;
+
+ /// Use the given pin-initializer to write a value into `self`.
+ ///
+ /// Does not drop the current value and considers it as uninitialized memory.
+ fn write_pin_init<E>(self, init: impl PinInit<T, E>) -> Result<Pin<Self::Initialized>, E>;
+}
+
+impl<T> InPlaceWrite<T> for UniqueArc<MaybeUninit<T>> {
+ type Initialized = UniqueArc<T>;
+
+ fn write_init<E>(mut self, init: impl Init<T, E>) -> Result<Self::Initialized, E> {
+ let slot = self.as_mut_ptr();
// SAFETY: When init errors/panics, slot will get deallocated but not dropped,
// slot is valid.
unsafe { init.__init(slot)? };
// SAFETY: All fields have been initialized.
- Ok(unsafe { this.assume_init() })
+ Ok(unsafe { self.assume_init() })
+ }
+
+ fn write_pin_init<E>(mut self, init: impl PinInit<T, E>) -> Result<Pin<Self::Initialized>, E> {
+ let slot = self.as_mut_ptr();
+ // SAFETY: When init errors/panics, slot will get deallocated but not dropped,
+ // slot is valid and will not be moved, because we pin it later.
+ unsafe { init.__pinned_init(slot)? };
+ // SAFETY: All fields have been initialized.
+ Ok(unsafe { self.assume_init() }.into())
}
}
@@ -1229,7 +1336,7 @@ impl<T> InPlaceInit<T> for UniqueArc<T> {
/// #[pinned_drop]
/// impl PinnedDrop for Foo {
/// fn drop(self: Pin<&mut Self>) {
-/// pr_info!("Foo is being dropped!");
+/// pr_info!("Foo is being dropped!\n");
/// }
/// }
/// ```
@@ -1280,6 +1387,7 @@ pub fn zeroed<T: Zeroable>() -> impl Init<T> {
macro_rules! impl_zeroable {
($($({$($generics:tt)*})? $t:ty, )*) => {
+ // SAFETY: Safety comments written in the macro invocation.
$(unsafe impl$($($generics)*)? Zeroable for $t {})*
};
}
@@ -1292,8 +1400,15 @@ impl_zeroable! {
i8, i16, i32, i64, i128, isize,
f32, f64,
- // SAFETY: These are ZSTs, there is nothing to zero.
- {<T: ?Sized>} PhantomData<T>, core::marker::PhantomPinned, Infallible, (),
+ // Note: do not add uninhabited types (such as `!` or `core::convert::Infallible`) to this list;
+ // creating an instance of an uninhabited type is immediate undefined behavior. For more on
+ // uninhabited/empty types, consult The Rustonomicon:
+ // <https://doc.rust-lang.org/stable/nomicon/exotic-sizes.html#empty-types>. The Rust Reference
+ // also has information on undefined behavior:
+ // <https://doc.rust-lang.org/stable/reference/behavior-considered-undefined.html>.
+ //
+ // SAFETY: These are inhabited ZSTs; there is nothing to zero and a valid value exists.
+ {<T: ?Sized>} PhantomData<T>, core::marker::PhantomPinned, (),
// SAFETY: Type is allowed to take any value, including all zeros.
{<T>} MaybeUninit<T>,
@@ -1303,17 +1418,14 @@ impl_zeroable! {
// SAFETY: `T: Zeroable` and `UnsafeCell` is `repr(transparent)`.
{<T: ?Sized + Zeroable>} UnsafeCell<T>,
- // SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee).
+ // SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee:
+ // https://doc.rust-lang.org/stable/std/option/index.html#representation).
Option<NonZeroU8>, Option<NonZeroU16>, Option<NonZeroU32>, Option<NonZeroU64>,
Option<NonZeroU128>, Option<NonZeroUsize>,
Option<NonZeroI8>, Option<NonZeroI16>, Option<NonZeroI32>, Option<NonZeroI64>,
Option<NonZeroI128>, Option<NonZeroIsize>,
-
- // SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee).
- //
- // In this case we are allowed to use `T: ?Sized`, since all zeros is the `None` variant.
- {<T: ?Sized>} Option<NonNull<T>>,
- {<T: ?Sized>} Option<Box<T>>,
+ {<T>} Option<NonNull<T>>,
+ {<T>} Option<KBox<T>>,
// SAFETY: `null` pointer is valid.
//
diff --git a/rust/kernel/init/__internal.rs b/rust/kernel/init/__internal.rs
index db3372619ecd..74329cc3262c 100644
--- a/rust/kernel/init/__internal.rs
+++ b/rust/kernel/init/__internal.rs
@@ -15,9 +15,10 @@ use super::*;
/// [this table]: https://doc.rust-lang.org/nomicon/phantom-data.html#table-of-phantomdata-patterns
pub(super) type Invariant<T> = PhantomData<fn(*mut T) -> *mut T>;
-/// This is the module-internal type implementing `PinInit` and `Init`. It is unsafe to create this
-/// type, since the closure needs to fulfill the same safety requirement as the
-/// `__pinned_init`/`__init` functions.
+/// Module-internal type implementing `PinInit` and `Init`.
+///
+/// It is unsafe to create this type, since the closure needs to fulfill the same safety
+/// requirement as the `__pinned_init`/`__init` functions.
pub(crate) struct InitClosure<F, T: ?Sized, E>(pub(crate) F, pub(crate) Invariant<(E, T)>);
// SAFETY: While constructing the `InitClosure`, the user promised that it upholds the
@@ -53,6 +54,7 @@ where
pub unsafe trait HasPinData {
type PinData: PinData;
+ #[expect(clippy::missing_safety_doc)]
unsafe fn __pin_data() -> Self::PinData;
}
@@ -82,6 +84,7 @@ pub unsafe trait PinData: Copy {
pub unsafe trait HasInitData {
type InitData: InitData;
+ #[expect(clippy::missing_safety_doc)]
unsafe fn __init_data() -> Self::InitData;
}
@@ -102,7 +105,7 @@ pub unsafe trait InitData: Copy {
}
}
-pub struct AllData<T: ?Sized>(PhantomData<fn(Box<T>) -> Box<T>>);
+pub struct AllData<T: ?Sized>(PhantomData<fn(KBox<T>) -> KBox<T>>);
impl<T: ?Sized> Clone for AllData<T> {
fn clone(&self) -> Self {
@@ -112,10 +115,12 @@ impl<T: ?Sized> Clone for AllData<T> {
impl<T: ?Sized> Copy for AllData<T> {}
+// SAFETY: TODO.
unsafe impl<T: ?Sized> InitData for AllData<T> {
type Datee = T;
}
+// SAFETY: TODO.
unsafe impl<T: ?Sized> HasInitData for T {
type InitData = AllData<T>;
@@ -228,3 +233,32 @@ impl OnlyCallFromDrop {
Self(())
}
}
+
+/// Initializer that always fails.
+///
+/// Used by [`assert_pinned!`].
+///
+/// [`assert_pinned!`]: crate::assert_pinned
+pub struct AlwaysFail<T: ?Sized> {
+ _t: PhantomData<T>,
+}
+
+impl<T: ?Sized> AlwaysFail<T> {
+ /// Creates a new initializer that always fails.
+ pub fn new() -> Self {
+ Self { _t: PhantomData }
+ }
+}
+
+impl<T: ?Sized> Default for AlwaysFail<T> {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+// SAFETY: `__pinned_init` always fails, which is always okay.
+unsafe impl<T: ?Sized> PinInit<T, ()> for AlwaysFail<T> {
+ unsafe fn __pinned_init(self, _slot: *mut T) -> Result<(), ()> {
+ Err(())
+ }
+}
diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs
index cb6e61b6c50b..b7213962a6a5 100644
--- a/rust/kernel/init/macros.rs
+++ b/rust/kernel/init/macros.rs
@@ -45,7 +45,7 @@
//! #[pinned_drop]
//! impl PinnedDrop for Foo {
//! fn drop(self: Pin<&mut Self>) {
-//! pr_info!("{self:p} is getting dropped.");
+//! pr_info!("{self:p} is getting dropped.\n");
//! }
//! }
//!
@@ -145,7 +145,7 @@
//! }
//! }
//! // Implement the internal `PinData` trait that marks the pin-data struct as a pin-data
-//! // struct. This is important to ensure that no user can implement a rouge `__pin_data`
+//! // struct. This is important to ensure that no user can implement a rogue `__pin_data`
//! // function without using `unsafe`.
//! unsafe impl<T> ::kernel::init::__internal::PinData for __ThePinData<T> {
//! type Datee = Bar<T>;
@@ -156,7 +156,7 @@
//! // case no such fields exist, hence this is almost empty. The two phantomdata fields exist
//! // for two reasons:
//! // - `__phantom`: every generic must be used, since we cannot really know which generics
-//! // are used, we declere all and then use everything here once.
+//! // are used, we declare all and then use everything here once.
//! // - `__phantom_pin`: uses the `'__pin` lifetime and ensures that this struct is invariant
//! // over it. The lifetime is needed to work around the limitation that trait bounds must
//! // not be trivial, e.g. the user has a `#[pin] PhantomPinned` field -- this is
@@ -182,13 +182,13 @@
//! // Normally `Drop` bounds do not have the correct semantics, but for this purpose they do
//! // (normally people want to know if a type has any kind of drop glue at all, here we want
//! // to know if it has any kind of custom drop glue, which is exactly what this bound does).
-//! #[allow(drop_bounds)]
+//! #[expect(drop_bounds)]
//! impl<T: ::core::ops::Drop> MustNotImplDrop for T {}
//! impl<T> MustNotImplDrop for Bar<T> {}
//! // Here comes a convenience check, if one implemented `PinnedDrop`, but forgot to add it to
//! // `#[pin_data]`, then this will error with the same mechanic as above, this is not needed
//! // for safety, but a good sanity check, since no normal code calls `PinnedDrop::drop`.
-//! #[allow(non_camel_case_types)]
+//! #[expect(non_camel_case_types)]
//! trait UselessPinnedDropImpl_you_need_to_specify_PinnedDrop {}
//! impl<
//! T: ::kernel::init::PinnedDrop,
@@ -250,7 +250,7 @@
//! // error type is `Infallible`) we will need to drop this field if there
//! // is an error later. This `DropGuard` will drop the field when it gets
//! // dropped and has not yet been forgotten.
-//! let t = unsafe {
+//! let __t_guard = unsafe {
//! ::pinned_init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).t))
//! };
//! // Expansion of `x: 0,`:
@@ -261,14 +261,14 @@
//! unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).x), x) };
//! }
//! // We again create a `DropGuard`.
-//! let x = unsafe {
+//! let __x_guard = unsafe {
//! ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).x))
//! };
//! // Since initialization has successfully completed, we can now forget
//! // the guards. This is not `mem::forget`, since we only have
//! // `&DropGuard`.
-//! ::core::mem::forget(x);
-//! ::core::mem::forget(t);
+//! ::core::mem::forget(__x_guard);
+//! ::core::mem::forget(__t_guard);
//! // Here we use the type checker to ensure that every field has been
//! // initialized exactly once, since this is `if false` it will never get
//! // executed, but still type-checked.
@@ -412,7 +412,7 @@
//! #[pinned_drop]
//! impl PinnedDrop for Foo {
//! fn drop(self: Pin<&mut Self>) {
-//! pr_info!("{self:p} is getting dropped.");
+//! pr_info!("{self:p} is getting dropped.\n");
//! }
//! }
//! ```
@@ -423,7 +423,7 @@
//! // `unsafe`, full path and the token parameter are added, everything else stays the same.
//! unsafe impl ::kernel::init::PinnedDrop for Foo {
//! fn drop(self: Pin<&mut Self>, _: ::kernel::init::__internal::OnlyCallFromDrop) {
-//! pr_info!("{self:p} is getting dropped.");
+//! pr_info!("{self:p} is getting dropped.\n");
//! }
//! }
//! ```
@@ -461,16 +461,16 @@
//! {
//! unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).a), a) };
//! }
-//! let a = unsafe {
+//! let __a_guard = unsafe {
//! ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).a))
//! };
//! let init = Bar::new(36);
//! unsafe { data.b(::core::addr_of_mut!((*slot).b), b)? };
-//! let b = unsafe {
+//! let __b_guard = unsafe {
//! ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).b))
//! };
-//! ::core::mem::forget(b);
-//! ::core::mem::forget(a);
+//! ::core::mem::forget(__b_guard);
+//! ::core::mem::forget(__a_guard);
//! #[allow(unreachable_code, clippy::diverging_sub_expression)]
//! let _ = || {
//! unsafe {
@@ -513,6 +513,7 @@ macro_rules! __pinned_drop {
}
),
) => {
+ // SAFETY: TODO.
unsafe $($impl_sig)* {
// Inherit all attributes and the type/ident tokens for the signature.
$(#[$($attr)*])*
@@ -538,6 +539,7 @@ macro_rules! __pin_data {
),
@impl_generics($($impl_generics:tt)*),
@ty_generics($($ty_generics:tt)*),
+ @decl_generics($($decl_generics:tt)*),
@body({ $($fields:tt)* }),
) => {
// We now use token munching to iterate through all of the fields. While doing this we
@@ -560,6 +562,9 @@ macro_rules! __pin_data {
@impl_generics($($impl_generics)*),
// The 'ty generics', the generics that will need to be specified on the impl blocks.
@ty_generics($($ty_generics)*),
+ // The 'decl generics', the generics that need to be specified on the struct
+ // definition.
+ @decl_generics($($decl_generics)*),
// The where clause of any impl block and the declaration.
@where($($($whr)*)?),
// The remaining fields tokens that need to be processed.
@@ -585,6 +590,7 @@ macro_rules! __pin_data {
@name($name:ident),
@impl_generics($($impl_generics:tt)*),
@ty_generics($($ty_generics:tt)*),
+ @decl_generics($($decl_generics:tt)*),
@where($($whr:tt)*),
// We found a PhantomPinned field, this should generally be pinned!
@fields_munch($field:ident : $($($(::)?core::)?marker::)?PhantomPinned, $($rest:tt)*),
@@ -607,6 +613,7 @@ macro_rules! __pin_data {
@name($name),
@impl_generics($($impl_generics)*),
@ty_generics($($ty_generics)*),
+ @decl_generics($($decl_generics)*),
@where($($whr)*),
@fields_munch($($rest)*),
@pinned($($pinned)* $($accum)* $field: ::core::marker::PhantomPinned,),
@@ -623,6 +630,7 @@ macro_rules! __pin_data {
@name($name:ident),
@impl_generics($($impl_generics:tt)*),
@ty_generics($($ty_generics:tt)*),
+ @decl_generics($($decl_generics:tt)*),
@where($($whr:tt)*),
// We reached the field declaration.
@fields_munch($field:ident : $type:ty, $($rest:tt)*),
@@ -640,6 +648,7 @@ macro_rules! __pin_data {
@name($name),
@impl_generics($($impl_generics)*),
@ty_generics($($ty_generics)*),
+ @decl_generics($($decl_generics)*),
@where($($whr)*),
@fields_munch($($rest)*),
@pinned($($pinned)* $($accum)* $field: $type,),
@@ -656,6 +665,7 @@ macro_rules! __pin_data {
@name($name:ident),
@impl_generics($($impl_generics:tt)*),
@ty_generics($($ty_generics:tt)*),
+ @decl_generics($($decl_generics:tt)*),
@where($($whr:tt)*),
// We reached the field declaration.
@fields_munch($field:ident : $type:ty, $($rest:tt)*),
@@ -673,6 +683,7 @@ macro_rules! __pin_data {
@name($name),
@impl_generics($($impl_generics)*),
@ty_generics($($ty_generics)*),
+ @decl_generics($($decl_generics)*),
@where($($whr)*),
@fields_munch($($rest)*),
@pinned($($pinned)*),
@@ -689,6 +700,7 @@ macro_rules! __pin_data {
@name($name:ident),
@impl_generics($($impl_generics:tt)*),
@ty_generics($($ty_generics:tt)*),
+ @decl_generics($($decl_generics:tt)*),
@where($($whr:tt)*),
// We found the `#[pin]` attr.
@fields_munch(#[pin] $($rest:tt)*),
@@ -705,6 +717,7 @@ macro_rules! __pin_data {
@name($name),
@impl_generics($($impl_generics)*),
@ty_generics($($ty_generics)*),
+ @decl_generics($($decl_generics)*),
@where($($whr)*),
@fields_munch($($rest)*),
// We do not include `#[pin]` in the list of attributes, since it is not actually an
@@ -724,6 +737,7 @@ macro_rules! __pin_data {
@name($name:ident),
@impl_generics($($impl_generics:tt)*),
@ty_generics($($ty_generics:tt)*),
+ @decl_generics($($decl_generics:tt)*),
@where($($whr:tt)*),
// We reached the field declaration with visibility, for simplicity we only munch the
// visibility and put it into `$accum`.
@@ -741,6 +755,7 @@ macro_rules! __pin_data {
@name($name),
@impl_generics($($impl_generics)*),
@ty_generics($($ty_generics)*),
+ @decl_generics($($decl_generics)*),
@where($($whr)*),
@fields_munch($field $($rest)*),
@pinned($($pinned)*),
@@ -757,6 +772,7 @@ macro_rules! __pin_data {
@name($name:ident),
@impl_generics($($impl_generics:tt)*),
@ty_generics($($ty_generics:tt)*),
+ @decl_generics($($decl_generics:tt)*),
@where($($whr:tt)*),
// Some other attribute, just put it into `$accum`.
@fields_munch(#[$($attr:tt)*] $($rest:tt)*),
@@ -773,6 +789,7 @@ macro_rules! __pin_data {
@name($name),
@impl_generics($($impl_generics)*),
@ty_generics($($ty_generics)*),
+ @decl_generics($($decl_generics)*),
@where($($whr)*),
@fields_munch($($rest)*),
@pinned($($pinned)*),
@@ -789,6 +806,7 @@ macro_rules! __pin_data {
@name($name:ident),
@impl_generics($($impl_generics:tt)*),
@ty_generics($($ty_generics:tt)*),
+ @decl_generics($($decl_generics:tt)*),
@where($($whr:tt)*),
// We reached the end of the fields, plus an optional additional comma, since we added one
// before and the user is also allowed to put a trailing comma.
@@ -802,7 +820,7 @@ macro_rules! __pin_data {
) => {
// Declare the struct with all fields in the correct order.
$($struct_attrs)*
- $vis struct $name <$($impl_generics)*>
+ $vis struct $name <$($decl_generics)*>
where $($whr)*
{
$($fields)*
@@ -855,6 +873,7 @@ macro_rules! __pin_data {
}
}
+ // SAFETY: TODO.
unsafe impl<$($impl_generics)*>
$crate::init::__internal::PinData for __ThePinData<$($ty_generics)*>
where $($whr)*
@@ -906,14 +925,14 @@ macro_rules! __pin_data {
// `Drop`. Additionally we will implement this trait for the struct leading to a conflict,
// if it also implements `Drop`
trait MustNotImplDrop {}
- #[allow(drop_bounds)]
+ #[expect(drop_bounds)]
impl<T: ::core::ops::Drop> MustNotImplDrop for T {}
impl<$($impl_generics)*> MustNotImplDrop for $name<$($ty_generics)*>
where $($whr)* {}
// We also take care to prevent users from writing a useless `PinnedDrop` implementation.
// They might implement `PinnedDrop` correctly for the struct, but forget to give
// `PinnedDrop` as the parameter to `#[pin_data]`.
- #[allow(non_camel_case_types)]
+ #[expect(non_camel_case_types)]
trait UselessPinnedDropImpl_you_need_to_specify_PinnedDrop {}
impl<T: $crate::init::PinnedDrop>
UselessPinnedDropImpl_you_need_to_specify_PinnedDrop for T {}
@@ -970,6 +989,7 @@ macro_rules! __pin_data {
//
// The functions are `unsafe` to prevent accidentally calling them.
#[allow(dead_code)]
+ #[expect(clippy::missing_safety_doc)]
impl<$($impl_generics)*> $pin_data<$($ty_generics)*>
where $($whr)*
{
@@ -980,6 +1000,7 @@ macro_rules! __pin_data {
slot: *mut $p_type,
init: impl $crate::init::PinInit<$p_type, E>,
) -> ::core::result::Result<(), E> {
+ // SAFETY: TODO.
unsafe { $crate::init::PinInit::__pinned_init(init, slot) }
}
)*
@@ -990,6 +1011,7 @@ macro_rules! __pin_data {
slot: *mut $type,
init: impl $crate::init::Init<$type, E>,
) -> ::core::result::Result<(), E> {
+ // SAFETY: TODO.
unsafe { $crate::init::Init::__init(init, slot) }
}
)*
@@ -1104,6 +1126,8 @@ macro_rules! __init_internal {
// no possibility of returning without `unsafe`.
struct __InitOk;
// Get the data about fields from the supplied type.
+ //
+ // SAFETY: TODO.
let data = unsafe {
use $crate::init::__internal::$has_data;
// Here we abuse `paste!` to retokenize `$t`. Declarative macros have some internal
@@ -1159,6 +1183,7 @@ macro_rules! __init_internal {
let init = move |slot| -> ::core::result::Result<(), $err> {
init(slot).map(|__InitOk| ())
};
+ // SAFETY: TODO.
let init = unsafe { $crate::init::$construct_closure::<_, $err>(init) };
init
}};
@@ -1192,14 +1217,14 @@ macro_rules! __init_internal {
// We use `paste!` to create new hygiene for `$field`.
::kernel::macros::paste! {
// SAFETY: We forget the guard later when initialization has succeeded.
- let [<$field>] = unsafe {
+ let [< __ $field _guard >] = unsafe {
$crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field))
};
$crate::__init_internal!(init_slot($use_data):
@data($data),
@slot($slot),
- @guards([<$field>], $($guards,)*),
+ @guards([< __ $field _guard >], $($guards,)*),
@munch_fields($($rest)*),
);
}
@@ -1223,14 +1248,14 @@ macro_rules! __init_internal {
// We use `paste!` to create new hygiene for `$field`.
::kernel::macros::paste! {
// SAFETY: We forget the guard later when initialization has succeeded.
- let [<$field>] = unsafe {
+ let [< __ $field _guard >] = unsafe {
$crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field))
};
$crate::__init_internal!(init_slot():
@data($data),
@slot($slot),
- @guards([<$field>], $($guards,)*),
+ @guards([< __ $field _guard >], $($guards,)*),
@munch_fields($($rest)*),
);
}
@@ -1255,14 +1280,14 @@ macro_rules! __init_internal {
// We use `paste!` to create new hygiene for `$field`.
::kernel::macros::paste! {
// SAFETY: We forget the guard later when initialization has succeeded.
- let [<$field>] = unsafe {
+ let [< __ $field _guard >] = unsafe {
$crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field))
};
$crate::__init_internal!(init_slot($($use_data)?):
@data($data),
@slot($slot),
- @guards([<$field>], $($guards,)*),
+ @guards([< __ $field _guard >], $($guards,)*),
@munch_fields($($rest)*),
);
}
@@ -1307,6 +1332,8 @@ macro_rules! __init_internal {
// Endpoint, nothing more to munch, create the initializer.
// Since we are in the closure that is never called, this will never get executed.
// We abuse `slot` to get the correct type inference here:
+ //
+ // SAFETY: TODO.
unsafe {
// Here we abuse `paste!` to retokenize `$t`. Declarative macros have some internal
// information that is associated to already parsed fragments, so a path fragment
diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs
new file mode 100644
index 000000000000..d4a73e52e3ee
--- /dev/null
+++ b/rust/kernel/io.rs
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Memory-mapped IO.
+//!
+//! C header: [`include/asm-generic/io.h`](srctree/include/asm-generic/io.h)
+
+use crate::error::{code::EINVAL, Result};
+use crate::{bindings, build_assert};
+
+/// Raw representation of an MMIO region.
+///
+/// By itself, the existence of an instance of this structure does not provide any guarantee that
+/// the represented MMIO region exists or is properly mapped.
+///
+/// Instead, the bus-specific MMIO implementation must convert this raw representation into an
+/// `Io` instance that provides the actual memory accessors. Guarantees are only given once the
+/// conversion into an `Io` structure has happened.
+pub struct IoRaw<const SIZE: usize = 0> {
+ addr: usize,
+ maxsize: usize,
+}
+
+impl<const SIZE: usize> IoRaw<SIZE> {
+ /// Returns a new `IoRaw` instance on success, an error otherwise.
+ pub fn new(addr: usize, maxsize: usize) -> Result<Self> {
+ if maxsize < SIZE {
+ return Err(EINVAL);
+ }
+
+ Ok(Self { addr, maxsize })
+ }
+
+ /// Returns the base address of the MMIO region.
+ #[inline]
+ pub fn addr(&self) -> usize {
+ self.addr
+ }
+
+ /// Returns the maximum size of the MMIO region.
+ #[inline]
+ pub fn maxsize(&self) -> usize {
+ self.maxsize
+ }
+}
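A short sketch (the address is made up): the compile-time minimum `SIZE` is checked against the runtime `maxsize` at construction:

    assert!(IoRaw::<4>::new(0xBAAD_F00D, 2).is_err()); // window smaller than `SIZE`
    assert!(IoRaw::<4>::new(0xBAAD_F00D, 4).is_ok());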
+
+/// IO-mapped memory, starting at the base address `addr` and spanning `maxsize` bytes.
+///
+/// The creator (usually a subsystem / bus such as PCI) is responsible for creating the
+/// mapping, performing an additional region request, etc.
+///
+/// # Invariant
+///
+/// `addr` is the start and `maxsize` the length of a valid I/O mapped memory region.
+///
+/// # Examples
+///
+/// ```no_run
+/// # use kernel::{bindings, io::{Io, IoRaw}};
+/// # use core::ops::Deref;
+///
+/// // See also [`pci::Bar`] for a real example.
+/// struct IoMem<const SIZE: usize>(IoRaw<SIZE>);
+///
+/// impl<const SIZE: usize> IoMem<SIZE> {
+/// /// # Safety
+/// ///
+/// /// [`paddr`, `paddr` + `SIZE`) must be a valid MMIO region that is mappable into the CPU's
+/// /// virtual address space.
+/// unsafe fn new(paddr: usize) -> Result<Self> {
+/// // SAFETY: By the safety requirements of this function [`paddr`, `paddr` + `SIZE`) is
+/// // valid for `ioremap`.
+/// let addr = unsafe { bindings::ioremap(paddr as _, SIZE as _) };
+/// if addr.is_null() {
+/// return Err(ENOMEM);
+/// }
+///
+/// Ok(IoMem(IoRaw::new(addr as _, SIZE)?))
+/// }
+/// }
+///
+/// impl<const SIZE: usize> Drop for IoMem<SIZE> {
+/// fn drop(&mut self) {
+/// // SAFETY: `self.0.addr()` is guaranteed to be properly mapped by `Self::new`.
+/// unsafe { bindings::iounmap(self.0.addr() as _); };
+/// }
+/// }
+///
+/// impl<const SIZE: usize> Deref for IoMem<SIZE> {
+/// type Target = Io<SIZE>;
+///
+/// fn deref(&self) -> &Self::Target {
+/// // SAFETY: The memory range stored in `self` has been properly mapped in `Self::new`.
+/// unsafe { Io::from_raw(&self.0) }
+/// }
+/// }
+///
+/// # fn no_run() -> Result<(), Error> {
+/// // SAFETY: Invalid usage for example purposes.
+/// let iomem = unsafe { IoMem::<{ core::mem::size_of::<u32>() }>::new(0xBAAAAAAD)? };
+/// iomem.writel(0x42, 0x0);
+/// assert!(iomem.try_writel(0x42, 0x0).is_ok());
+/// assert!(iomem.try_writel(0x42, 0x4).is_err());
+/// # Ok(())
+/// # }
+/// ```
+#[repr(transparent)]
+pub struct Io<const SIZE: usize = 0>(IoRaw<SIZE>);
+
+macro_rules! define_read {
+ ($(#[$attr:meta])* $name:ident, $try_name:ident, $type_name:ty) => {
+ /// Read IO data from a given offset known at compile time.
+ ///
+ /// Bounds checks are performed at compile time, hence if the offset is not known at
+ /// compile time, the build will fail.
+ $(#[$attr])*
+ #[inline]
+ pub fn $name(&self, offset: usize) -> $type_name {
+ let addr = self.io_addr_assert::<$type_name>(offset);
+
+ // SAFETY: By the type invariant `addr` is a valid address for MMIO operations.
+ unsafe { bindings::$name(addr as _) }
+ }
+
+ /// Read IO data from a given offset.
+ ///
+ /// Bounds checks are performed at runtime; the call fails if the offset (plus the type
+ /// size) is out of bounds.
+ $(#[$attr])*
+ pub fn $try_name(&self, offset: usize) -> Result<$type_name> {
+ let addr = self.io_addr::<$type_name>(offset)?;
+
+ // SAFETY: By the type invariant `addr` is a valid address for MMIO operations.
+ Ok(unsafe { bindings::$name(addr as _) })
+ }
+ };
+}
+
+macro_rules! define_write {
+ ($(#[$attr:meta])* $name:ident, $try_name:ident, $type_name:ty) => {
+ /// Write IO data to a given offset known at compile time.
+ ///
+ /// Bounds checks are performed at compile time, hence if the offset is not known at
+ /// compile time, the build will fail.
+ $(#[$attr])*
+ #[inline]
+ pub fn $name(&self, value: $type_name, offset: usize) {
+ let addr = self.io_addr_assert::<$type_name>(offset);
+
+ // SAFETY: By the type invariant `addr` is a valid address for MMIO operations.
+ unsafe { bindings::$name(value, addr as _) }
+ }
+
+ /// Write IO data to a given offset.
+ ///
+ /// Bounds checks are performed at runtime; the call fails if the offset (plus the type
+ /// size) is out of bounds.
+ $(#[$attr])*
+ pub fn $try_name(&self, value: $type_name, offset: usize) -> Result {
+ let addr = self.io_addr::<$type_name>(offset)?;
+
+ // SAFETY: By the type invariant `addr` is a valid address for MMIO operations.
+ unsafe { bindings::$name(value, addr as _) }
+ Ok(())
+ }
+ };
+}
+
+impl<const SIZE: usize> Io<SIZE> {
+ /// Converts an `IoRaw` into an `Io` instance, providing the accessors to the MMIO mapping.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that `addr` is the start of a valid I/O mapped memory region of size
+ /// `maxsize`.
+ pub unsafe fn from_raw(raw: &IoRaw<SIZE>) -> &Self {
+ // SAFETY: `Io` is a transparent wrapper around `IoRaw`.
+ unsafe { &*core::ptr::from_ref(raw).cast() }
+ }
+
+ /// Returns the base address of this mapping.
+ #[inline]
+ pub fn addr(&self) -> usize {
+ self.0.addr()
+ }
+
+ /// Returns the maximum size of this mapping.
+ #[inline]
+ pub fn maxsize(&self) -> usize {
+ self.0.maxsize()
+ }
+
+ #[inline]
+ const fn offset_valid<U>(offset: usize, size: usize) -> bool {
+ let type_size = core::mem::size_of::<U>();
+ if let Some(end) = offset.checked_add(type_size) {
+ end <= size && offset % type_size == 0
+ } else {
+ false
+ }
+ }
+
+ #[inline]
+ fn io_addr<U>(&self, offset: usize) -> Result<usize> {
+ if !Self::offset_valid::<U>(offset, self.maxsize()) {
+ return Err(EINVAL);
+ }
+
+ // Probably no need to check, since the safety requirements of `Self::new` guarantee that
+ // this can't overflow.
+ self.addr().checked_add(offset).ok_or(EINVAL)
+ }
+
+ #[inline]
+ fn io_addr_assert<U>(&self, offset: usize) -> usize {
+ build_assert!(Self::offset_valid::<U>(offset, SIZE));
+
+ self.addr() + offset
+ }
+
+ define_read!(readb, try_readb, u8);
+ define_read!(readw, try_readw, u16);
+ define_read!(readl, try_readl, u32);
+ define_read!(
+ #[cfg(CONFIG_64BIT)]
+ readq,
+ try_readq,
+ u64
+ );
+
+ define_read!(readb_relaxed, try_readb_relaxed, u8);
+ define_read!(readw_relaxed, try_readw_relaxed, u16);
+ define_read!(readl_relaxed, try_readl_relaxed, u32);
+ define_read!(
+ #[cfg(CONFIG_64BIT)]
+ readq_relaxed,
+ try_readq_relaxed,
+ u64
+ );
+
+ define_write!(writeb, try_writeb, u8);
+ define_write!(writew, try_writew, u16);
+ define_write!(writel, try_writel, u32);
+ define_write!(
+ #[cfg(CONFIG_64BIT)]
+ writeq,
+ try_writeq,
+ u64
+ );
+
+ define_write!(writeb_relaxed, try_writeb_relaxed, u8);
+ define_write!(writew_relaxed, try_writew_relaxed, u16);
+ define_write!(writel_relaxed, try_writel_relaxed, u32);
+ define_write!(
+ #[cfg(CONFIG_64BIT)]
+ writeq_relaxed,
+ try_writeq_relaxed,
+ u64
+ );
+}
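To illustrate the accessors generated above, a hedged sketch of driver-side usage follows; the window size `0x100` and the register offsets are illustrative assumptions, not part of this patch:

```rust
use kernel::io::Io;
use kernel::prelude::*;

// Hypothetical device with a 0x100-byte MMIO window.
fn read_status(regs: &Io<0x100>) -> u32 {
    // The offset is a literal, so the bounds check happens at build time.
    regs.readl(0x04)
}

fn write_doorbell(regs: &Io<0x100>, offset: usize, val: u32) -> Result {
    // The offset is only known at runtime, so it is checked there; the call
    // returns `EINVAL` on out-of-bounds or misaligned offsets.
    regs.try_writel(val, offset)
}
```

The compile-time variants lean on `build_assert!`, so an out-of-bounds literal offset fails the build rather than producing an error path.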
diff --git a/rust/kernel/ioctl.rs b/rust/kernel/ioctl.rs
index f1d42ab69972..2fc7662339e5 100644
--- a/rust/kernel/ioctl.rs
+++ b/rust/kernel/ioctl.rs
@@ -1,10 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
-//! ioctl() number definitions
+//! `ioctl()` number definitions.
//!
//! C header: [`include/asm-generic/ioctl.h`](srctree/include/asm-generic/ioctl.h)
-#![allow(non_snake_case)]
+#![expect(non_snake_case)]
use crate::build_assert;
@@ -28,13 +28,13 @@ pub const fn _IO(ty: u32, nr: u32) -> u32 {
_IOC(uapi::_IOC_NONE, ty, nr, 0)
}
-/// Build an ioctl number for an read-only ioctl.
+/// Build an ioctl number for a read-only ioctl.
#[inline(always)]
pub const fn _IOR<T>(ty: u32, nr: u32) -> u32 {
_IOC(uapi::_IOC_READ, ty, nr, core::mem::size_of::<T>())
}
-/// Build an ioctl number for an write-only ioctl.
+/// Build an ioctl number for a write-only ioctl.
#[inline(always)]
pub const fn _IOW<T>(ty: u32, nr: u32) -> u32 {
_IOC(uapi::_IOC_WRITE, ty, nr, core::mem::size_of::<T>())
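For context, a hedged sketch of how a driver might define its numbers with these helpers; the magic value and commands are illustrative, not a real UAPI:

```rust
use kernel::ioctl::{_IOR, _IOW};

// Hypothetical driver "magic" byte and command numbers.
pub const EX_MAGIC: u32 = b'E' as u32;
// Userspace reads a `u64` from the driver.
pub const EX_GET_COUNT: u32 = _IOR::<u64>(EX_MAGIC, 0x00);
// Userspace writes a `u64` to the driver.
pub const EX_SET_COUNT: u32 = _IOW::<u64>(EX_MAGIC, 0x01);
```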
diff --git a/rust/kernel/jump_label.rs b/rust/kernel/jump_label.rs
new file mode 100644
index 000000000000..4e974c768dbd
--- /dev/null
+++ b/rust/kernel/jump_label.rs
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 Google LLC.
+
+//! Logic for static keys.
+//!
+//! C header: [`include/linux/jump_label.h`](srctree/include/linux/jump_label.h).
+
+/// Branch based on a static key.
+///
+/// Takes three arguments:
+///
+/// * `key` - the path to the static variable containing the `static_key`.
+/// * `keytyp` - the type of `key`.
+/// * `field` - the name of the field of `key` that contains the `static_key`.
+///
+/// # Safety
+///
+/// The macro must be used with a real static key defined by C.
+#[macro_export]
+macro_rules! static_branch_unlikely {
+ ($key:path, $keytyp:ty, $field:ident) => {{
+ let _key: *const $keytyp = ::core::ptr::addr_of!($key);
+ let _key: *const $crate::bindings::static_key_false = ::core::ptr::addr_of!((*_key).$field);
+ let _key: *const $crate::bindings::static_key = _key.cast();
+
+ #[cfg(not(CONFIG_JUMP_LABEL))]
+ {
+ $crate::bindings::static_key_count(_key.cast_mut()) > 0
+ }
+
+ #[cfg(CONFIG_JUMP_LABEL)]
+ $crate::jump_label::arch_static_branch! { $key, $keytyp, $field, false }
+ }};
+}
+pub use static_branch_unlikely;
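As a hedged sketch of the calling convention (`my_feature` and `my_feature_t` are hypothetical stand-ins for a C-defined static key and its containing type):

```rust
/// Returns whether the (hypothetical) feature key is currently enabled.
fn feature_enabled() -> bool {
    // SAFETY: Assumes `my_feature` is a real static key defined on the C
    // side, with a `static_key_false` in its `key` field; both names are
    // invented for this sketch.
    unsafe {
        kernel::static_branch_unlikely!(
            kernel::bindings::my_feature,
            kernel::bindings::my_feature_t,
            key
        )
    }
}
```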
+
+/// Assert that the assembly block evaluates to a string literal.
+#[cfg(CONFIG_JUMP_LABEL)]
+const _: &str = include!(concat!(
+ env!("OBJTREE"),
+ "/rust/kernel/generated_arch_static_branch_asm.rs"
+));
+
+#[macro_export]
+#[doc(hidden)]
+#[cfg(CONFIG_JUMP_LABEL)]
+macro_rules! arch_static_branch {
+ ($key:path, $keytyp:ty, $field:ident, $branch:expr) => {'my_label: {
+ $crate::asm!(
+ include!(concat!(env!("OBJTREE"), "/rust/kernel/generated_arch_static_branch_asm.rs"));
+ l_yes = label {
+ break 'my_label true;
+ },
+ symb = sym $key,
+ off = const ::core::mem::offset_of!($keytyp, $field),
+ branch = const $crate::jump_label::bool_to_int($branch),
+ );
+
+ break 'my_label false;
+ }};
+}
+
+#[cfg(CONFIG_JUMP_LABEL)]
+pub use arch_static_branch;
+
+/// A helper used by inline assembly to pass a boolean as a `const` parameter.
+///
+/// Using this function instead of a cast lets you assert that the input is a boolean, and not some
+/// other type that can also be cast to an integer.
+#[doc(hidden)]
+pub const fn bool_to_int(b: bool) -> i32 {
+ b as i32
+}
diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs
index 0ba77276ae7e..824da0e9738a 100644
--- a/rust/kernel/kunit.rs
+++ b/rust/kernel/kunit.rs
@@ -18,7 +18,7 @@ pub fn err(args: fmt::Arguments<'_>) {
#[cfg(CONFIG_PRINTK)]
unsafe {
bindings::_printk(
- b"\x013%pA\0".as_ptr() as _,
+ c"\x013%pA".as_ptr() as _,
&args as *const _ as *const c_void,
);
}
@@ -34,7 +34,7 @@ pub fn info(args: fmt::Arguments<'_>) {
#[cfg(CONFIG_PRINTK)]
unsafe {
bindings::_printk(
- b"\x016%pA\0".as_ptr() as _,
+ c"\x016%pA".as_ptr() as _,
&args as *const _ as *const c_void,
);
}
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index 7ac39874aeac..7697c60b2d1a 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -6,21 +6,24 @@
//! usage by Rust code in the kernel and is shared by all of them.
//!
//! In other words, all the rest of the Rust code in the kernel (e.g. kernel
-//! modules written in Rust) depends on [`core`], [`alloc`] and this crate.
+//! modules written in Rust) depends on [`core`] and this crate.
//!
//! If you need a kernel C API that is not ported or wrapped yet here, then
//! do so first instead of bypassing this crate.
#![no_std]
-#![feature(allocator_api)]
-#![feature(coerce_unsized)]
-#![feature(const_maybe_uninit_zeroed)]
-#![feature(dispatch_from_dyn)]
-#![feature(new_uninit)]
-#![feature(offset_of)]
-#![feature(ptr_metadata)]
-#![feature(receiver_trait)]
-#![feature(unsize)]
+#![feature(arbitrary_self_types)]
+#![cfg_attr(CONFIG_RUSTC_HAS_COERCE_POINTEE, feature(derive_coerce_pointee))]
+#![cfg_attr(not(CONFIG_RUSTC_HAS_COERCE_POINTEE), feature(coerce_unsized))]
+#![cfg_attr(not(CONFIG_RUSTC_HAS_COERCE_POINTEE), feature(dispatch_from_dyn))]
+#![cfg_attr(not(CONFIG_RUSTC_HAS_COERCE_POINTEE), feature(unsize))]
+#![feature(inline_const)]
+#![feature(lint_reasons)]
+// Stable in Rust 1.83
+#![feature(const_maybe_uninit_as_mut_ptr)]
+#![feature(const_mut_refs)]
+#![feature(const_ptr_write)]
+#![feature(const_refs_to_cell)]
// Ensure conditional compilation based on the kernel configuration works;
// otherwise we may silently break things like initcall handling.
@@ -30,26 +33,57 @@ compile_error!("Missing kernel configuration for conditional compilation");
// Allow proc-macros to refer to `::kernel` inside the `kernel` crate (this crate).
extern crate self as kernel;
-#[cfg(not(test))]
-#[cfg(not(testlib))]
-mod allocator;
-mod build_assert;
+pub use ffi;
+
+pub mod alloc;
+#[cfg(CONFIG_BLOCK)]
+pub mod block;
+#[doc(hidden)]
+pub mod build_assert;
+pub mod cred;
+pub mod device;
+pub mod device_id;
+pub mod devres;
+pub mod driver;
pub mod error;
+pub mod faux;
+#[cfg(CONFIG_RUST_FW_LOADER_ABSTRACTIONS)]
+pub mod firmware;
+pub mod fs;
pub mod init;
+pub mod io;
pub mod ioctl;
+pub mod jump_label;
#[cfg(CONFIG_KUNIT)]
pub mod kunit;
+pub mod list;
+pub mod miscdevice;
#[cfg(CONFIG_NET)]
pub mod net;
+pub mod of;
+pub mod page;
+#[cfg(CONFIG_PCI)]
+pub mod pci;
+pub mod pid_namespace;
+pub mod platform;
pub mod prelude;
pub mod print;
+pub mod rbtree;
+pub mod revocable;
+pub mod security;
+pub mod seq_file;
+pub mod sizes;
mod static_assert;
#[doc(hidden)]
pub mod std_vendor;
pub mod str;
pub mod sync;
pub mod task;
+pub mod time;
+pub mod tracepoint;
+pub mod transmute;
pub mod types;
+pub mod uaccess;
pub mod workqueue;
#[doc(hidden)]
@@ -57,16 +91,13 @@ pub use bindings;
pub use macros;
pub use uapi;
-#[doc(hidden)]
-pub use build_error::build_error;
-
/// Prefix to appear before log messages printed from within the `kernel` crate.
const __LOG_PREFIX: &[u8] = b"rust_kernel\0";
/// The top level entrypoint to implementing a kernel module.
///
/// For any teardown or cleanup operations, your type may implement [`Drop`].
-pub trait Module: Sized + Sync {
+pub trait Module: Sized + Sync + Send {
/// Called at module initialization time.
///
/// Use this method to perform whatever setup or registration your module
@@ -76,9 +107,38 @@ pub trait Module: Sized + Sync {
fn init(module: &'static ThisModule) -> error::Result<Self>;
}
+/// A module that is pinned and initialised in-place.
+pub trait InPlaceModule: Sync + Send {
+ /// Creates an initialiser for the module.
+ ///
+ /// It is called when the module is loaded.
+ fn init(module: &'static ThisModule) -> impl init::PinInit<Self, error::Error>;
+}
+
+impl<T: Module> InPlaceModule for T {
+ fn init(module: &'static ThisModule) -> impl init::PinInit<Self, error::Error> {
+ let initer = move |slot: *mut Self| {
+ let m = <Self as Module>::init(module)?;
+
+ // SAFETY: `slot` is valid for write per the contract with `pin_init_from_closure`.
+ unsafe { slot.write(m) };
+ Ok(())
+ };
+
+ // SAFETY: On success, `initer` always fully initialises an instance of `Self`.
+ unsafe { init::pin_init_from_closure(initer) }
+ }
+}
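For context, a minimal sketch of the classic `Module` pattern that this blanket impl adapts to `InPlaceModule`; the module name and strings are illustrative:

```rust
use kernel::prelude::*;

module! {
    type: HelloModule,
    name: "hello",
    author: "Example Author",
    description: "Minimal sketch of a Rust kernel module",
    license: "GPL",
}

struct HelloModule;

impl kernel::Module for HelloModule {
    fn init(_module: &'static ThisModule) -> Result<Self> {
        pr_info!("Hello from Rust!\n");
        Ok(HelloModule)
    }
}
```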
+
+/// Metadata attached to a [`Module`] or [`InPlaceModule`].
+pub trait ModuleMetadata {
+ /// The name of the module as specified in the `module!` macro.
+ const NAME: &'static crate::str::CStr;
+}
+
/// Equivalent to `THIS_MODULE` in the C API.
///
-/// C header: `include/linux/export.h`
+/// C header: [`include/linux/init.h`](srctree/include/linux/init.h)
pub struct ThisModule(*mut bindings::module);
// SAFETY: `THIS_MODULE` may be used from all threads within a module.
@@ -93,6 +153,13 @@ impl ThisModule {
pub const unsafe fn from_ptr(ptr: *mut bindings::module) -> ThisModule {
ThisModule(ptr)
}
+
+ /// Access the raw pointer for this module.
+ ///
+ /// It is up to the user to use it correctly.
+ pub const fn as_ptr(&self) -> *mut bindings::module {
+ self.0
+ }
}
#[cfg(not(any(testlib, test)))]
@@ -102,3 +169,70 @@ fn panic(info: &core::panic::PanicInfo<'_>) -> ! {
// SAFETY: FFI call.
unsafe { bindings::BUG() };
}
+
+/// Produces a pointer to an object from a pointer to one of its fields.
+///
+/// # Safety
+///
+/// The pointer passed to this macro, and the pointer returned by this macro, must both be in
+/// bounds of the same allocation.
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::container_of;
+/// struct Test {
+/// a: u64,
+/// b: u32,
+/// }
+///
+/// let test = Test { a: 10, b: 20 };
+/// let b_ptr = &test.b;
+/// // SAFETY: The pointer points at the `b` field of a `Test`, so the resulting pointer will be
+/// // in-bounds of the same allocation as `b_ptr`.
+/// let test_alias = unsafe { container_of!(b_ptr, Test, b) };
+/// assert!(core::ptr::eq(&test, test_alias));
+/// ```
+#[macro_export]
+macro_rules! container_of {
+ ($ptr:expr, $type:ty, $($f:tt)*) => {{
+ let ptr = $ptr as *const _ as *const u8;
+ let offset: usize = ::core::mem::offset_of!($type, $($f)*);
+ ptr.sub(offset) as *const $type
+ }}
+}
+
+/// Helper for `.rs.S` files.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! concat_literals {
+ ($( $asm:literal )* ) => {
+ ::core::concat!($($asm),*)
+ };
+}
+
+/// Wrapper around `asm!` configured for use in the kernel.
+///
+/// Uses a semicolon to avoid parsing ambiguities, even though this does not match native `asm!`
+/// syntax.
+// For x86, `asm!` uses Intel syntax by default, but we want to use AT&T syntax in the kernel.
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+#[macro_export]
+macro_rules! asm {
+ ($($asm:expr),* ; $($rest:tt)*) => {
+ ::core::arch::asm!( $($asm)*, options(att_syntax), $($rest)* )
+ };
+}
+
+/// Wrapper around `asm!` configured for use in the kernel.
+///
+/// Uses a semicolon to avoid parsing ambiguities, even though this does not match native `asm!`
+/// syntax.
+// For non-x86 arches we just pass through to `asm!`.
+#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
+#[macro_export]
+macro_rules! asm {
+ ($($asm:expr),* ; $($rest:tt)*) => {
+ ::core::arch::asm!( $($asm)*, $($rest)* )
+ };
+}
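A hedged sketch of the resulting syntax; the template is a bare no-op so it assembles on any supported architecture:

```rust
fn nop_once() {
    // SAFETY: A bare `nop` has no operands and no side effects. Note the `;`
    // separating the template from the (here empty) operand list.
    unsafe { kernel::asm!("nop" ; ) };
}
```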
diff --git a/rust/kernel/list.rs b/rust/kernel/list.rs
new file mode 100644
index 000000000000..fb93330f4af4
--- /dev/null
+++ b/rust/kernel/list.rs
@@ -0,0 +1,687 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 Google LLC.
+
+//! A linked list implementation.
+
+use crate::init::PinInit;
+use crate::sync::ArcBorrow;
+use crate::types::Opaque;
+use core::iter::{DoubleEndedIterator, FusedIterator};
+use core::marker::PhantomData;
+use core::ptr;
+
+mod impl_list_item_mod;
+pub use self::impl_list_item_mod::{
+ impl_has_list_links, impl_has_list_links_self_ptr, impl_list_item, HasListLinks, HasSelfPtr,
+};
+
+mod arc;
+pub use self::arc::{impl_list_arc_safe, AtomicTracker, ListArc, ListArcSafe, TryNewListArc};
+
+mod arc_field;
+pub use self::arc_field::{define_list_arc_field_getter, ListArcField};
+
+/// A linked list.
+///
+/// All elements in this linked list will be [`ListArc`] references to the value. Since a value can
+/// only have one `ListArc` (for each pair of prev/next pointers), this ensures that the same
+/// prev/next pointers are not used for several linked lists.
+///
+/// # Invariants
+///
+/// * If the list is empty, then `first` is null. Otherwise, `first` points at the `ListLinks`
+/// field of the first element in the list.
+/// * All prev/next pointers in `ListLinks` fields of items in the list are valid and form a cycle.
+/// * For every item in the list, the list owns the associated [`ListArc`] reference and has
+/// exclusive access to the `ListLinks` field.
+pub struct List<T: ?Sized + ListItem<ID>, const ID: u64 = 0> {
+ first: *mut ListLinksFields,
+ _ty: PhantomData<ListArc<T, ID>>,
+}
+
+// SAFETY: This is a container of `ListArc<T, ID>`, and access to the container allows the same
+// type of access to the `ListArc<T, ID>` elements.
+unsafe impl<T, const ID: u64> Send for List<T, ID>
+where
+ ListArc<T, ID>: Send,
+ T: ?Sized + ListItem<ID>,
+{
+}
+// SAFETY: This is a container of `ListArc<T, ID>`, and access to the container allows the same
+// type of access to the `ListArc<T, ID>` elements.
+unsafe impl<T, const ID: u64> Sync for List<T, ID>
+where
+ ListArc<T, ID>: Sync,
+ T: ?Sized + ListItem<ID>,
+{
+}
+
+/// Implemented by types where a [`ListArc<Self>`] can be inserted into a [`List`].
+///
+/// # Safety
+///
+/// Implementers must ensure that they provide the guarantees documented on methods provided by
+/// this trait.
+///
+/// [`ListArc<Self>`]: ListArc
+pub unsafe trait ListItem<const ID: u64 = 0>: ListArcSafe<ID> {
+ /// Views the [`ListLinks`] for this value.
+ ///
+ /// # Guarantees
+ ///
+ /// If there is a previous call to `prepare_to_insert` and there is no call to `post_remove`
+ /// since the most recent such call, then this returns the same pointer as the one returned by
+ /// the most recent call to `prepare_to_insert`.
+ ///
+ /// Otherwise, the returned pointer points at a read-only [`ListLinks`] with two null pointers.
+ ///
+ /// # Safety
+ ///
+ /// The provided pointer must point at a valid value. (It need not be in an `Arc`.)
+ unsafe fn view_links(me: *const Self) -> *mut ListLinks<ID>;
+
+ /// View the full value given its [`ListLinks`] field.
+ ///
+ /// Can only be used when the value is in a list.
+ ///
+ /// # Guarantees
+ ///
+ /// * Returns the same pointer as the one passed to the most recent call to `prepare_to_insert`.
+ /// * The returned pointer is valid until the next call to `post_remove`.
+ ///
+ /// # Safety
+ ///
+ /// * The provided pointer must originate from the most recent call to `prepare_to_insert`, or
+ /// from a call to `view_links` that happened after the most recent call to
+ /// `prepare_to_insert`.
+ /// * Since the most recent call to `prepare_to_insert`, the `post_remove` method must not have
+ /// been called.
+ unsafe fn view_value(me: *mut ListLinks<ID>) -> *const Self;
+
+ /// This is called when an item is inserted into a [`List`].
+ ///
+ /// # Guarantees
+ ///
+ /// The caller is granted exclusive access to the returned [`ListLinks`] until `post_remove` is
+ /// called.
+ ///
+ /// # Safety
+ ///
+ /// * The provided pointer must point at a valid value in an [`Arc`].
+ /// * Calls to `prepare_to_insert` and `post_remove` on the same value must alternate.
+ /// * The caller must own the [`ListArc`] for this value.
+ /// * The caller must not give up ownership of the [`ListArc`] unless `post_remove` has been
+ /// called after this call to `prepare_to_insert`.
+ ///
+ /// [`Arc`]: crate::sync::Arc
+ unsafe fn prepare_to_insert(me: *const Self) -> *mut ListLinks<ID>;
+
+ /// This undoes a previous call to `prepare_to_insert`.
+ ///
+ /// # Guarantees
+ ///
+ /// The returned pointer is the pointer that was originally passed to `prepare_to_insert`.
+ ///
+ /// # Safety
+ ///
+ /// The provided pointer must be the pointer returned by the most recent call to
+ /// `prepare_to_insert`.
+ unsafe fn post_remove(me: *mut ListLinks<ID>) -> *const Self;
+}
+
+#[repr(C)]
+#[derive(Copy, Clone)]
+struct ListLinksFields {
+ next: *mut ListLinksFields,
+ prev: *mut ListLinksFields,
+}
+
+/// The prev/next pointers for an item in a linked list.
+///
+/// # Invariants
+///
+/// The fields are null if and only if this item is not in a list.
+#[repr(transparent)]
+pub struct ListLinks<const ID: u64 = 0> {
+ // This type is `!Unpin` for aliasing reasons as the pointers are part of an intrusive linked
+ // list.
+ inner: Opaque<ListLinksFields>,
+}
+
+// SAFETY: The only way to access/modify the pointers inside of `ListLinks<ID>` is via holding the
+// associated `ListArc<T, ID>`. Since that type correctly implements `Send`, it is impossible to
+// move an instance of this type to a different thread if the pointees are `!Send`.
+unsafe impl<const ID: u64> Send for ListLinks<ID> {}
+// SAFETY: The type is opaque so immutable references to a ListLinks are useless. Therefore, it's
+// okay to have immutable access to a ListLinks from several threads at once.
+unsafe impl<const ID: u64> Sync for ListLinks<ID> {}
+
+impl<const ID: u64> ListLinks<ID> {
+ /// Creates a new initializer for this type.
+ pub fn new() -> impl PinInit<Self> {
+ // INVARIANT: Pin-init initializers can't be used on an existing `Arc`, so this value will
+ // not be constructed in an `Arc` that already has a `ListArc`.
+ ListLinks {
+ inner: Opaque::new(ListLinksFields {
+ prev: ptr::null_mut(),
+ next: ptr::null_mut(),
+ }),
+ }
+ }
+
+ /// # Safety
+ ///
+ /// `me` must be dereferenceable.
+ #[inline]
+ unsafe fn fields(me: *mut Self) -> *mut ListLinksFields {
+ // SAFETY: The caller promises that the pointer is valid.
+ unsafe { Opaque::raw_get(ptr::addr_of!((*me).inner)) }
+ }
+
+ /// # Safety
+ ///
+ /// `me` must be dereferenceable.
+ #[inline]
+ unsafe fn from_fields(me: *mut ListLinksFields) -> *mut Self {
+ me.cast()
+ }
+}
+
+/// Similar to [`ListLinks`], but also contains a pointer to the full value.
+///
+/// This type can be used instead of [`ListLinks`] to support lists with trait objects.
+#[repr(C)]
+pub struct ListLinksSelfPtr<T: ?Sized, const ID: u64 = 0> {
+ /// The `ListLinks` field inside this value.
+ ///
+ /// This is public so that it can be used with `impl_has_list_links!`.
+ pub inner: ListLinks<ID>,
+ // UnsafeCell is not enough here because we use `Opaque::uninit` as a dummy value, and
+ // `ptr::null()` doesn't work for `T: ?Sized`.
+ self_ptr: Opaque<*const T>,
+}
+
+// SAFETY: The fields of a ListLinksSelfPtr can be moved across thread boundaries.
+unsafe impl<T: ?Sized + Send, const ID: u64> Send for ListLinksSelfPtr<T, ID> {}
+// SAFETY: The type is opaque so immutable references to a ListLinksSelfPtr are useless.
+// Therefore, it's okay to have immutable access to a ListLinksSelfPtr from several threads at
+// once.
+//
+// Note that `inner` being a public field does not prevent this type from being opaque, since
+// `inner` is an opaque type.
+unsafe impl<T: ?Sized + Sync, const ID: u64> Sync for ListLinksSelfPtr<T, ID> {}
+
+impl<T: ?Sized, const ID: u64> ListLinksSelfPtr<T, ID> {
+ /// The offset from the [`ListLinks`] to the self pointer field.
+ pub const LIST_LINKS_SELF_PTR_OFFSET: usize = core::mem::offset_of!(Self, self_ptr);
+
+ /// Creates a new initializer for this type.
+ pub fn new() -> impl PinInit<Self> {
+ // INVARIANT: Pin-init initializers can't be used on an existing `Arc`, so this value will
+ // not be constructed in an `Arc` that already has a `ListArc`.
+ Self {
+ inner: ListLinks {
+ inner: Opaque::new(ListLinksFields {
+ prev: ptr::null_mut(),
+ next: ptr::null_mut(),
+ }),
+ },
+ self_ptr: Opaque::uninit(),
+ }
+ }
+}
+
+impl<T: ?Sized + ListItem<ID>, const ID: u64> List<T, ID> {
+ /// Creates a new empty list.
+ pub const fn new() -> Self {
+ Self {
+ first: ptr::null_mut(),
+ _ty: PhantomData,
+ }
+ }
+
+ /// Returns whether this list is empty.
+ pub fn is_empty(&self) -> bool {
+ self.first.is_null()
+ }
+
+ /// Add the provided item to the back of the list.
+ pub fn push_back(&mut self, item: ListArc<T, ID>) {
+ let raw_item = ListArc::into_raw(item);
+ // SAFETY:
+ // * We just got `raw_item` from a `ListArc`, so it's in an `Arc`.
+ // * Since we have ownership of the `ListArc`, `post_remove` must have been called after
+ // the most recent call to `prepare_to_insert`, if any.
+ // * We own the `ListArc`.
+ // * Removing items from this list is always done using `remove_internal_inner`, which
+ // calls `post_remove` before giving up ownership.
+ let list_links = unsafe { T::prepare_to_insert(raw_item) };
+ // SAFETY: We have not yet called `post_remove`, so `list_links` is still valid.
+ let item = unsafe { ListLinks::fields(list_links) };
+
+ if self.first.is_null() {
+ self.first = item;
+ // SAFETY: The caller just gave us ownership of these fields.
+ // INVARIANT: A linked list with one item should be cyclic.
+ unsafe {
+ (*item).next = item;
+ (*item).prev = item;
+ }
+ } else {
+ let next = self.first;
+ // SAFETY: By the type invariant, this pointer is valid or null. We just checked that
+ // it's not null, so it must be valid.
+ let prev = unsafe { (*next).prev };
+ // SAFETY: Pointers in a linked list are never dangling, and the caller just gave us
+ // ownership of the fields on `item`.
+ // INVARIANT: This correctly inserts `item` between `prev` and `next`.
+ unsafe {
+ (*item).next = next;
+ (*item).prev = prev;
+ (*prev).next = item;
+ (*next).prev = item;
+ }
+ }
+ }
+
+ /// Add the provided item to the front of the list.
+ pub fn push_front(&mut self, item: ListArc<T, ID>) {
+ let raw_item = ListArc::into_raw(item);
+ // SAFETY:
+ // * We just got `raw_item` from a `ListArc`, so it's in an `Arc`.
+ // * If this requirement is violated, then the previous caller of `prepare_to_insert`
+ // violated the safety requirement that they can't give up ownership of the `ListArc`
+ // until they call `post_remove`.
+ // * We own the `ListArc`.
+ // * Removing items from this list is always done using `remove_internal_inner`, which
+ // calls `post_remove` before giving up ownership.
+ let list_links = unsafe { T::prepare_to_insert(raw_item) };
+ // SAFETY: We have not yet called `post_remove`, so `list_links` is still valid.
+ let item = unsafe { ListLinks::fields(list_links) };
+
+ if self.first.is_null() {
+ // SAFETY: The caller just gave us ownership of these fields.
+ // INVARIANT: A linked list with one item should be cyclic.
+ unsafe {
+ (*item).next = item;
+ (*item).prev = item;
+ }
+ } else {
+ let next = self.first;
+ // SAFETY: We just checked that `next` is non-null.
+ let prev = unsafe { (*next).prev };
+ // SAFETY: Pointers in a linked list are never dangling, and the caller just gave us
+ // ownership of the fields on `item`.
+ // INVARIANT: This correctly inserts `item` between `prev` and `next`.
+ unsafe {
+ (*item).next = next;
+ (*item).prev = prev;
+ (*prev).next = item;
+ (*next).prev = item;
+ }
+ }
+ self.first = item;
+ }
+
+ /// Removes the last item from this list.
+ pub fn pop_back(&mut self) -> Option<ListArc<T, ID>> {
+ if self.first.is_null() {
+ return None;
+ }
+
+ // SAFETY: We just checked that the list is not empty.
+ let last = unsafe { (*self.first).prev };
+ // SAFETY: The last item of this list is in this list.
+ Some(unsafe { self.remove_internal(last) })
+ }
+
+ /// Removes the first item from this list.
+ pub fn pop_front(&mut self) -> Option<ListArc<T, ID>> {
+ if self.first.is_null() {
+ return None;
+ }
+
+ // SAFETY: The first item of this list is in this list.
+ Some(unsafe { self.remove_internal(self.first) })
+ }
+
+ /// Removes the provided item from this list and returns it.
+ ///
+ /// This returns `None` if the item is not in the list. (Note that by the safety requirements,
+ /// this means that the item is not in any list.)
+ ///
+ /// # Safety
+ ///
+ /// `item` must not be in a different linked list (with the same id).
+ pub unsafe fn remove(&mut self, item: &T) -> Option<ListArc<T, ID>> {
+ // SAFETY: The caller passes a valid `&T` reference, so the pointer handed to `view_links`
+ // points at a valid value.
+ let mut item = unsafe { ListLinks::fields(T::view_links(item)) };
+ // SAFETY: The user provided a reference, and references are never dangling.
+ //
+ // As for why this is not a data race, there are two cases:
+ //
+ // * If `item` is not in any list, then these fields are read-only and null.
+ // * If `item` is in this list, then we have exclusive access to these fields since we
+ // have a mutable reference to the list.
+ //
+ // In either case, there's no race.
+ let ListLinksFields { next, prev } = unsafe { *item };
+
+ debug_assert_eq!(next.is_null(), prev.is_null());
+ if !next.is_null() {
+ // This is really a no-op, but this ensures that `item` is a raw pointer that was
+ // obtained without going through a pointer->reference->pointer conversion roundtrip.
+ // This ensures that the list is valid under the more restrictive strict provenance
+ // ruleset.
+ //
+ // SAFETY: We just checked that `next` is not null, and it's not dangling by the
+ // list invariants.
+ unsafe {
+ debug_assert_eq!(item, (*next).prev);
+ item = (*next).prev;
+ }
+
+ // SAFETY: We just checked that `item` is in a list, so the caller guarantees that it
+ // is in this list. The pointers are in the right order.
+ Some(unsafe { self.remove_internal_inner(item, next, prev) })
+ } else {
+ None
+ }
+ }
+
+ /// Removes the provided item from the list.
+ ///
+ /// # Safety
+ ///
+ /// `item` must point at an item in this list.
+ unsafe fn remove_internal(&mut self, item: *mut ListLinksFields) -> ListArc<T, ID> {
+ // SAFETY: The caller promises that this pointer is not dangling, and there's no data race
+ // since we have a mutable reference to the list containing `item`.
+ let ListLinksFields { next, prev } = unsafe { *item };
+ // SAFETY: The pointers are ok and in the right order.
+ unsafe { self.remove_internal_inner(item, next, prev) }
+ }
+
+ /// Removes the provided item from the list.
+ ///
+ /// # Safety
+ ///
+ /// The `item` pointer must point at an item in this list, and we must have `(*item).next ==
+ /// next` and `(*item).prev == prev`.
+ unsafe fn remove_internal_inner(
+ &mut self,
+ item: *mut ListLinksFields,
+ next: *mut ListLinksFields,
+ prev: *mut ListLinksFields,
+ ) -> ListArc<T, ID> {
+ // SAFETY: We have exclusive access to the pointers of items in the list, and the prev/next
+ // pointers are always valid for items in a list.
+ //
+ // INVARIANT: There are three cases:
+ // * If the list has at least three items, then after removing the item, `prev` and `next`
+ // will be next to each other.
+ // * If the list has two items, then the remaining item will point at itself.
+ // * If the list has one item, then `next == prev == item`, so these writes have no
+ // effect. The list remains unchanged and `item` is still in the list for now.
+ unsafe {
+ (*next).prev = prev;
+ (*prev).next = next;
+ }
+ // SAFETY: We have exclusive access to items in the list.
+ // INVARIANT: `item` is being removed, so the pointers should be null.
+ unsafe {
+ (*item).prev = ptr::null_mut();
+ (*item).next = ptr::null_mut();
+ }
+ // INVARIANT: There are three cases:
+ // * If `item` was not the first item, then `self.first` should remain unchanged.
+ // * If `item` was the first item and there is another item, then we just updated
+ // `prev->next` to `next`, which is the new first item, and setting `item->next` to null
+ // did not modify `prev->next`.
+ // * If `item` was the only item in the list, then `prev == item`, and we just set
+ // `item->next` to null, so this correctly sets `first` to null now that the list is
+ // empty.
+ if self.first == item {
+ // SAFETY: The `prev` pointer is the value that `item->prev` had when it was in this
+ // list, so it must be valid. There is no race since `prev` is still in the list and we
+ // still have exclusive access to the list.
+ self.first = unsafe { (*prev).next };
+ }
+
+ // SAFETY: `item` used to be in the list, so it is dereferenceable by the type invariants
+ // of `List`.
+ let list_links = unsafe { ListLinks::from_fields(item) };
+ // SAFETY: Any pointer in the list originates from a `prepare_to_insert` call.
+ let raw_item = unsafe { T::post_remove(list_links) };
+ // SAFETY: The above call to `post_remove` guarantees that we can recreate the `ListArc`.
+ unsafe { ListArc::from_raw(raw_item) }
+ }
+
+ /// Moves all items from `other` into `self`.
+ ///
+ /// The items of `other` are added to the back of `self`, so the last item of `other` becomes
+ /// the last item of `self`.
+ pub fn push_all_back(&mut self, other: &mut List<T, ID>) {
+ // First, we insert the elements into `self`. At the end, we make `other` empty.
+ if self.is_empty() {
+ // INVARIANT: All of the elements in `other` become elements of `self`.
+ self.first = other.first;
+ } else if !other.is_empty() {
+ let other_first = other.first;
+ // SAFETY: The other list is not empty, so this pointer is valid.
+ let other_last = unsafe { (*other_first).prev };
+ let self_first = self.first;
+ // SAFETY: The self list is not empty, so this pointer is valid.
+ let self_last = unsafe { (*self_first).prev };
+
+ // SAFETY: We have exclusive access to both lists, so we can update the pointers.
+ // INVARIANT: This correctly sets the pointers to merge both lists. We do not need to
+ // update `self.first` because the first element of `self` does not change.
+ unsafe {
+ (*self_first).prev = other_last;
+ (*other_last).next = self_first;
+ (*self_last).next = other_first;
+ (*other_first).prev = self_last;
+ }
+ }
+
+ // INVARIANT: The other list is now empty, so update its pointer.
+ other.first = ptr::null_mut();
+ }
+
+ /// Returns a cursor to the first element of the list.
+ ///
+ /// If the list is empty, this returns `None`.
+ pub fn cursor_front(&mut self) -> Option<Cursor<'_, T, ID>> {
+ if self.first.is_null() {
+ None
+ } else {
+ Some(Cursor {
+ current: self.first,
+ list: self,
+ })
+ }
+ }
+
+ /// Creates an iterator over the list.
+ pub fn iter(&self) -> Iter<'_, T, ID> {
+ // INVARIANT: If the list is empty, both pointers are null. Otherwise, both pointers point
+ // at the first element of the same list.
+ Iter {
+ current: self.first,
+ stop: self.first,
+ _ty: PhantomData,
+ }
+ }
+}
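Putting these methods together, a hedged sketch of a complete list item and its use; the type, field names, and values are illustrative (the helper macros are the ones re-exported at the top of this file):

```rust
use kernel::list::*;
use kernel::prelude::*;

// Hypothetical list item; `value` and `links` are illustrative names.
#[pin_data]
struct Item {
    value: u32,
    #[pin]
    links: ListLinks,
}

impl_has_list_links! {
    impl HasListLinks<0> for Item { self.links }
}
impl_list_arc_safe! {
    impl ListArcSafe<0> for Item { untracked; }
}
impl_list_item! {
    impl ListItem<0> for Item { using ListLinks; }
}

fn demo() -> Result {
    let mut list = List::<Item>::new();
    // With the `untracked` strategy, a `ListArc` can only come from a
    // `UniqueArc`, which `ListArc::pin_init` creates internally.
    let item = ListArc::pin_init(
        try_pin_init!(Item { value: 1, links <- ListLinks::new() }),
        GFP_KERNEL,
    )?;
    list.push_back(item);
    for entry in &list {
        pr_info!("value: {}\n", entry.value);
    }
    // Dropping the list pops and releases any remaining items.
    Ok(())
}
```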
+
+impl<T: ?Sized + ListItem<ID>, const ID: u64> Default for List<T, ID> {
+ fn default() -> Self {
+ List::new()
+ }
+}
+
+impl<T: ?Sized + ListItem<ID>, const ID: u64> Drop for List<T, ID> {
+ fn drop(&mut self) {
+ while let Some(item) = self.pop_front() {
+ drop(item);
+ }
+ }
+}
+
+/// An iterator over a [`List`].
+///
+/// # Invariants
+///
+/// * There must be a [`List`] that is immutably borrowed for the duration of `'a`.
+/// * The `current` pointer is null or points at a value in that [`List`].
+/// * The `stop` pointer is equal to the `first` field of that [`List`].
+#[derive(Clone)]
+pub struct Iter<'a, T: ?Sized + ListItem<ID>, const ID: u64 = 0> {
+ current: *mut ListLinksFields,
+ stop: *mut ListLinksFields,
+ _ty: PhantomData<&'a ListArc<T, ID>>,
+}
+
+impl<'a, T: ?Sized + ListItem<ID>, const ID: u64> Iterator for Iter<'a, T, ID> {
+ type Item = ArcBorrow<'a, T>;
+
+ fn next(&mut self) -> Option<ArcBorrow<'a, T>> {
+ if self.current.is_null() {
+ return None;
+ }
+
+ let current = self.current;
+
+ // SAFETY: We just checked that `current` is not null, so it is in a list, and hence not
+ // dangling. There's no race because the iterator holds an immutable borrow to the list.
+ let next = unsafe { (*current).next };
+ // INVARIANT: If `current` was the last element of the list, then this updates it to null.
+ // Otherwise, we update it to the next element.
+ self.current = if next != self.stop {
+ next
+ } else {
+ ptr::null_mut()
+ };
+
+ // SAFETY: The `current` pointer points at a value in the list.
+ let item = unsafe { T::view_value(ListLinks::from_fields(current)) };
+ // SAFETY:
+ // * All values in a list are stored in an `Arc`.
+ // * The value cannot be removed from the list for the duration of the lifetime annotated
+ // on the returned `ArcBorrow`, because removing it from the list would require mutable
+ // access to the list. However, the `ArcBorrow` is annotated with the iterator's
+ // lifetime, and the list is immutably borrowed for that lifetime.
+ // * Values in a list never have a `UniqueArc` reference.
+ Some(unsafe { ArcBorrow::from_raw(item) })
+ }
+}
+
+/// A cursor into a [`List`].
+///
+/// # Invariants
+///
+/// The `current` pointer points at a value in `list`.
+pub struct Cursor<'a, T: ?Sized + ListItem<ID>, const ID: u64 = 0> {
+ current: *mut ListLinksFields,
+ list: &'a mut List<T, ID>,
+}
+
+impl<'a, T: ?Sized + ListItem<ID>, const ID: u64> Cursor<'a, T, ID> {
+ /// Access the current element of this cursor.
+ pub fn current(&self) -> ArcBorrow<'_, T> {
+ // SAFETY: The `current` pointer points at a value in the list.
+ let me = unsafe { T::view_value(ListLinks::from_fields(self.current)) };
+ // SAFETY:
+ // * All values in a list are stored in an `Arc`.
+ // * The value cannot be removed from the list for the duration of the lifetime annotated
+ // on the returned `ArcBorrow`, because removing it from the list would require mutable
+ // access to the cursor or the list. However, the `ArcBorrow` holds an immutable borrow
+ // on the cursor, which in turn holds a mutable borrow on the list, so any such
+ // mutable access requires first releasing the immutable borrow on the cursor.
+ // * Values in a list never have a `UniqueArc` reference, because the list has a `ListArc`
+ // reference, and `UniqueArc` references must be unique.
+ unsafe { ArcBorrow::from_raw(me) }
+ }
+
+ /// Move the cursor to the next element.
+ pub fn next(self) -> Option<Cursor<'a, T, ID>> {
+ // SAFETY: The `current` field is always in a list.
+ let next = unsafe { (*self.current).next };
+
+ if next == self.list.first {
+ None
+ } else {
+ // INVARIANT: Since `self.current` is in the `list`, its `next` pointer is also in the
+ // `list`.
+ Some(Cursor {
+ current: next,
+ list: self.list,
+ })
+ }
+ }
+
+ /// Move the cursor to the previous element.
+ pub fn prev(self) -> Option<Cursor<'a, T, ID>> {
+ // SAFETY: The `current` field is always in a list.
+ let prev = unsafe { (*self.current).prev };
+
+ if self.current == self.list.first {
+ None
+ } else {
+ // INVARIANT: Since `self.current` is in the `list`, its `prev` pointer is also in the
+ // `list`.
+ Some(Cursor {
+ current: prev,
+ list: self.list,
+ })
+ }
+ }
+
+ /// Remove the current element from the list.
+ pub fn remove(self) -> ListArc<T, ID> {
+ // SAFETY: The `current` pointer always points at a member of the list.
+ unsafe { self.list.remove_internal(self.current) }
+ }
+}
+
+impl<'a, T: ?Sized + ListItem<ID>, const ID: u64> FusedIterator for Iter<'a, T, ID> {}
+
+impl<'a, T: ?Sized + ListItem<ID>, const ID: u64> IntoIterator for &'a List<T, ID> {
+ type IntoIter = Iter<'a, T, ID>;
+ type Item = ArcBorrow<'a, T>;
+
+ fn into_iter(self) -> Iter<'a, T, ID> {
+ self.iter()
+ }
+}
+
+/// An owning iterator into a [`List`].
+pub struct IntoIter<T: ?Sized + ListItem<ID>, const ID: u64 = 0> {
+ list: List<T, ID>,
+}
+
+impl<T: ?Sized + ListItem<ID>, const ID: u64> Iterator for IntoIter<T, ID> {
+ type Item = ListArc<T, ID>;
+
+ fn next(&mut self) -> Option<ListArc<T, ID>> {
+ self.list.pop_front()
+ }
+}
+
+impl<T: ?Sized + ListItem<ID>, const ID: u64> FusedIterator for IntoIter<T, ID> {}
+
+impl<T: ?Sized + ListItem<ID>, const ID: u64> DoubleEndedIterator for IntoIter<T, ID> {
+ fn next_back(&mut self) -> Option<ListArc<T, ID>> {
+ self.list.pop_back()
+ }
+}
+
+impl<T: ?Sized + ListItem<ID>, const ID: u64> IntoIterator for List<T, ID> {
+ type IntoIter = IntoIter<T, ID>;
+ type Item = ListArc<T, ID>;
+
+ fn into_iter(self) -> IntoIter<T, ID> {
+ IntoIter { list: self }
+ }
+}
diff --git a/rust/kernel/list/arc.rs b/rust/kernel/list/arc.rs
new file mode 100644
index 000000000000..13c50df37b89
--- /dev/null
+++ b/rust/kernel/list/arc.rs
@@ -0,0 +1,521 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 Google LLC.
+
+//! A wrapper around `Arc` for linked lists.
+
+use crate::alloc::{AllocError, Flags};
+use crate::prelude::*;
+use crate::sync::{Arc, ArcBorrow, UniqueArc};
+use core::marker::PhantomPinned;
+use core::ops::Deref;
+use core::pin::Pin;
+use core::sync::atomic::{AtomicBool, Ordering};
+
+/// Declares that this type has some way to ensure that there is exactly one `ListArc` instance for
+/// this id.
+///
+/// Types that implement this trait should include some kind of logic for keeping track of whether
+/// a [`ListArc`] exists or not. We refer to this logic as "the tracking inside `T`".
+///
+/// We allow the case where the tracking inside `T` thinks that a [`ListArc`] exists, but actually,
+/// there isn't a [`ListArc`]. However, we do not allow the opposite situation where a [`ListArc`]
+/// exists, but the tracking thinks it doesn't. This is because the former can at most result in us
+/// failing to create a [`ListArc`] when the operation could succeed, whereas the latter can result
+/// in the creation of two [`ListArc`] references. Only the latter situation can lead to memory
+/// safety issues.
+///
+/// A consequence of the above is that you may implement the tracking inside `T` by not actually
+/// keeping track of anything. To do this, you always claim that a [`ListArc`] exists, even if
+/// there isn't one. This implementation is allowed by the above rule, but it means that
+/// [`ListArc`] references can only be created if you have ownership of *all* references to the
+/// refcounted object, as you otherwise have no way of knowing whether a [`ListArc`] exists.
+pub trait ListArcSafe<const ID: u64 = 0> {
+ /// Informs the tracking inside this type that it now has a [`ListArc`] reference.
+ ///
+ /// This method may be called even if the tracking inside this type thinks that a `ListArc`
+ /// reference exists. (But only if that's not actually the case.)
+ ///
+ /// # Safety
+ ///
+ /// Must not be called if a [`ListArc`] already exists for this value.
+ unsafe fn on_create_list_arc_from_unique(self: Pin<&mut Self>);
+
+ /// Informs the tracking inside this type that there is no [`ListArc`] reference anymore.
+ ///
+ /// # Safety
+ ///
+ /// Must only be called if there is no [`ListArc`] reference, but the tracking thinks there is.
+ unsafe fn on_drop_list_arc(&self);
+}
+
+/// Declares that this type is able to safely attempt to create `ListArc`s at any time.
+///
+/// # Safety
+///
+/// The guarantees of `try_new_list_arc` must be upheld.
+pub unsafe trait TryNewListArc<const ID: u64 = 0>: ListArcSafe<ID> {
+ /// Attempts to convert an `Arc<Self>` into a `ListArc<Self>`. Returns `true` if the
+ /// conversion was successful.
+ ///
+ /// This method should not be called directly. Use [`ListArc::try_from_arc`] instead.
+ ///
+ /// # Guarantees
+ ///
+ /// If this call returns `true`, then there is no [`ListArc`] pointing to this value.
+ /// Additionally, this call will have transitioned the tracking inside `Self` from not thinking
+ /// that a [`ListArc`] exists, to thinking that a [`ListArc`] exists.
+ fn try_new_list_arc(&self) -> bool;
+}
+
+/// Declares that this type supports [`ListArc`].
+///
+/// This macro supports a few different strategies for implementing the tracking inside the type:
+///
+/// * The `untracked` strategy does not actually keep track of whether a [`ListArc`] exists. When
+/// using this strategy, the only way to create a [`ListArc`] is using a [`UniqueArc`].
+/// * The `tracked_by` strategy defers the tracking to a field of the struct. The user must specify
+/// which field to defer the tracking to. The field must implement [`ListArcSafe`]. If the field
+/// implements [`TryNewListArc`], then the type will also implement [`TryNewListArc`].
+///
+/// The `tracked_by` strategy is usually used by deferring to a field of type
+/// [`AtomicTracker`]. However, it is also possible to defer the tracking to another struct
+/// that also uses this macro.
+#[macro_export]
+macro_rules! impl_list_arc_safe {
+ (impl$({$($generics:tt)*})? ListArcSafe<$num:tt> for $t:ty { untracked; } $($rest:tt)*) => {
+ impl$(<$($generics)*>)? $crate::list::ListArcSafe<$num> for $t {
+ unsafe fn on_create_list_arc_from_unique(self: ::core::pin::Pin<&mut Self>) {}
+ unsafe fn on_drop_list_arc(&self) {}
+ }
+ $crate::list::impl_list_arc_safe! { $($rest)* }
+ };
+
+ (impl$({$($generics:tt)*})? ListArcSafe<$num:tt> for $t:ty {
+ tracked_by $field:ident : $fty:ty;
+ } $($rest:tt)*) => {
+ impl$(<$($generics)*>)? $crate::list::ListArcSafe<$num> for $t {
+ unsafe fn on_create_list_arc_from_unique(self: ::core::pin::Pin<&mut Self>) {
+ $crate::assert_pinned!($t, $field, $fty, inline);
+
+ // SAFETY: This field is structurally pinned as per the above assertion.
+ let field = unsafe {
+ ::core::pin::Pin::map_unchecked_mut(self, |me| &mut me.$field)
+ };
+ // SAFETY: The caller promises that there is no `ListArc`.
+ unsafe {
+ <$fty as $crate::list::ListArcSafe<$num>>::on_create_list_arc_from_unique(field)
+ };
+ }
+ unsafe fn on_drop_list_arc(&self) {
+ // SAFETY: The caller promises that there is no `ListArc` reference, and also
+ // promises that the tracking thinks there is a `ListArc` reference.
+ unsafe { <$fty as $crate::list::ListArcSafe<$num>>::on_drop_list_arc(&self.$field) };
+ }
+ }
+ unsafe impl$(<$($generics)*>)? $crate::list::TryNewListArc<$num> for $t
+ where
+ $fty: TryNewListArc<$num>,
+ {
+ fn try_new_list_arc(&self) -> bool {
+ <$fty as $crate::list::TryNewListArc<$num>>::try_new_list_arc(&self.$field)
+ }
+ }
+ $crate::list::impl_list_arc_safe! { $($rest)* }
+ };
+
+ () => {};
+}
+pub use impl_list_arc_safe;
+
+/// A wrapper around [`Arc`] that's guaranteed unique for the given id.
+///
+/// The `ListArc` type can be thought of as a special reference to a refcounted object that owns the
+/// permission to manipulate the `next`/`prev` pointers stored in the refcounted object. By ensuring
+/// that each object has only one `ListArc` reference, the owner of that reference is assured
+/// exclusive access to the `next`/`prev` pointers. When a `ListArc` is inserted into a [`List`],
+/// the [`List`] takes ownership of the `ListArc` reference.
+///
+/// There are various strategies for ensuring that a value has only one `ListArc` reference. The
+/// simplest is to convert a [`UniqueArc`] into a `ListArc`. However, the refcounted object could
+/// also keep track of whether a `ListArc` exists using a boolean, which could allow for the
+/// creation of new `ListArc` references from an [`Arc`] reference. Whatever strategy is used, the
+/// relevant tracking is referred to as "the tracking inside `T`", and the [`ListArcSafe`] trait
+/// (and its subtraits) are used to update the tracking when a `ListArc` is created or destroyed.
+///
+/// Note that we allow the case where the tracking inside `T` thinks that a `ListArc` exists, but
+/// actually, there isn't a `ListArc`. However, we do not allow the opposite situation where a
+/// `ListArc` exists, but the tracking thinks it doesn't. This is because the former can at most
+/// result in us failing to create a `ListArc` when the operation could succeed, whereas the latter
+/// can result in the creation of two `ListArc` references.
+///
+/// While this `ListArc` is unique for the given id, there still might exist normal `Arc`
+/// references to the object.
+///
+/// # Invariants
+///
+/// * Each reference counted object has at most one `ListArc` for each value of `ID`.
+/// * The tracking inside `T` is aware that a `ListArc` reference exists.
+///
+/// [`List`]: crate::list::List
+#[repr(transparent)]
+#[cfg_attr(CONFIG_RUSTC_HAS_COERCE_POINTEE, derive(core::marker::CoercePointee))]
+pub struct ListArc<T, const ID: u64 = 0>
+where
+ T: ListArcSafe<ID> + ?Sized,
+{
+ arc: Arc<T>,
+}
+
+impl<T: ListArcSafe<ID>, const ID: u64> ListArc<T, ID> {
+ /// Constructs a new reference counted instance of `T`.
+ #[inline]
+ pub fn new(contents: T, flags: Flags) -> Result<Self, AllocError> {
+ Ok(Self::from(UniqueArc::new(contents, flags)?))
+ }
+
+ /// Use the given initializer to in-place initialize a `T`.
+ ///
+ /// If `T: !Unpin` it will not be able to move afterwards.
+ // We don't implement `InPlaceInit` because `ListArc` is implicitly pinned. This is similar to
+ // what we do for `Arc`.
+ #[inline]
+ pub fn pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> Result<Self, E>
+ where
+ E: From<AllocError>,
+ {
+ Ok(Self::from(UniqueArc::try_pin_init(init, flags)?))
+ }
+
+ /// Use the given initializer to in-place initialize a `T`.
+ ///
+ /// This is equivalent to [`ListArc<T>::pin_init`], since a [`ListArc`] is always pinned.
+ #[inline]
+ pub fn init<E>(init: impl Init<T, E>, flags: Flags) -> Result<Self, E>
+ where
+ E: From<AllocError>,
+ {
+ Ok(Self::from(UniqueArc::try_init(init, flags)?))
+ }
+}
+
+impl<T, const ID: u64> From<UniqueArc<T>> for ListArc<T, ID>
+where
+ T: ListArcSafe<ID> + ?Sized,
+{
+ /// Convert a [`UniqueArc`] into a [`ListArc`].
+ #[inline]
+ fn from(unique: UniqueArc<T>) -> Self {
+ Self::from(Pin::from(unique))
+ }
+}
+
+impl<T, const ID: u64> From<Pin<UniqueArc<T>>> for ListArc<T, ID>
+where
+ T: ListArcSafe<ID> + ?Sized,
+{
+ /// Convert a pinned [`UniqueArc`] into a [`ListArc`].
+ #[inline]
+ fn from(mut unique: Pin<UniqueArc<T>>) -> Self {
+ // SAFETY: We have a `UniqueArc`, so there is no `ListArc`.
+ unsafe { T::on_create_list_arc_from_unique(unique.as_mut()) };
+ let arc = Arc::from(unique);
+ // SAFETY: We just called `on_create_list_arc_from_unique` on an arc without a `ListArc`,
+ // so we can create a `ListArc`.
+ unsafe { Self::transmute_from_arc(arc) }
+ }
+}
+
+impl<T, const ID: u64> ListArc<T, ID>
+where
+ T: ListArcSafe<ID> + ?Sized,
+{
+ /// Creates two `ListArc`s from a [`UniqueArc`].
+ ///
+ /// The two ids must be different.
+ #[inline]
+ pub fn pair_from_unique<const ID2: u64>(unique: UniqueArc<T>) -> (Self, ListArc<T, ID2>)
+ where
+ T: ListArcSafe<ID2>,
+ {
+ Self::pair_from_pin_unique(Pin::from(unique))
+ }
+
+ /// Creates two `ListArc`s from a pinned [`UniqueArc`].
+ ///
+ /// The two ids must be different.
+ #[inline]
+ pub fn pair_from_pin_unique<const ID2: u64>(
+ mut unique: Pin<UniqueArc<T>>,
+ ) -> (Self, ListArc<T, ID2>)
+ where
+ T: ListArcSafe<ID2>,
+ {
+ build_assert!(ID != ID2);
+
+ // SAFETY: We have a `UniqueArc`, so there is no `ListArc`.
+ unsafe { <T as ListArcSafe<ID>>::on_create_list_arc_from_unique(unique.as_mut()) };
+ // SAFETY: We have a `UniqueArc`, so there is no `ListArc`.
+ unsafe { <T as ListArcSafe<ID2>>::on_create_list_arc_from_unique(unique.as_mut()) };
+
+ let arc1 = Arc::from(unique);
+ let arc2 = Arc::clone(&arc1);
+
+ // SAFETY: We just called `on_create_list_arc_from_unique` on an arc without a `ListArc`
+ // for both IDs (which are different), so we can create two `ListArc`s.
+ unsafe {
+ (
+ Self::transmute_from_arc(arc1),
+ ListArc::transmute_from_arc(arc2),
+ )
+ }
+ }
+
+ /// Try to create a new `ListArc`.
+ ///
+ /// This fails if this value already has a `ListArc`.
+ pub fn try_from_arc(arc: Arc<T>) -> Result<Self, Arc<T>>
+ where
+ T: TryNewListArc<ID>,
+ {
+ if arc.try_new_list_arc() {
+ // SAFETY: The `try_new_list_arc` method returned true, so we made the tracking think
+ // that a `ListArc` exists. This lets us create a `ListArc`.
+ Ok(unsafe { Self::transmute_from_arc(arc) })
+ } else {
+ Err(arc)
+ }
+ }
+
+ /// Try to create a new `ListArc`.
+ ///
+ /// This fails if this value already has a `ListArc`.
+ pub fn try_from_arc_borrow(arc: ArcBorrow<'_, T>) -> Option<Self>
+ where
+ T: TryNewListArc<ID>,
+ {
+ if arc.try_new_list_arc() {
+ // SAFETY: The `try_new_list_arc` method returned true, so we made the tracking think
+ // that a `ListArc` exists. This lets us create a `ListArc`.
+ Some(unsafe { Self::transmute_from_arc(Arc::from(arc)) })
+ } else {
+ None
+ }
+ }
+
+ /// Try to create a new `ListArc`.
+ ///
+ /// If it's not possible to create a new `ListArc`, then the `Arc` is dropped. This will never
+ /// run the destructor of the value.
+ pub fn try_from_arc_or_drop(arc: Arc<T>) -> Option<Self>
+ where
+ T: TryNewListArc<ID>,
+ {
+ match Self::try_from_arc(arc) {
+ Ok(list_arc) => Some(list_arc),
+ Err(arc) => Arc::into_unique_or_drop(arc).map(Self::from),
+ }
+ }
+
+ /// Transmutes an [`Arc`] into a `ListArc` without updating the tracking inside `T`.
+ ///
+ /// # Safety
+ ///
+ /// * The value must not already have a `ListArc` reference.
+ /// * The tracking inside `T` must think that there is a `ListArc` reference.
+ #[inline]
+ unsafe fn transmute_from_arc(arc: Arc<T>) -> Self {
+ // INVARIANT: By the safety requirements, the invariants on `ListArc` are satisfied.
+ Self { arc }
+ }
+
+ /// Transmutes a `ListArc` into an [`Arc`] without updating the tracking inside `T`.
+ ///
+ /// After this call, the tracking inside `T` will still think that there is a `ListArc`
+ /// reference.
+ #[inline]
+ fn transmute_to_arc(self) -> Arc<T> {
+ // Use a transmute to skip destructor.
+ //
+ // SAFETY: ListArc is repr(transparent).
+ unsafe { core::mem::transmute(self) }
+ }
+
+ /// Convert ownership of this `ListArc` into a raw pointer.
+ ///
+ /// The returned pointer is indistinguishable from pointers returned by [`Arc::into_raw`]. The
+ /// tracking inside `T` will still think that a `ListArc` exists after this call.
+ #[inline]
+ pub fn into_raw(self) -> *const T {
+ Arc::into_raw(Self::transmute_to_arc(self))
+ }
+
+ /// Take ownership of the `ListArc` from a raw pointer.
+ ///
+ /// # Safety
+ ///
+ /// * `ptr` must satisfy the safety requirements of [`Arc::from_raw`].
+ /// * The value must not already have a `ListArc` reference.
+ /// * The tracking inside `T` must think that there is a `ListArc` reference.
+ #[inline]
+ pub unsafe fn from_raw(ptr: *const T) -> Self {
+ // SAFETY: The pointer satisfies the safety requirements for `Arc::from_raw`.
+ let arc = unsafe { Arc::from_raw(ptr) };
+ // SAFETY: The value doesn't already have a `ListArc` reference, but the tracking thinks it
+ // does.
+ unsafe { Self::transmute_from_arc(arc) }
+ }
+
+ /// Converts the `ListArc` into an [`Arc`].
+ #[inline]
+ pub fn into_arc(self) -> Arc<T> {
+ let arc = Self::transmute_to_arc(self);
+ // SAFETY: There is no longer a `ListArc`, but the tracking thinks there is.
+ unsafe { T::on_drop_list_arc(&arc) };
+ arc
+ }
+
+ /// Clone a `ListArc` into an [`Arc`].
+ #[inline]
+ pub fn clone_arc(&self) -> Arc<T> {
+ self.arc.clone()
+ }
+
+ /// Returns a reference to an [`Arc`] from the given [`ListArc`].
+ ///
+ /// This is useful when the argument of a function call is an [`&Arc`] (e.g., in a method
+ /// receiver), but we have a [`ListArc`] instead.
+ ///
+ /// [`&Arc`]: Arc
+ #[inline]
+ pub fn as_arc(&self) -> &Arc<T> {
+ &self.arc
+ }
+
+ /// Returns an [`ArcBorrow`] from the given [`ListArc`].
+ ///
+ /// This is useful when the argument of a function call is an [`ArcBorrow`] (e.g., in a method
+ /// receiver), but we have a [`ListArc`] instead. Getting an [`ArcBorrow`] is free when
+ /// optimised.
+ #[inline]
+ pub fn as_arc_borrow(&self) -> ArcBorrow<'_, T> {
+ self.arc.as_arc_borrow()
+ }
+
+ /// Compare whether two [`ListArc`] pointers reference the same underlying object.
+ #[inline]
+ pub fn ptr_eq(this: &Self, other: &Self) -> bool {
+ Arc::ptr_eq(&this.arc, &other.arc)
+ }
+}
+
+impl<T, const ID: u64> Deref for ListArc<T, ID>
+where
+ T: ListArcSafe<ID> + ?Sized,
+{
+ type Target = T;
+
+ #[inline]
+ fn deref(&self) -> &Self::Target {
+ self.arc.deref()
+ }
+}
+
+impl<T, const ID: u64> Drop for ListArc<T, ID>
+where
+ T: ListArcSafe<ID> + ?Sized,
+{
+ #[inline]
+ fn drop(&mut self) {
+ // SAFETY: There is no longer a `ListArc`, but the tracking thinks there is by the type
+ // invariants on `Self`.
+ unsafe { T::on_drop_list_arc(&self.arc) };
+ }
+}
+
+impl<T, const ID: u64> AsRef<Arc<T>> for ListArc<T, ID>
+where
+ T: ListArcSafe<ID> + ?Sized,
+{
+ #[inline]
+ fn as_ref(&self) -> &Arc<T> {
+ self.as_arc()
+ }
+}
+
+// This is to allow coercion from `ListArc<T>` to `ListArc<U>` if `T` can be converted to the
+// dynamically-sized type (DST) `U`.
+#[cfg(not(CONFIG_RUSTC_HAS_COERCE_POINTEE))]
+impl<T, U, const ID: u64> core::ops::CoerceUnsized<ListArc<U, ID>> for ListArc<T, ID>
+where
+ T: ListArcSafe<ID> + core::marker::Unsize<U> + ?Sized,
+ U: ListArcSafe<ID> + ?Sized,
+{
+}
+
+// This is to allow `ListArc<U>` to be dispatched on when `ListArc<T>` can be coerced into
+// `ListArc<U>`.
+#[cfg(not(CONFIG_RUSTC_HAS_COERCE_POINTEE))]
+impl<T, U, const ID: u64> core::ops::DispatchFromDyn<ListArc<U, ID>> for ListArc<T, ID>
+where
+ T: ListArcSafe<ID> + core::marker::Unsize<U> + ?Sized,
+ U: ListArcSafe<ID> + ?Sized,
+{
+}
+
+/// A utility for tracking whether a [`ListArc`] exists using an atomic.
+///
+/// # Invariant
+///
+/// If the boolean is `false`, then there is no [`ListArc`] for this value.
+#[repr(transparent)]
+pub struct AtomicTracker<const ID: u64 = 0> {
+ inner: AtomicBool,
+ // This value needs to be pinned to justify the INVARIANT: comment in `AtomicTracker::new`.
+ _pin: PhantomPinned,
+}
+
+impl<const ID: u64> AtomicTracker<ID> {
+ /// Creates a new initializer for this type.
+ pub fn new() -> impl PinInit<Self> {
+ // INVARIANT: Pin-init initializers can't be used on an existing `Arc`, so this value will
+ // not be constructed in an `Arc` that already has a `ListArc`.
+ Self {
+ inner: AtomicBool::new(false),
+ _pin: PhantomPinned,
+ }
+ }
+
+ fn project_inner(self: Pin<&mut Self>) -> &mut AtomicBool {
+ // SAFETY: The `inner` field is not structurally pinned, so we may obtain a mutable
+ // reference to it even if we only have a pinned reference to `self`.
+ unsafe { &mut Pin::into_inner_unchecked(self).inner }
+ }
+}
+
+impl<const ID: u64> ListArcSafe<ID> for AtomicTracker<ID> {
+ unsafe fn on_create_list_arc_from_unique(self: Pin<&mut Self>) {
+ // INVARIANT: We just created a ListArc, so the boolean should be true.
+ *self.project_inner().get_mut() = true;
+ }
+
+ unsafe fn on_drop_list_arc(&self) {
+ // INVARIANT: We just dropped a ListArc, so the boolean should be false.
+ self.inner.store(false, Ordering::Release);
+ }
+}
+
+// SAFETY: If this method returns `true`, then by the type invariant there is no `ListArc` before
+// this call, so it is okay to create a new `ListArc`.
+//
+// The acquire ordering will synchronize with the release store from the destruction of any
+// previous `ListArc`, so if there was a previous `ListArc`, then the destruction of the previous
+// `ListArc` happens-before the creation of the new `ListArc`.
+unsafe impl<const ID: u64> TryNewListArc<ID> for AtomicTracker<ID> {
+ fn try_new_list_arc(&self) -> bool {
+ // INVARIANT: If this method returns true, then the boolean used to be false, and is no
+ // longer false, so it is okay for the caller to create a new [`ListArc`].
+ self.inner
+ .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
+ .is_ok()
+ }
+}
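A hedged sketch of the `tracked_by` strategy built on `AtomicTracker`; the `Node` type is illustrative:

```rust
use kernel::list::{impl_list_arc_safe, AtomicTracker, ListArc, TryNewListArc};
use kernel::prelude::*;
use kernel::sync::Arc;

// Hypothetical type that tracks its `ListArc` with an atomic flag.
#[pin_data]
struct Node {
    value: u32,
    #[pin]
    tracker: AtomicTracker,
}

impl_list_arc_safe! {
    impl ListArcSafe<0> for Node { tracked_by tracker: AtomicTracker; }
}

fn claim(arc: Arc<Node>) -> Option<ListArc<Node>> {
    // Succeeds only if no `ListArc` for this value exists yet; on failure the
    // original `Arc` is handed back (and dropped here by `ok()`).
    ListArc::try_from_arc(arc).ok()
}
```

Unlike the `untracked` strategy, this permits fallible `ListArc` creation from a shared `Arc` at any time.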
diff --git a/rust/kernel/list/arc_field.rs b/rust/kernel/list/arc_field.rs
new file mode 100644
index 000000000000..c4b9dd503982
--- /dev/null
+++ b/rust/kernel/list/arc_field.rs
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 Google LLC.
+
+//! A field that is exclusively owned by a [`ListArc`].
+//!
+//! This can be used to have reference counted struct where one of the reference counted pointers
+//! has exclusive access to a field of the struct.
+//!
+//! [`ListArc`]: crate::list::ListArc
+
+use core::cell::UnsafeCell;
+
+/// A field owned by a specific [`ListArc`].
+///
+/// [`ListArc`]: crate::list::ListArc
+pub struct ListArcField<T, const ID: u64 = 0> {
+ value: UnsafeCell<T>,
+}
+
+// SAFETY: If the inner type is thread-safe, then it's also okay for `ListArcField` to be
+// thread-safe.
+unsafe impl<T: Send + Sync, const ID: u64> Send for ListArcField<T, ID> {}
+// SAFETY: If the inner type is thread-safe, then it's also okay for `ListArcField` to be
+// thread-safe.
+unsafe impl<T: Send + Sync, const ID: u64> Sync for ListArcField<T, ID> {}
+
+impl<T, const ID: u64> ListArcField<T, ID> {
+ /// Creates a new `ListArcField`.
+ pub fn new(value: T) -> Self {
+ Self {
+ value: UnsafeCell::new(value),
+ }
+ }
+
+ /// Access the value when we have exclusive access to the `ListArcField`.
+ ///
+ /// This allows access to the field using a `UniqueArc` instead of a `ListArc`.
+ pub fn get_mut(&mut self) -> &mut T {
+ self.value.get_mut()
+ }
+
+ /// Unsafely assert that you have shared access to the `ListArc` for this field.
+ ///
+ /// # Safety
+ ///
+ /// The caller must have shared access to the `ListArc<ID>` containing the struct with this
+ /// field for the duration of the returned reference.
+ pub unsafe fn assert_ref(&self) -> &T {
+ // SAFETY: The caller has shared access to the `ListArc`, so they also have shared access
+ // to this field.
+ unsafe { &*self.value.get() }
+ }
+
+ /// Unsafely assert that you have mutable access to the `ListArc` for this field.
+ ///
+ /// # Safety
+ ///
+ /// The caller must have mutable access to the `ListArc<ID>` containing the struct with this
+ /// field for the duration of the returned reference.
+ #[expect(clippy::mut_from_ref)]
+ pub unsafe fn assert_mut(&self) -> &mut T {
+ // SAFETY: The caller has exclusive access to the `ListArc`, so they also have exclusive
+ // access to this field.
+ unsafe { &mut *self.value.get() }
+ }
+}
+
+/// Defines getters for a [`ListArcField`].
+#[macro_export]
+macro_rules! define_list_arc_field_getter {
+ ($pub:vis fn $name:ident(&self $(<$id:tt>)?) -> &$typ:ty { $field:ident }
+ $($rest:tt)*
+ ) => {
+ $pub fn $name<'a>(self: &'a $crate::list::ListArc<Self $(, $id)?>) -> &'a $typ {
+ let field = &(&**self).$field;
+ // SAFETY: We have a shared reference to the `ListArc`.
+ unsafe { $crate::list::ListArcField::<$typ $(, $id)?>::assert_ref(field) }
+ }
+
+ $crate::list::define_list_arc_field_getter!($($rest)*);
+ };
+
+ ($pub:vis fn $name:ident(&mut self $(<$id:tt>)?) -> &mut $typ:ty { $field:ident }
+ $($rest:tt)*
+ ) => {
+ $pub fn $name<'a>(self: &'a mut $crate::list::ListArc<Self $(, $id)?>) -> &'a mut $typ {
+ let field = &(&**self).$field;
+ // SAFETY: We have a mutable reference to the `ListArc`.
+ unsafe { $crate::list::ListArcField::<$typ $(, $id)?>::assert_mut(field) }
+ }
+
+ $crate::list::define_list_arc_field_getter!($($rest)*);
+ };
+
+ () => {};
+}
+pub use define_list_arc_field_getter;
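A minimal usage sketch of the getter macro, assuming a hypothetical `Work` item; the rest of the `ListArc` plumbing (e.g. the `ListArcSafe` implementation the item would also need) is elided:

```rust
use kernel::list::{define_list_arc_field_getter, ListArcField};

// Hypothetical list item; the `ListArcSafe` implementation it would also
// need is elided here.
struct Work {
    // Only the `ListArc` holder may touch this field.
    deadline: ListArcField<u64>,
}

impl Work {
    define_list_arc_field_getter! {
        // Shared getter: callable with `&ListArc<Work>`.
        pub fn deadline(&self) -> &u64 { deadline }
        // Exclusive getter: callable with `&mut ListArc<Work>`.
        pub fn deadline_mut(&mut self) -> &mut u64 { deadline }
    }
}
```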
diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs
new file mode 100644
index 000000000000..a0438537cee1
--- /dev/null
+++ b/rust/kernel/list/impl_list_item_mod.rs
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 Google LLC.
+
+//! Helpers for implementing list traits safely.
+
+use crate::list::ListLinks;
+
+/// Declares that this type has a `ListLinks<ID>` field at a fixed offset.
+///
+/// This trait is only used to help implement `ListItem` safely. If `ListItem` is implemented
+/// manually, then this trait is not needed. Use the [`impl_has_list_links!`] macro to implement
+/// this trait.
+///
+/// # Safety
+///
+/// All values of this type must have a `ListLinks<ID>` field at the given offset.
+///
+/// The behavior of `raw_get_list_links` must not be changed.
+pub unsafe trait HasListLinks<const ID: u64 = 0> {
+ /// The offset of the `ListLinks` field.
+ const OFFSET: usize;
+
+ /// Returns a pointer to the [`ListLinks<T, ID>`] field.
+ ///
+ /// # Safety
+ ///
+ /// The provided pointer must point at a valid struct of type `Self`.
+ ///
+ /// [`ListLinks<T, ID>`]: ListLinks
+ // We don't really need this method, but it's necessary for the implementation of
+ // `impl_has_list_links!` to be correct.
+ #[inline]
+ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut ListLinks<ID> {
+ // SAFETY: The caller promises that the pointer is valid. The implementer promises that the
+ // `OFFSET` constant is correct.
+ unsafe { (ptr as *mut u8).add(Self::OFFSET) as *mut ListLinks<ID> }
+ }
+}
+
+/// Implements the [`HasListLinks`] trait for the given type.
+#[macro_export]
+macro_rules! impl_has_list_links {
+ ($(impl$(<$($implarg:ident),*>)?
+ HasListLinks$(<$id:tt>)?
+ for $self:ident $(<$($selfarg:ty),*>)?
+ { self$(.$field:ident)* }
+ )*) => {$(
+ // SAFETY: The implementation of `raw_get_list_links` only compiles if the field has the
+ // right type.
+ //
+ // The behavior of `raw_get_list_links` is not changed since the `addr_of_mut!` macro is
+ // equivalent to the pointer offset operation in the trait definition.
+ unsafe impl$(<$($implarg),*>)? $crate::list::HasListLinks$(<$id>)? for
+ $self $(<$($selfarg),*>)?
+ {
+ const OFFSET: usize = ::core::mem::offset_of!(Self, $($field).*) as usize;
+
+ #[inline]
+ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$id>)? {
+ // SAFETY: The caller promises that the pointer is not dangling. We know that this
+ // expression doesn't follow any pointers, as the `offset_of!` invocation above
+ // would otherwise not compile.
+ unsafe { ::core::ptr::addr_of_mut!((*ptr)$(.$field)*) }
+ }
+ }
+ )*};
+}
+pub use impl_has_list_links;
+
+/// Declares that the `ListLinks<ID>` field in this struct is inside a `ListLinksSelfPtr<T, ID>`.
+///
+/// # Safety
+///
+/// The `ListLinks<ID>` field of this struct at the offset `HasListLinks<ID>::OFFSET` must be
+/// inside a `ListLinksSelfPtr<T, ID>`.
+pub unsafe trait HasSelfPtr<T: ?Sized, const ID: u64 = 0>
+where
+ Self: HasListLinks<ID>,
+{
+}
+
+/// Implements the [`HasListLinks`] and [`HasSelfPtr`] traits for the given type.
+#[macro_export]
+macro_rules! impl_has_list_links_self_ptr {
+ ($(impl$({$($implarg:tt)*})?
+ HasSelfPtr<$item_type:ty $(, $id:tt)?>
+ for $self:ident $(<$($selfarg:ty),*>)?
+ { self.$field:ident }
+ )*) => {$(
+ // SAFETY: The implementation of `raw_get_list_links` only compiles if the field has the
+ // right type.
+ unsafe impl$(<$($implarg)*>)? $crate::list::HasSelfPtr<$item_type $(, $id)?> for
+ $self $(<$($selfarg),*>)?
+ {}
+
+ unsafe impl$(<$($implarg)*>)? $crate::list::HasListLinks$(<$id>)? for
+ $self $(<$($selfarg),*>)?
+ {
+ const OFFSET: usize = ::core::mem::offset_of!(Self, $field) as usize;
+
+ #[inline]
+ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$id>)? {
+ // SAFETY: The caller promises that the pointer is not dangling.
+ let ptr: *mut $crate::list::ListLinksSelfPtr<$item_type $(, $id)?> =
+ unsafe { ::core::ptr::addr_of_mut!((*ptr).$field) };
+ ptr.cast()
+ }
+ }
+ )*};
+}
+pub use impl_has_list_links_self_ptr;
+
+/// Implements the [`ListItem`] trait for the given type.
+///
+/// Requires that the type implements [`HasListLinks`]. Use the [`impl_has_list_links!`] macro to
+/// implement that trait.
+///
+/// [`ListItem`]: crate::list::ListItem
+#[macro_export]
+macro_rules! impl_list_item {
+ (
+ $(impl$({$($generics:tt)*})? ListItem<$num:tt> for $t:ty {
+ using ListLinks;
+ })*
+ ) => {$(
+ // SAFETY: See GUARANTEES comment on each method.
+ unsafe impl$(<$($generics)*>)? $crate::list::ListItem<$num> for $t {
+ // GUARANTEES:
+ // * This returns the same pointer as `prepare_to_insert` because `prepare_to_insert`
+ // is implemented in terms of `view_links`.
+ // * By the type invariants of `ListLinks`, the `ListLinks` has two null pointers when
+ // this value is not in a list.
+ unsafe fn view_links(me: *const Self) -> *mut $crate::list::ListLinks<$num> {
+ // SAFETY: The caller guarantees that `me` points at a valid value of type `Self`.
+ unsafe {
+ <Self as $crate::list::HasListLinks<$num>>::raw_get_list_links(me.cast_mut())
+ }
+ }
+
+ // GUARANTEES:
+ // * `me` originates from the most recent call to `prepare_to_insert`, which just added
+ // `offset` to the pointer passed to `prepare_to_insert`. This method subtracts
+ // `offset` from `me` so it returns the pointer originally passed to
+ // `prepare_to_insert`.
+ // * The pointer remains valid until the next call to `post_remove` because the caller
+ // of the most recent call to `prepare_to_insert` promised to retain ownership of the
+ // `ListArc` containing `Self` until the next call to `post_remove`. The value cannot
+ // be destroyed while a `ListArc` reference exists.
+ unsafe fn view_value(me: *mut $crate::list::ListLinks<$num>) -> *const Self {
+ let offset = <Self as $crate::list::HasListLinks<$num>>::OFFSET;
+ // SAFETY: `me` originates from the most recent call to `prepare_to_insert`, so it
+ // points at the field at offset `offset` in a value of type `Self`. Thus,
+ // subtracting `offset` from `me` is still in-bounds of the allocation.
+ unsafe { (me as *const u8).sub(offset) as *const Self }
+ }
+
+ // GUARANTEES:
+ // This implementation of `ListItem` will not give out exclusive access to the same
+ // `ListLinks` several times because calls to `prepare_to_insert` and `post_remove`
+ // must alternate and exclusive access is given up when `post_remove` is called.
+ //
+ // Other invocations of `impl_list_item!` also cannot give out exclusive access to the
+ // same `ListLinks` because you can only implement `ListItem` once for each value of
+ // `ID`, and the `ListLinks` fields only work with the specified `ID`.
+ unsafe fn prepare_to_insert(me: *const Self) -> *mut $crate::list::ListLinks<$num> {
+ // SAFETY: The caller promises that `me` points at a valid value.
+ unsafe { <Self as $crate::list::ListItem<$num>>::view_links(me) }
+ }
+
+ // GUARANTEES:
+ // * `me` originates from the most recent call to `prepare_to_insert`, which just added
+ // `offset` to the pointer passed to `prepare_to_insert`. This method subtracts
+ // `offset` from `me` so it returns the pointer originally passed to
+ // `prepare_to_insert`.
+ unsafe fn post_remove(me: *mut $crate::list::ListLinks<$num>) -> *const Self {
+ let offset = <Self as $crate::list::HasListLinks<$num>>::OFFSET;
+ // SAFETY: `me` originates from the most recent call to `prepare_to_insert`, so it
+ // points at the field at offset `offset` in a value of type `Self`. Thus,
+ // subtracting `offset` from `me` is still in-bounds of the allocation.
+ unsafe { (me as *const u8).sub(offset) as *const Self }
+ }
+ }
+ )*};
+
+ (
+ $(impl$({$($generics:tt)*})? ListItem<$num:tt> for $t:ty {
+ using ListLinksSelfPtr;
+ })*
+ ) => {$(
+ // SAFETY: See GUARANTEES comment on each method.
+ unsafe impl$(<$($generics)*>)? $crate::list::ListItem<$num> for $t {
+ // GUARANTEES:
+ // This implementation of `ListItem` will not give out exclusive access to the same
+ // `ListLinks` several times because calls to `prepare_to_insert` and `post_remove`
+ // must alternate and exclusive access is given up when `post_remove` is called.
+ //
+ // Other invocations of `impl_list_item!` also cannot give out exclusive access to the
+ // same `ListLinks` because you can only implement `ListItem` once for each value of
+ // `ID`, and the `ListLinks` fields only work with the specified `ID`.
+ unsafe fn prepare_to_insert(me: *const Self) -> *mut $crate::list::ListLinks<$num> {
+ // SAFETY: The caller promises that `me` points at a valid value of type `Self`.
+ let links_field = unsafe { <Self as $crate::list::ListItem<$num>>::view_links(me) };
+
+ let spoff = $crate::list::ListLinksSelfPtr::<Self, $num>::LIST_LINKS_SELF_PTR_OFFSET;
+ // Goes via the offset as the field is private.
+ //
+ // SAFETY: The constant is equal to `offset_of!(ListLinksSelfPtr, self_ptr)`, so
+ // the pointer stays in bounds of the allocation.
+ let self_ptr = unsafe { (links_field as *const u8).add(spoff) }
+ as *const $crate::types::Opaque<*const Self>;
+ let cell_inner = $crate::types::Opaque::raw_get(self_ptr);
+
+ // SAFETY: This value is not accessed in any other places than `prepare_to_insert`,
+ // `post_remove`, or `view_value`. By the safety requirements of those methods,
+ // none of these three methods may be called in parallel with this call to
+ // `prepare_to_insert`, so this write will not race with any other access to the
+ // value.
+ unsafe { ::core::ptr::write(cell_inner, me) };
+
+ links_field
+ }
+
+ // GUARANTEES:
+ // * This returns the same pointer as `prepare_to_insert` because `prepare_to_insert`
+ // returns the return value of `view_links`.
+ // * By the type invariants of `ListLinks`, the `ListLinks` has two null pointers when
+ // this value is not in a list.
+ unsafe fn view_links(me: *const Self) -> *mut $crate::list::ListLinks<$num> {
+ // SAFETY: The caller promises that `me` points at a valid value of type `Self`.
+                unsafe {
+                    <Self as $crate::list::HasListLinks<$num>>::raw_get_list_links(me.cast_mut())
+                }
+ }
+
+ // This function is also used as the implementation of `post_remove`, so the caller
+ // may choose to satisfy the safety requirements of `post_remove` instead of the safety
+ // requirements for `view_value`.
+ //
+ // GUARANTEES: (always)
+ // * This returns the same pointer as the one passed to the most recent call to
+ // `prepare_to_insert` since that call wrote that pointer to this location. The value
+ // is only modified in `prepare_to_insert`, so it has not been modified since the
+ // most recent call.
+ //
+ // GUARANTEES: (only when using the `view_value` safety requirements)
+ // * The pointer remains valid until the next call to `post_remove` because the caller
+ // of the most recent call to `prepare_to_insert` promised to retain ownership of the
+ // `ListArc` containing `Self` until the next call to `post_remove`. The value cannot
+ // be destroyed while a `ListArc` reference exists.
+ unsafe fn view_value(links_field: *mut $crate::list::ListLinks<$num>) -> *const Self {
+ let spoff = $crate::list::ListLinksSelfPtr::<Self, $num>::LIST_LINKS_SELF_PTR_OFFSET;
+ // SAFETY: The constant is equal to `offset_of!(ListLinksSelfPtr, self_ptr)`, so
+ // the pointer stays in bounds of the allocation.
+ let self_ptr = unsafe { (links_field as *const u8).add(spoff) }
+ as *const ::core::cell::UnsafeCell<*const Self>;
+ let cell_inner = ::core::cell::UnsafeCell::raw_get(self_ptr);
+ // SAFETY: This is not a data race, because the only function that writes to this
+ // value is `prepare_to_insert`, but by the safety requirements the
+ // `prepare_to_insert` method may not be called in parallel with `view_value` or
+ // `post_remove`.
+ unsafe { ::core::ptr::read(cell_inner) }
+ }
+
+ // GUARANTEES:
+ // The first guarantee of `view_value` is exactly what `post_remove` guarantees.
+ unsafe fn post_remove(me: *mut $crate::list::ListLinks<$num>) -> *const Self {
+ // SAFETY: This specific implementation of `view_value` allows the caller to
+ // promise the safety requirements of `post_remove` instead of the safety
+ // requirements for `view_value`.
+ unsafe { <Self as $crate::list::ListItem<$num>>::view_value(me) }
+ }
+ }
+ )*};
+}
+pub use impl_list_item;
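Putting the two macros together, a typical item invokes `impl_has_list_links!` and then `impl_list_item!`. A minimal sketch with a hypothetical `Job` type (the `ListArcSafe` side a real item also needs is elided):

```rust
use kernel::list::{impl_has_list_links, impl_list_item, ListLinks};

// Hypothetical list item; the `ListArcSafe` implementation is elided.
struct Job {
    links: ListLinks,
    payload: u32,
}

impl_has_list_links! {
    // Records that `links` sits at `offset_of!(Job, links)`.
    impl HasListLinks<0> for Job { self.links }
}
impl_list_item! {
    // Derives `ListItem<0>` from the `HasListLinks<0>` impl above.
    impl ListItem<0> for Job { using ListLinks; }
}
```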
diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs
new file mode 100644
index 000000000000..e14433b2ab9d
--- /dev/null
+++ b/rust/kernel/miscdevice.rs
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 Google LLC.
+
+//! Miscdevice support.
+//!
+//! C headers: [`include/linux/miscdevice.h`](srctree/include/linux/miscdevice.h).
+//!
+//! Reference: <https://www.kernel.org/doc/html/latest/driver-api/misc_devices.html>
+
+use crate::{
+ bindings,
+ device::Device,
+ error::{to_result, Error, Result, VTABLE_DEFAULT_ERROR},
+ ffi::{c_int, c_long, c_uint, c_ulong},
+ fs::File,
+ prelude::*,
+ seq_file::SeqFile,
+ str::CStr,
+ types::{ForeignOwnable, Opaque},
+};
+use core::{marker::PhantomData, mem::MaybeUninit, pin::Pin};
+
+/// Options for creating a misc device.
+#[derive(Copy, Clone)]
+pub struct MiscDeviceOptions {
+ /// The name of the miscdevice.
+ pub name: &'static CStr,
+}
+
+impl MiscDeviceOptions {
+    /// Create a raw `struct miscdevice` ready for registration.
+ pub const fn into_raw<T: MiscDevice>(self) -> bindings::miscdevice {
+ // SAFETY: All zeros is valid for this C type.
+ let mut result: bindings::miscdevice = unsafe { MaybeUninit::zeroed().assume_init() };
+ result.minor = bindings::MISC_DYNAMIC_MINOR as _;
+ result.name = self.name.as_char_ptr();
+ result.fops = create_vtable::<T>();
+ result
+ }
+}
+
+/// A registration of a miscdevice.
+///
+/// # Invariants
+///
+/// `inner` is a registered misc device.
+#[repr(transparent)]
+#[pin_data(PinnedDrop)]
+pub struct MiscDeviceRegistration<T> {
+ #[pin]
+ inner: Opaque<bindings::miscdevice>,
+ _t: PhantomData<T>,
+}
+
+// SAFETY: It is allowed to call `misc_deregister` on a different thread from where you called
+// `misc_register`.
+unsafe impl<T> Send for MiscDeviceRegistration<T> {}
+// SAFETY: All `&self` methods on this type are written to ensure that it is safe to call them in
+// parallel.
+unsafe impl<T> Sync for MiscDeviceRegistration<T> {}
+
+impl<T: MiscDevice> MiscDeviceRegistration<T> {
+ /// Register a misc device.
+ pub fn register(opts: MiscDeviceOptions) -> impl PinInit<Self, Error> {
+ try_pin_init!(Self {
+ inner <- Opaque::try_ffi_init(move |slot: *mut bindings::miscdevice| {
+ // SAFETY: The initializer can write to the provided `slot`.
+ unsafe { slot.write(opts.into_raw::<T>()) };
+
+ // SAFETY: We just wrote the misc device options to the slot. The miscdevice will
+ // get unregistered before `slot` is deallocated because the memory is pinned and
+ // the destructor of this type deallocates the memory.
+ // INVARIANT: If this returns `Ok(())`, then the `slot` will contain a registered
+ // misc device.
+ to_result(unsafe { bindings::misc_register(slot) })
+ }),
+ _t: PhantomData,
+ })
+ }
+
+ /// Returns a raw pointer to the misc device.
+ pub fn as_raw(&self) -> *mut bindings::miscdevice {
+ self.inner.get()
+ }
+
+ /// Access the `this_device` field.
+ pub fn device(&self) -> &Device {
+ // SAFETY: This can only be called after a successful register(), which always
+ // initialises `this_device` with a valid device. Furthermore, the signature of this
+ // function tells the borrow-checker that the `&Device` reference must not outlive the
+ // `&MiscDeviceRegistration<T>` used to obtain it, so the last use of the reference must be
+ // before the underlying `struct miscdevice` is destroyed.
+ unsafe { Device::as_ref((*self.as_raw()).this_device) }
+ }
+}
+
+#[pinned_drop]
+impl<T> PinnedDrop for MiscDeviceRegistration<T> {
+ fn drop(self: Pin<&mut Self>) {
+ // SAFETY: We know that the device is registered by the type invariants.
+ unsafe { bindings::misc_deregister(self.inner.get()) };
+ }
+}
+
+/// Trait implemented by the private data of an open misc device.
+#[vtable]
+pub trait MiscDevice: Sized {
+ /// What kind of pointer should `Self` be wrapped in.
+ type Ptr: ForeignOwnable + Send + Sync;
+
+ /// Called when the misc device is opened.
+ ///
+ /// The returned pointer will be stored as the private data for the file.
+ fn open(_file: &File, _misc: &MiscDeviceRegistration<Self>) -> Result<Self::Ptr>;
+
+ /// Called when the misc device is released.
+ fn release(device: Self::Ptr, _file: &File) {
+ drop(device);
+ }
+
+ /// Handler for ioctls.
+ ///
+    /// The `cmd` argument is usually manipulated using the utilities in [`kernel::ioctl`].
+ ///
+ /// [`kernel::ioctl`]: mod@crate::ioctl
+ fn ioctl(
+ _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>,
+ _file: &File,
+ _cmd: u32,
+ _arg: usize,
+ ) -> Result<isize> {
+ build_error!(VTABLE_DEFAULT_ERROR)
+ }
+
+ /// Handler for ioctls.
+ ///
+ /// Used for 32-bit userspace on 64-bit platforms.
+ ///
+ /// This method is optional and only needs to be provided if the ioctl relies on structures
+ /// that have different layout on 32-bit and 64-bit userspace. If no implementation is
+ /// provided, then `compat_ptr_ioctl` will be used instead.
+ #[cfg(CONFIG_COMPAT)]
+ fn compat_ioctl(
+ _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>,
+ _file: &File,
+ _cmd: u32,
+ _arg: usize,
+ ) -> Result<isize> {
+ build_error!(VTABLE_DEFAULT_ERROR)
+ }
+
+ /// Show info for this fd.
+ fn show_fdinfo(
+ _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>,
+ _m: &SeqFile,
+ _file: &File,
+ ) {
+ build_error!(VTABLE_DEFAULT_ERROR)
+ }
+}
+
+const fn create_vtable<T: MiscDevice>() -> &'static bindings::file_operations {
+ const fn maybe_fn<T: Copy>(check: bool, func: T) -> Option<T> {
+ if check {
+ Some(func)
+ } else {
+ None
+ }
+ }
+
+ struct VtableHelper<T: MiscDevice> {
+ _t: PhantomData<T>,
+ }
+ impl<T: MiscDevice> VtableHelper<T> {
+ const VTABLE: bindings::file_operations = bindings::file_operations {
+ open: Some(fops_open::<T>),
+ release: Some(fops_release::<T>),
+ unlocked_ioctl: maybe_fn(T::HAS_IOCTL, fops_ioctl::<T>),
+ #[cfg(CONFIG_COMPAT)]
+ compat_ioctl: if T::HAS_COMPAT_IOCTL {
+ Some(fops_compat_ioctl::<T>)
+ } else if T::HAS_IOCTL {
+ Some(bindings::compat_ptr_ioctl)
+ } else {
+ None
+ },
+ show_fdinfo: maybe_fn(T::HAS_SHOW_FDINFO, fops_show_fdinfo::<T>),
+ // SAFETY: All zeros is a valid value for `bindings::file_operations`.
+ ..unsafe { MaybeUninit::zeroed().assume_init() }
+ };
+ }
+
+ &VtableHelper::<T>::VTABLE
+}
+
+/// # Safety
+///
+/// `file` and `inode` must be the file and inode for a file that is undergoing initialization.
+/// The file must be associated with a `MiscDeviceRegistration<T>`.
+unsafe extern "C" fn fops_open<T: MiscDevice>(
+ inode: *mut bindings::inode,
+ raw_file: *mut bindings::file,
+) -> c_int {
+ // SAFETY: The pointers are valid and for a file being opened.
+ let ret = unsafe { bindings::generic_file_open(inode, raw_file) };
+ if ret != 0 {
+ return ret;
+ }
+
+ // SAFETY: The open call of a file can access the private data.
+ let misc_ptr = unsafe { (*raw_file).private_data };
+
+ // SAFETY: This is a miscdevice, so `misc_open()` set the private data to a pointer to the
+ // associated `struct miscdevice` before calling into this method. Furthermore, `misc_open()`
+ // ensures that the miscdevice can't be unregistered and freed during this call to `fops_open`.
+ let misc = unsafe { &*misc_ptr.cast::<MiscDeviceRegistration<T>>() };
+
+ // SAFETY:
+ // * This underlying file is valid for (much longer than) the duration of `T::open`.
+ // * There is no active fdget_pos region on the file on this thread.
+ let file = unsafe { File::from_raw_file(raw_file) };
+
+ let ptr = match T::open(file, misc) {
+ Ok(ptr) => ptr,
+ Err(err) => return err.to_errno(),
+ };
+
+ // This overwrites the private data with the value specified by the user, changing the type of
+    // this file's private data. All future accesses to the private data are performed by other
+ // fops_* methods in this file, which all correctly cast the private data to the new type.
+ //
+ // SAFETY: The open call of a file can access the private data.
+ unsafe { (*raw_file).private_data = ptr.into_foreign() };
+
+ 0
+}
+
+/// # Safety
+///
+/// `file` and `inode` must be the file and inode for a file that is being released. The file must
+/// be associated with a `MiscDeviceRegistration<T>`.
+unsafe extern "C" fn fops_release<T: MiscDevice>(
+ _inode: *mut bindings::inode,
+ file: *mut bindings::file,
+) -> c_int {
+ // SAFETY: The release call of a file owns the private data.
+ let private = unsafe { (*file).private_data };
+ // SAFETY: The release call of a file owns the private data.
+ let ptr = unsafe { <T::Ptr as ForeignOwnable>::from_foreign(private) };
+
+ // SAFETY:
+ // * The file is valid for the duration of this call.
+ // * There is no active fdget_pos region on the file on this thread.
+ T::release(ptr, unsafe { File::from_raw_file(file) });
+
+ 0
+}
+
+/// # Safety
+///
+/// `file` must be a valid file that is associated with a `MiscDeviceRegistration<T>`.
+unsafe extern "C" fn fops_ioctl<T: MiscDevice>(
+ file: *mut bindings::file,
+ cmd: c_uint,
+ arg: c_ulong,
+) -> c_long {
+ // SAFETY: The ioctl call of a file can access the private data.
+ let private = unsafe { (*file).private_data };
+ // SAFETY: Ioctl calls can borrow the private data of the file.
+ let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) };
+
+ // SAFETY:
+ // * The file is valid for the duration of this call.
+ // * There is no active fdget_pos region on the file on this thread.
+ let file = unsafe { File::from_raw_file(file) };
+
+ match T::ioctl(device, file, cmd, arg) {
+ Ok(ret) => ret as c_long,
+ Err(err) => err.to_errno() as c_long,
+ }
+}
+
+/// # Safety
+///
+/// `file` must be a valid file that is associated with a `MiscDeviceRegistration<T>`.
+#[cfg(CONFIG_COMPAT)]
+unsafe extern "C" fn fops_compat_ioctl<T: MiscDevice>(
+ file: *mut bindings::file,
+ cmd: c_uint,
+ arg: c_ulong,
+) -> c_long {
+ // SAFETY: The compat ioctl call of a file can access the private data.
+ let private = unsafe { (*file).private_data };
+ // SAFETY: Ioctl calls can borrow the private data of the file.
+ let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) };
+
+ // SAFETY:
+ // * The file is valid for the duration of this call.
+ // * There is no active fdget_pos region on the file on this thread.
+ let file = unsafe { File::from_raw_file(file) };
+
+ match T::compat_ioctl(device, file, cmd, arg) {
+ Ok(ret) => ret as c_long,
+ Err(err) => err.to_errno() as c_long,
+ }
+}
+
+/// # Safety
+///
+/// - `file` must be a valid file that is associated with a `MiscDeviceRegistration<T>`.
+/// - `seq_file` must be a valid `struct seq_file` that we can write to.
+unsafe extern "C" fn fops_show_fdinfo<T: MiscDevice>(
+ seq_file: *mut bindings::seq_file,
+ file: *mut bindings::file,
+) {
+    // SAFETY: The `show_fdinfo` call of a file can access the private data.
+    let private = unsafe { (*file).private_data };
+    // SAFETY: `show_fdinfo` calls can borrow the private data of the file.
+ let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) };
+ // SAFETY:
+ // * The file is valid for the duration of this call.
+ // * There is no active fdget_pos region on the file on this thread.
+ let file = unsafe { File::from_raw_file(file) };
+ // SAFETY: The caller ensures that the pointer is valid and exclusive for the duration in which
+ // this method is called.
+ let m = unsafe { SeqFile::from_raw(seq_file) };
+
+ T::show_fdinfo(device, m, file);
+}
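End to end, a driver implements the trait and pin-initializes the registration. A minimal sketch, assuming `KBox<Self>` as the `ForeignOwnable` pointer type and a hypothetical device name; only `open` is provided, so the optional hooks keep their build-error defaults:

```rust
use kernel::fs::File;
use kernel::miscdevice::{MiscDevice, MiscDeviceOptions, MiscDeviceRegistration};
use kernel::prelude::*;

// Hypothetical per-open state; a real driver would add fields here.
struct Sample;

#[vtable]
impl MiscDevice for Sample {
    type Ptr = KBox<Self>;

    fn open(_file: &File, _misc: &MiscDeviceRegistration<Self>) -> Result<KBox<Self>> {
        // One `Sample` is allocated per open of the device node.
        Ok(KBox::new(Sample, GFP_KERNEL)?)
    }
}

// In a module's `init`, registration might look like this (sketch):
//
//     let opts = MiscDeviceOptions { name: c_str!("rust-misc-sample") };
//     let reg = KBox::pin_init(
//         MiscDeviceRegistration::<Sample>::register(opts),
//         GFP_KERNEL,
//     )?;
```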
diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs
index e457b3c7cb2f..bb654a28dab3 100644
--- a/rust/kernel/net/phy.rs
+++ b/rust/kernel/net/phy.rs
@@ -4,11 +4,12 @@
//! Network PHY device.
//!
-//! C headers: [`include/linux/phy.h`](../../../../../../../include/linux/phy.h).
+//! C headers: [`include/linux/phy.h`](srctree/include/linux/phy.h).
-use crate::{bindings, error::*, prelude::*, str::CStr, types::Opaque};
+use crate::{error::*, prelude::*, types::Opaque};
+use core::{marker::PhantomData, ptr::addr_of_mut};
-use core::marker::PhantomData;
+pub mod reg;
/// PHY state machine states.
///
@@ -16,7 +17,7 @@ use core::marker::PhantomData;
///
/// Some PHY drivers access the state of the PHY's software state machine.
///
-/// [`enum phy_state`]: ../../../../../../../include/linux/phy.h
+/// [`enum phy_state`]: srctree/include/linux/phy.h
#[derive(PartialEq, Eq)]
pub enum DeviceState {
/// PHY device and driver are not ready for anything.
@@ -58,10 +59,11 @@ pub enum DuplexMode {
///
/// # Invariants
///
-/// Referencing a `phy_device` using this struct asserts that you are in
-/// a context where all methods defined on this struct are safe to call.
+/// - Referencing a `phy_device` using this struct asserts that you are in
+/// a context where all methods defined on this struct are safe to call.
+/// - This struct always has a valid `self.0.mdio.dev`.
///
-/// [`struct phy_device`]: ../../../../../../../include/linux/phy.h
+/// [`struct phy_device`]: srctree/include/linux/phy.h
// During the calls to most functions in [`Driver`], the C side (`PHYLIB`) holds a lock that is
// unique for every instance of [`Device`]. `PHYLIB` uses a different serialization technique for
// [`Driver::resume`] and [`Driver::suspend`]: `PHYLIB` updates `phy_device`'s state with
@@ -76,9 +78,11 @@ impl Device {
///
/// # Safety
///
- /// For the duration of 'a, the pointer must point at a valid `phy_device`,
- /// and the caller must be in a context where all methods defined on this struct
- /// are safe to call.
+ /// For the duration of `'a`,
+ /// - the pointer must point at a valid `phy_device`, and the caller
+ /// must be in a context where all methods defined on this struct
+ /// are safe to call.
+    /// - `(*ptr).mdio.dev` must be valid.
unsafe fn from_raw<'a>(ptr: *mut bindings::phy_device) -> &'a mut Self {
// CAST: `Self` is a `repr(transparent)` wrapper around `bindings::phy_device`.
let ptr = ptr.cast::<Self>();
@@ -175,32 +179,15 @@ impl Device {
unsafe { (*phydev).duplex = v };
}
- /// Reads a given C22 PHY register.
+ /// Reads a PHY register.
// This function reads a hardware register and updates the stats so takes `&mut self`.
- pub fn read(&mut self, regnum: u16) -> Result<u16> {
- let phydev = self.0.get();
- // SAFETY: `phydev` is pointing to a valid object by the type invariant of `Self`.
- // So it's just an FFI call, open code of `phy_read()` with a valid `phy_device` pointer
- // `phydev`.
- let ret = unsafe {
- bindings::mdiobus_read((*phydev).mdio.bus, (*phydev).mdio.addr, regnum.into())
- };
- if ret < 0 {
- Err(Error::from_errno(ret))
- } else {
- Ok(ret as u16)
- }
+ pub fn read<R: reg::Register>(&mut self, reg: R) -> Result<u16> {
+ reg.read(self)
}
- /// Writes a given C22 PHY register.
- pub fn write(&mut self, regnum: u16, val: u16) -> Result {
- let phydev = self.0.get();
- // SAFETY: `phydev` is pointing to a valid object by the type invariant of `Self`.
- // So it's just an FFI call, open code of `phy_write()` with a valid `phy_device` pointer
- // `phydev`.
- to_result(unsafe {
- bindings::mdiobus_write((*phydev).mdio.bus, (*phydev).mdio.addr, regnum.into(), val)
- })
+ /// Writes a PHY register.
+ pub fn write<R: reg::Register>(&mut self, reg: R, val: u16) -> Result {
+ reg.write(self, val)
}
/// Reads a paged register.
@@ -265,16 +252,8 @@ impl Device {
}
/// Checks the link status and updates current link state.
- pub fn genphy_read_status(&mut self) -> Result<u16> {
- let phydev = self.0.get();
- // SAFETY: `phydev` is pointing to a valid object by the type invariant of `Self`.
- // So it's just an FFI call.
- let ret = unsafe { bindings::genphy_read_status(phydev) };
- if ret < 0 {
- Err(Error::from_errno(ret))
- } else {
- Ok(ret as u16)
- }
+ pub fn genphy_read_status<R: reg::Register>(&mut self) -> Result<u16> {
+ R::read_status(self)
}
/// Updates the link status.
@@ -302,6 +281,14 @@ impl Device {
}
}
+impl AsRef<kernel::device::Device> for Device {
+ fn as_ref(&self) -> &kernel::device::Device {
+ let phydev = self.0.get();
+ // SAFETY: The struct invariant ensures that `mdio.dev` is valid.
+ unsafe { kernel::device::Device::as_ref(addr_of_mut!((*phydev).mdio.dev)) }
+ }
+}
+
/// Defines certain other features this PHY supports (like interrupts).
///
/// These flag values are used in [`Driver::FLAGS`].
@@ -327,7 +314,7 @@ impl<T: Driver> Adapter<T> {
/// `phydev` must be passed by the corresponding callback in `phy_driver`.
unsafe extern "C" fn soft_reset_callback(
phydev: *mut bindings::phy_device,
- ) -> core::ffi::c_int {
+ ) -> crate::ffi::c_int {
from_result(|| {
// SAFETY: This callback is called only in contexts
// where we hold `phy_device->lock`, so the accessors on
@@ -341,9 +328,24 @@ impl<T: Driver> Adapter<T> {
/// # Safety
///
/// `phydev` must be passed by the corresponding callback in `phy_driver`.
+ unsafe extern "C" fn probe_callback(phydev: *mut bindings::phy_device) -> crate::ffi::c_int {
+ from_result(|| {
+ // SAFETY: This callback is called only in contexts
+ // where we can exclusively access `phy_device` because
+ // it's not published yet, so the accessors on `Device` are okay
+ // to call.
+ let dev = unsafe { Device::from_raw(phydev) };
+ T::probe(dev)?;
+ Ok(0)
+ })
+ }
+
+ /// # Safety
+ ///
+ /// `phydev` must be passed by the corresponding callback in `phy_driver`.
unsafe extern "C" fn get_features_callback(
phydev: *mut bindings::phy_device,
- ) -> core::ffi::c_int {
+ ) -> crate::ffi::c_int {
from_result(|| {
// SAFETY: This callback is called only in contexts
// where we hold `phy_device->lock`, so the accessors on
@@ -357,7 +359,7 @@ impl<T: Driver> Adapter<T> {
/// # Safety
///
/// `phydev` must be passed by the corresponding callback in `phy_driver`.
- unsafe extern "C" fn suspend_callback(phydev: *mut bindings::phy_device) -> core::ffi::c_int {
+ unsafe extern "C" fn suspend_callback(phydev: *mut bindings::phy_device) -> crate::ffi::c_int {
from_result(|| {
// SAFETY: The C core code ensures that the accessors on
// `Device` are okay to call even though `phy_device->lock`
@@ -371,7 +373,7 @@ impl<T: Driver> Adapter<T> {
/// # Safety
///
/// `phydev` must be passed by the corresponding callback in `phy_driver`.
- unsafe extern "C" fn resume_callback(phydev: *mut bindings::phy_device) -> core::ffi::c_int {
+ unsafe extern "C" fn resume_callback(phydev: *mut bindings::phy_device) -> crate::ffi::c_int {
from_result(|| {
// SAFETY: The C core code ensures that the accessors on
// `Device` are okay to call even though `phy_device->lock`
@@ -387,7 +389,7 @@ impl<T: Driver> Adapter<T> {
/// `phydev` must be passed by the corresponding callback in `phy_driver`.
unsafe extern "C" fn config_aneg_callback(
phydev: *mut bindings::phy_device,
- ) -> core::ffi::c_int {
+ ) -> crate::ffi::c_int {
from_result(|| {
// SAFETY: This callback is called only in contexts
// where we hold `phy_device->lock`, so the accessors on
@@ -403,7 +405,7 @@ impl<T: Driver> Adapter<T> {
/// `phydev` must be passed by the corresponding callback in `phy_driver`.
unsafe extern "C" fn read_status_callback(
phydev: *mut bindings::phy_device,
- ) -> core::ffi::c_int {
+ ) -> crate::ffi::c_int {
from_result(|| {
// SAFETY: This callback is called only in contexts
// where we hold `phy_device->lock`, so the accessors on
@@ -419,7 +421,7 @@ impl<T: Driver> Adapter<T> {
/// `phydev` must be passed by the corresponding callback in `phy_driver`.
unsafe extern "C" fn match_phy_device_callback(
phydev: *mut bindings::phy_device,
- ) -> core::ffi::c_int {
+ ) -> crate::ffi::c_int {
// SAFETY: This callback is called only in contexts
// where we hold `phy_device->lock`, so the accessors on
// `Device` are okay to call.
@@ -486,12 +488,12 @@ impl<T: Driver> Adapter<T> {
///
/// `self.0` is always in a valid state.
///
-/// [`struct phy_driver`]: ../../../../../../../include/linux/phy.h
+/// [`struct phy_driver`]: srctree/include/linux/phy.h
#[repr(transparent)]
pub struct DriverVTable(Opaque<bindings::phy_driver>);
// SAFETY: `DriverVTable` doesn't expose any &self method to access internal data, so it's safe to
-// share `&DriverVTable` across execution context boundries.
+// share `&DriverVTable` across execution context boundaries.
unsafe impl Sync for DriverVTable {}
/// Creates a [`DriverVTable`] instance from [`Driver`].
@@ -511,6 +513,11 @@ pub const fn create_phy_driver<T: Driver>() -> DriverVTable {
} else {
None
},
+ probe: if T::HAS_PROBE {
+ Some(Adapter::<T>::probe_callback)
+ } else {
+ None
+ },
get_features: if T::HAS_GET_FEATURES {
Some(Adapter::<T>::get_features_callback)
} else {
@@ -580,12 +587,17 @@ pub trait Driver {
/// Issues a PHY software reset.
fn soft_reset(_dev: &mut Device) -> Result {
- Err(code::ENOTSUPP)
+ build_error!(VTABLE_DEFAULT_ERROR)
+ }
+
+ /// Sets up device-specific structures during discovery.
+ fn probe(_dev: &mut Device) -> Result {
+ build_error!(VTABLE_DEFAULT_ERROR)
}
/// Probes the hardware to determine what abilities it has.
fn get_features(_dev: &mut Device) -> Result {
- Err(code::ENOTSUPP)
+ build_error!(VTABLE_DEFAULT_ERROR)
}
/// Returns true if this is a suitable driver for the given phydev.
@@ -597,32 +609,32 @@ pub trait Driver {
/// Configures the advertisement and resets auto-negotiation
/// if auto-negotiation is enabled.
fn config_aneg(_dev: &mut Device) -> Result {
- Err(code::ENOTSUPP)
+ build_error!(VTABLE_DEFAULT_ERROR)
}
/// Determines the negotiated speed and duplex.
fn read_status(_dev: &mut Device) -> Result<u16> {
- Err(code::ENOTSUPP)
+ build_error!(VTABLE_DEFAULT_ERROR)
}
/// Suspends the hardware, saving state if needed.
fn suspend(_dev: &mut Device) -> Result {
- Err(code::ENOTSUPP)
+ build_error!(VTABLE_DEFAULT_ERROR)
}
/// Resumes the hardware, restoring state if needed.
fn resume(_dev: &mut Device) -> Result {
- Err(code::ENOTSUPP)
+ build_error!(VTABLE_DEFAULT_ERROR)
}
/// Overrides the default MMD read function for reading a MMD register.
fn read_mmd(_dev: &mut Device, _devnum: u8, _regnum: u16) -> Result<u16> {
- Err(code::ENOTSUPP)
+ build_error!(VTABLE_DEFAULT_ERROR)
}
/// Overrides the default MMD write function for writing a MMD register.
fn write_mmd(_dev: &mut Device, _devnum: u8, _regnum: u16, _val: u16) -> Result {
- Err(code::ENOTSUPP)
+ build_error!(VTABLE_DEFAULT_ERROR)
}
/// Callback for notification of link change.
@@ -640,6 +652,10 @@ pub struct Registration {
drivers: Pin<&'static mut [DriverVTable]>,
}
+// SAFETY: The only action allowed in a `Registration` instance is dropping it, which is safe to do
+// from any thread because `phy_drivers_unregister` can be called from any thread context.
+unsafe impl Send for Registration {}
+
impl Registration {
/// Registers a PHY driver.
pub fn register(
@@ -821,7 +837,7 @@ impl DeviceMask {
/// [::kernel::net::phy::create_phy_driver::<PhySample>()];
///
/// impl ::kernel::Module for Module {
-/// fn init(module: &'static ThisModule) -> Result<Self> {
+/// fn init(module: &'static ::kernel::ThisModule) -> Result<Self> {
/// let drivers = unsafe { &mut DRIVERS };
/// let mut reg = ::kernel::net::phy::Registration::register(
/// module,
@@ -832,9 +848,7 @@ impl DeviceMask {
/// }
/// };
///
-/// #[cfg(MODULE)]
-/// #[no_mangle]
-/// static __mod_mdio__phydev_device_table: [::kernel::bindings::mdio_device_id; 2] = [
+/// const _DEVICE_TABLE: [::kernel::bindings::mdio_device_id; 2] = [
/// ::kernel::bindings::mdio_device_id {
/// phy_id: 0x00000001,
/// phy_id_mask: 0xffffffff,
@@ -844,6 +858,9 @@ impl DeviceMask {
/// phy_id_mask: 0,
/// },
/// ];
+/// #[cfg(MODULE)]
+/// #[no_mangle]
+/// static __mod_device_table__mdio__phydev: [::kernel::bindings::mdio_device_id; 2] = _DEVICE_TABLE;
/// ```
#[macro_export]
macro_rules! module_phy_driver {
@@ -855,9 +872,7 @@ macro_rules! module_phy_driver {
(@device_table [$($dev:expr),+]) => {
// SAFETY: C will not read off the end of this constant since the last element is zero.
- #[cfg(MODULE)]
- #[no_mangle]
- static __mod_mdio__phydev_device_table: [$crate::bindings::mdio_device_id;
+ const _DEVICE_TABLE: [$crate::bindings::mdio_device_id;
$crate::module_phy_driver!(@count_devices $($dev),+) + 1] = [
$($dev.mdio_device_id()),+,
$crate::bindings::mdio_device_id {
@@ -865,6 +880,11 @@ macro_rules! module_phy_driver {
phy_id_mask: 0
}
];
+
+ #[cfg(MODULE)]
+ #[no_mangle]
+ static __mod_device_table__mdio__phydev: [$crate::bindings::mdio_device_id;
+ $crate::module_phy_driver!(@count_devices $($dev),+) + 1] = _DEVICE_TABLE;
};
(drivers: [$($driver:ident),+ $(,)?], device_table: [$($dev:expr),+ $(,)?], $($f:tt)*) => {
@@ -883,7 +903,7 @@ macro_rules! module_phy_driver {
[$($crate::net::phy::create_phy_driver::<$driver>()),+];
impl $crate::Module for Module {
- fn init(module: &'static ThisModule) -> Result<Self> {
+ fn init(module: &'static $crate::ThisModule) -> Result<Self> {
// SAFETY: The anonymous constant guarantees that nobody else can access
// the `DRIVERS` static. The array is used only in the C side.
let drivers = unsafe { &mut DRIVERS };
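With register access now generic over `reg::Register`, driver code can mix C22 and C45 operations through the same `Device` methods. A minimal sketch of a helper as it might appear inside a driver callback; the C45 register number and value are placeholders, not real configuration:

```rust
use kernel::net::phy::{reg, Device};
use kernel::prelude::*;

fn configure(dev: &mut Device) -> Result {
    // Read the C22 basic mode status register.
    let bmsr = dev.read(reg::C22::BMSR)?;
    // Write register 0 of the C45 PMA/PMD device (placeholder value).
    dev.write(reg::C45::new(reg::Mmd::PMAPMD, 0), 0)?;
    pr_info!("BMSR: {:#x}\n", bmsr);
    Ok(())
}
```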
diff --git a/rust/kernel/net/phy/reg.rs b/rust/kernel/net/phy/reg.rs
new file mode 100644
index 000000000000..a7db0064cb7d
--- /dev/null
+++ b/rust/kernel/net/phy/reg.rs
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 FUJITA Tomonori <fujita.tomonori@gmail.com>
+
+//! PHY register interfaces.
+//!
+//! This module provides support for accessing PHY registers in the
+//! Ethernet management interface clauses 22 and 45 register namespaces, as
+//! defined in IEEE 802.3.
+
+use super::Device;
+use crate::build_assert;
+use crate::error::*;
+use crate::uapi;
+
+mod private {
+ /// Marker that a trait cannot be implemented outside of this crate
+ pub trait Sealed {}
+}
+
+/// Accesses PHY registers.
+///
+/// This trait is used to implement the unified interface to access
+/// C22 and C45 PHY registers.
+///
+/// # Examples
+///
+/// ```ignore
+/// fn link_change_notify(dev: &mut Device) {
+/// // read C22 BMCR register
+/// dev.read(C22::BMCR);
+/// // read C45 PMA/PMD control 1 register
+/// dev.read(C45::new(Mmd::PMAPMD, 0));
+///
+/// // Checks the link status as reported by registers in the C22 namespace
+/// // and updates current link state.
+/// dev.genphy_read_status::<phy::C22>();
+/// // Checks the link status as reported by registers in the C45 namespace
+/// // and updates current link state.
+/// dev.genphy_read_status::<phy::C45>();
+/// }
+/// ```
+pub trait Register: private::Sealed {
+ /// Reads a PHY register.
+ fn read(&self, dev: &mut Device) -> Result<u16>;
+
+ /// Writes a PHY register.
+ fn write(&self, dev: &mut Device, val: u16) -> Result;
+
+ /// Checks the link status and updates current link state.
+ fn read_status(dev: &mut Device) -> Result<u16>;
+}
+
+/// A single MDIO clause 22 register address (5 bits).
+#[derive(Copy, Clone, Debug)]
+pub struct C22(u8);
+
+impl C22 {
+ /// Basic mode control.
+ pub const BMCR: Self = C22(0x00);
+ /// Basic mode status.
+ pub const BMSR: Self = C22(0x01);
+ /// PHY identifier 1.
+ pub const PHYSID1: Self = C22(0x02);
+ /// PHY identifier 2.
+ pub const PHYSID2: Self = C22(0x03);
+ /// Auto-negotiation advertisement.
+ pub const ADVERTISE: Self = C22(0x04);
+ /// Auto-negotiation link partner base page ability.
+ pub const LPA: Self = C22(0x05);
+ /// Auto-negotiation expansion.
+ pub const EXPANSION: Self = C22(0x06);
+ /// Auto-negotiation next page transmit.
+ pub const NEXT_PAGE_TRANSMIT: Self = C22(0x07);
+ /// Auto-negotiation link partner received next page.
+ pub const LP_RECEIVED_NEXT_PAGE: Self = C22(0x08);
+ /// Master-slave control.
+ pub const MASTER_SLAVE_CONTROL: Self = C22(0x09);
+ /// Master-slave status.
+ pub const MASTER_SLAVE_STATUS: Self = C22(0x0a);
+ /// PSE Control.
+ pub const PSE_CONTROL: Self = C22(0x0b);
+ /// PSE Status.
+ pub const PSE_STATUS: Self = C22(0x0c);
+ /// MMD Register control.
+ pub const MMD_CONTROL: Self = C22(0x0d);
+ /// MMD Register address data.
+ pub const MMD_DATA: Self = C22(0x0e);
+ /// Extended status.
+ pub const EXTENDED_STATUS: Self = C22(0x0f);
+
+    /// Creates a new instance of `C22` with a vendor-specific register.
+ pub const fn vendor_specific<const N: u8>() -> Self {
+ build_assert!(
+ N > 0x0f && N < 0x20,
+ "Vendor-specific register address must be between 16 and 31"
+ );
+ C22(N)
+ }
+}
+
+impl private::Sealed for C22 {}
+
+impl Register for C22 {
+ fn read(&self, dev: &mut Device) -> Result<u16> {
+ let phydev = dev.0.get();
+ // SAFETY: `phydev` is pointing to a valid object by the type invariant of `Device`.
+ // So it's just an FFI call, open code of `phy_read()` with a valid `phy_device` pointer
+ // `phydev`.
+ let ret = unsafe {
+ bindings::mdiobus_read((*phydev).mdio.bus, (*phydev).mdio.addr, self.0.into())
+ };
+ to_result(ret)?;
+ Ok(ret as u16)
+ }
+
+ fn write(&self, dev: &mut Device, val: u16) -> Result {
+ let phydev = dev.0.get();
+ // SAFETY: `phydev` is pointing to a valid object by the type invariant of `Device`.
+ // So it's just an FFI call, open code of `phy_write()` with a valid `phy_device` pointer
+ // `phydev`.
+ to_result(unsafe {
+ bindings::mdiobus_write((*phydev).mdio.bus, (*phydev).mdio.addr, self.0.into(), val)
+ })
+ }
+
+ fn read_status(dev: &mut Device) -> Result<u16> {
+ let phydev = dev.0.get();
+        // SAFETY: `phydev` is pointing to a valid object by the type invariant of `Device`.
+ // So it's just an FFI call.
+ let ret = unsafe { bindings::genphy_read_status(phydev) };
+ to_result(ret)?;
+ Ok(ret as u16)
+ }
+}
+
+/// A single MDIO clause 45 register device (MMD).
+#[derive(Copy, Clone, Debug)]
+pub struct Mmd(u8);
+
+impl Mmd {
+ /// Physical Medium Attachment/Dependent.
+ pub const PMAPMD: Self = Mmd(uapi::MDIO_MMD_PMAPMD as u8);
+ /// WAN interface sublayer.
+ pub const WIS: Self = Mmd(uapi::MDIO_MMD_WIS as u8);
+ /// Physical coding sublayer.
+ pub const PCS: Self = Mmd(uapi::MDIO_MMD_PCS as u8);
+ /// PHY Extender sublayer.
+ pub const PHYXS: Self = Mmd(uapi::MDIO_MMD_PHYXS as u8);
+ /// DTE Extender sublayer.
+ pub const DTEXS: Self = Mmd(uapi::MDIO_MMD_DTEXS as u8);
+ /// Transmission convergence.
+ pub const TC: Self = Mmd(uapi::MDIO_MMD_TC as u8);
+ /// Auto negotiation.
+ pub const AN: Self = Mmd(uapi::MDIO_MMD_AN as u8);
+ /// Separated PMA (1).
+ pub const SEPARATED_PMA1: Self = Mmd(8);
+ /// Separated PMA (2).
+ pub const SEPARATED_PMA2: Self = Mmd(9);
+ /// Separated PMA (3).
+ pub const SEPARATED_PMA3: Self = Mmd(10);
+ /// Separated PMA (4).
+ pub const SEPARATED_PMA4: Self = Mmd(11);
+ /// OFDM PMA/PMD.
+ pub const OFDM_PMAPMD: Self = Mmd(12);
+ /// Power unit.
+ pub const POWER_UNIT: Self = Mmd(13);
+ /// Clause 22 extension.
+ pub const C22_EXT: Self = Mmd(uapi::MDIO_MMD_C22EXT as u8);
+ /// Vendor specific 1.
+ pub const VEND1: Self = Mmd(uapi::MDIO_MMD_VEND1 as u8);
+ /// Vendor specific 2.
+ pub const VEND2: Self = Mmd(uapi::MDIO_MMD_VEND2 as u8);
+}
+
+/// A single MDIO clause 45 register device and address.
+///
+/// Clause 45 uses a 5-bit device address to access a specific MMD within
+/// a port, then a 16-bit register address to access a location within
+/// that device. `C45` represents this by storing a [`Mmd`] and
+/// a register number.
+pub struct C45 {
+ devad: Mmd,
+ regnum: u16,
+}
+
+impl C45 {
+ /// Creates a new instance of `C45`.
+ pub fn new(devad: Mmd, regnum: u16) -> Self {
+ Self { devad, regnum }
+ }
+}
+
+impl private::Sealed for C45 {}
+
+impl Register for C45 {
+ fn read(&self, dev: &mut Device) -> Result<u16> {
+ let phydev = dev.0.get();
+ // SAFETY: `phydev` is pointing to a valid object by the type invariant of `Device`.
+ // So it's just an FFI call.
+ let ret =
+ unsafe { bindings::phy_read_mmd(phydev, self.devad.0.into(), self.regnum.into()) };
+ to_result(ret)?;
+ Ok(ret as u16)
+ }
+
+ fn write(&self, dev: &mut Device, val: u16) -> Result {
+ let phydev = dev.0.get();
+ // SAFETY: `phydev` is pointing to a valid object by the type invariant of `Device`.
+ // So it's just an FFI call.
+ to_result(unsafe {
+ bindings::phy_write_mmd(phydev, self.devad.0.into(), self.regnum.into(), val)
+ })
+ }
+
+ fn read_status(dev: &mut Device) -> Result<u16> {
+ let phydev = dev.0.get();
+        // SAFETY: `phydev` is pointing to a valid object by the type invariant of `Device`.
+ // So it's just an FFI call.
+ let ret = unsafe { bindings::genphy_c45_read_status(phydev) };
+ to_result(ret)?;
+ Ok(ret as u16)
+ }
+}
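Because `vendor_specific` is backed by `build_assert!`, an out-of-range address is rejected when the caller is compiled rather than at runtime. A small sketch:

```rust
use kernel::net::phy::reg::C22;

fn vendor_regs() -> (C22, C22) {
    // 0x10..=0x1f is the vendor-specific window, so these compile.
    let ctrl = C22::vendor_specific::<0x10>();
    let status = C22::vendor_specific::<0x11>();
    // `C22::vendor_specific::<0x01>()` would trip the `build_assert!`
    // instead, since 0x01 is the standard BMSR address.
    (ctrl, status)
}
```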
diff --git a/rust/kernel/of.rs b/rust/kernel/of.rs
new file mode 100644
index 000000000000..04f2d8ef29cb
--- /dev/null
+++ b/rust/kernel/of.rs
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Device Tree / Open Firmware abstractions.
+
+use crate::{bindings, device_id::RawDeviceId, prelude::*};
+
+/// IdTable type for OF drivers.
+pub type IdTable<T> = &'static dyn kernel::device_id::IdTable<DeviceId, T>;
+
+/// An open firmware device id.
+#[repr(transparent)]
+#[derive(Clone, Copy)]
+pub struct DeviceId(bindings::of_device_id);
+
+// SAFETY:
+// * `DeviceId` is a `#[repr(transparent)]` wrapper of `struct of_device_id` and does not add
+// additional invariants, so it's safe to transmute to `RawType`.
+// * `DRIVER_DATA_OFFSET` is the offset to the `data` field.
+unsafe impl RawDeviceId for DeviceId {
+ type RawType = bindings::of_device_id;
+
+ const DRIVER_DATA_OFFSET: usize = core::mem::offset_of!(bindings::of_device_id, data);
+
+ fn index(&self) -> usize {
+ self.0.data as _
+ }
+}
+
+impl DeviceId {
+ /// Create a new device id from an OF 'compatible' string.
+ pub const fn new(compatible: &'static CStr) -> Self {
+ let src = compatible.as_bytes_with_nul();
+ // Replace with `bindings::of_device_id::default()` once stabilized for `const`.
+ // SAFETY: FFI type is valid to be zero-initialized.
+ let mut of: bindings::of_device_id = unsafe { core::mem::zeroed() };
+
+ // TODO: Use `clone_from_slice` once the corresponding types do match.
+ let mut i = 0;
+ while i < src.len() {
+ of.compatible[i] = src[i] as _;
+ i += 1;
+ }
+
+ Self(of)
+ }
+}
+
+/// Create an OF `IdTable` with an "alias" for modpost.
+#[macro_export]
+macro_rules! of_device_table {
+ ($table_name:ident, $module_table_name:ident, $id_info_type: ty, $table_data: expr) => {
+ const $table_name: $crate::device_id::IdArray<
+ $crate::of::DeviceId,
+ $id_info_type,
+ { $table_data.len() },
+ > = $crate::device_id::IdArray::new($table_data);
+
+ $crate::module_device_table!("of", $module_table_name, $table_name);
+ };
+}
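Usage mirrors the macro's parameter list. A minimal sketch with hypothetical table names, a unit `()` info type, and a made-up compatible string:

```rust
use kernel::{c_str, of};

// Expands to a `const OF_TABLE: IdArray<...>` plus the modpost alias
// emitted by `module_device_table!`.
kernel::of_device_table!(
    OF_TABLE,
    MODULE_OF_TABLE,
    (),
    [(of::DeviceId::new(c_str!("vendor,sample-device")), ())]
);
```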
diff --git a/rust/kernel/page.rs b/rust/kernel/page.rs
new file mode 100644
index 000000000000..f6126aca33a6
--- /dev/null
+++ b/rust/kernel/page.rs
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Kernel page allocation and management.
+
+use crate::{
+ alloc::{AllocError, Flags},
+ bindings,
+ error::code::*,
+ error::Result,
+ uaccess::UserSliceReader,
+};
+use core::ptr::{self, NonNull};
+
+/// A bitwise shift for the page size.
+pub const PAGE_SHIFT: usize = bindings::PAGE_SHIFT as usize;
+
+/// The number of bytes in a page.
+pub const PAGE_SIZE: usize = bindings::PAGE_SIZE;
+
+/// A bitmask that gives the page containing a given address.
+pub const PAGE_MASK: usize = !(PAGE_SIZE - 1);
+
+/// Round up the given number to the next multiple of [`PAGE_SIZE`].
+///
+/// It is incorrect to pass an address where the next multiple of [`PAGE_SIZE`] doesn't fit in a
+/// [`usize`].
+pub const fn page_align(addr: usize) -> usize {
+ // Parentheses around `PAGE_SIZE - 1` to avoid triggering overflow sanitizers in the wrong
+ // cases.
+ (addr + (PAGE_SIZE - 1)) & PAGE_MASK
+}
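Concretely, aligned inputs pass through unchanged and everything else rounds up to the next page boundary; a doctest-style sketch, assuming 4 KiB pages:

```rust
use kernel::page::{page_align, PAGE_SIZE};

// Assuming PAGE_SIZE == 4096: already-aligned addresses are unchanged,
// everything else rounds up to the next page boundary.
assert_eq!(page_align(0), 0);
assert_eq!(page_align(1), PAGE_SIZE);
assert_eq!(page_align(PAGE_SIZE), PAGE_SIZE);
assert_eq!(page_align(PAGE_SIZE + 1), 2 * PAGE_SIZE);
```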
+
+/// A pointer to a page that owns the page allocation.
+///
+/// # Invariants
+///
+/// The pointer is valid, and has ownership over the page.
+pub struct Page {
+ page: NonNull<bindings::page>,
+}
+
+// SAFETY: Pages have no logic that relies on them staying on a given thread, so moving them across
+// threads is safe.
+unsafe impl Send for Page {}
+
+// SAFETY: Pages have no logic that relies on them not being accessed concurrently, so accessing
+// them concurrently is safe.
+unsafe impl Sync for Page {}
+
+impl Page {
+ /// Allocates a new page.
+ ///
+ /// # Examples
+ ///
+ /// Allocate memory for a page.
+ ///
+ /// ```
+ /// use kernel::page::Page;
+ ///
+ /// let page = Page::alloc_page(GFP_KERNEL)?;
+ /// # Ok::<(), kernel::alloc::AllocError>(())
+ /// ```
+ ///
+ /// Allocate memory for a page and zero its contents.
+ ///
+ /// ```
+ /// use kernel::page::Page;
+ ///
+ /// let page = Page::alloc_page(GFP_KERNEL | __GFP_ZERO)?;
+ /// # Ok::<(), kernel::alloc::AllocError>(())
+ /// ```
+ pub fn alloc_page(flags: Flags) -> Result<Self, AllocError> {
+        // SAFETY: Depending on the value of `flags`, this call may sleep. Other than that, it
+ // is always safe to call this method.
+ let page = unsafe { bindings::alloc_pages(flags.as_raw(), 0) };
+ let page = NonNull::new(page).ok_or(AllocError)?;
+ // INVARIANT: We just successfully allocated a page, so we now have ownership of the newly
+ // allocated page. We transfer that ownership to the new `Page` object.
+ Ok(Self { page })
+ }
+
+ /// Returns a raw pointer to the page.
+ pub fn as_ptr(&self) -> *mut bindings::page {
+ self.page.as_ptr()
+ }
+
+ /// Runs a piece of code with this page mapped to an address.
+ ///
+ /// The page is unmapped when this call returns.
+ ///
+ /// # Using the raw pointer
+ ///
+ /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for
+ /// `PAGE_SIZE` bytes and for the duration in which the closure is called. The pointer might
+ /// only be mapped on the current thread, and when that is the case, dereferencing it on other
+ /// threads is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't
+ /// cause data races, the memory may be uninitialized, and so on.
+ ///
+ /// If multiple threads map the same page at the same time, then they may reference with
+ /// different addresses. However, even if the addresses are different, the underlying memory is
+ /// still the same for these purposes (e.g., it's still a data race if they both write to the
+ /// same underlying byte at the same time).
+ fn with_page_mapped<T>(&self, f: impl FnOnce(*mut u8) -> T) -> T {
+ // SAFETY: `page` is valid due to the type invariants on `Page`.
+ let mapped_addr = unsafe { bindings::kmap_local_page(self.as_ptr()) };
+
+ let res = f(mapped_addr.cast());
+
+ // This unmaps the page mapped above.
+ //
+ // SAFETY: Since this API takes the user code as a closure, it can only be used in a manner
+ // where the pages are unmapped in reverse order. This is as required by `kunmap_local`.
+ //
+ // In other words, if this call to `kunmap_local` happens when a different page should be
+ // unmapped first, then there must necessarily be a call to `kmap_local_page` other than the
+ // call just above in `with_page_mapped` that made that possible. In this case, it is the
+ // unsafe block that wraps that other call that is incorrect.
+ unsafe { bindings::kunmap_local(mapped_addr) };
+
+ res
+ }
+
+ /// Runs a piece of code with a raw pointer to a slice of this page, with bounds checking.
+ ///
+ /// If `f` is called, then it will be called with a pointer that points at `off` bytes into the
+ /// page, and the pointer will be valid for at least `len` bytes. The pointer is only valid on
+ /// this task, as this method uses a local mapping.
+ ///
+ /// If `off` and `len` refers to a region outside of this page, then this method returns
+ /// [`EINVAL`] and does not call `f`.
+ ///
+ /// # Using the raw pointer
+ ///
+ /// It is up to the caller to use the provided raw pointer correctly. The pointer is valid for
+ /// `len` bytes and for the duration in which the closure is called. The pointer might only be
+ /// mapped on the current thread, and when that is the case, dereferencing it on other threads
+ /// is UB. Other than that, the usual rules for dereferencing a raw pointer apply: don't cause
+ /// data races, the memory may be uninitialized, and so on.
+ ///
+ /// If multiple threads map the same page at the same time, then they may reference with
+ /// different addresses. However, even if the addresses are different, the underlying memory is
+ /// still the same for these purposes (e.g., it's still a data race if they both write to the
+ /// same underlying byte at the same time).
+ fn with_pointer_into_page<T>(
+ &self,
+ off: usize,
+ len: usize,
+ f: impl FnOnce(*mut u8) -> Result<T>,
+ ) -> Result<T> {
+ let bounds_ok = off <= PAGE_SIZE && len <= PAGE_SIZE && (off + len) <= PAGE_SIZE;
+
+ if bounds_ok {
+ self.with_page_mapped(move |page_addr| {
+ // SAFETY: The `off` integer is at most `PAGE_SIZE`, so this pointer offset will
+ // result in a pointer that is in bounds or one off the end of the page.
+ f(unsafe { page_addr.add(off) })
+ })
+ } else {
+ Err(EINVAL)
+ }
+ }
+
+ /// Maps the page and reads from it into the given buffer.
+ ///
+ /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
+ /// outside of the page, then this call returns [`EINVAL`].
+ ///
+ /// # Safety
+ ///
+ /// * Callers must ensure that `dst` is valid for writing `len` bytes.
+ /// * Callers must ensure that this call does not race with a write to the same page that
+ /// overlaps with this read.
+ pub unsafe fn read_raw(&self, dst: *mut u8, offset: usize, len: usize) -> Result {
+ self.with_pointer_into_page(offset, len, move |src| {
+ // SAFETY: If `with_pointer_into_page` calls into this closure, then
+ // it has performed a bounds check and guarantees that `src` is
+ // valid for `len` bytes.
+ //
+            // The caller guarantees that there is no data race.
+ unsafe { ptr::copy_nonoverlapping(src, dst, len) };
+ Ok(())
+ })
+ }
+
+ /// Maps the page and writes into it from the given buffer.
+ ///
+ /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
+ /// outside of the page, then this call returns [`EINVAL`].
+ ///
+ /// # Safety
+ ///
+ /// * Callers must ensure that `src` is valid for reading `len` bytes.
+ /// * Callers must ensure that this call does not race with a read or write to the same page
+ /// that overlaps with this write.
+ pub unsafe fn write_raw(&self, src: *const u8, offset: usize, len: usize) -> Result {
+ self.with_pointer_into_page(offset, len, move |dst| {
+ // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
+ // bounds check and guarantees that `dst` is valid for `len` bytes.
+ //
+            // The caller guarantees that there is no data race.
+ unsafe { ptr::copy_nonoverlapping(src, dst, len) };
+ Ok(())
+ })
+ }
+
+ /// Maps the page and zeroes the given slice.
+ ///
+ /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
+ /// outside of the page, then this call returns [`EINVAL`].
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this call does not race with a read or write to the same page that
+ /// overlaps with this write.
+ pub unsafe fn fill_zero_raw(&self, offset: usize, len: usize) -> Result {
+ self.with_pointer_into_page(offset, len, move |dst| {
+ // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
+ // bounds check and guarantees that `dst` is valid for `len` bytes.
+ //
+            // The caller guarantees that there is no data race.
+ unsafe { ptr::write_bytes(dst, 0u8, len) };
+ Ok(())
+ })
+ }
+
+ /// Copies data from userspace into this page.
+ ///
+ /// This method will perform bounds checks on the page offset. If `offset .. offset+len` goes
+ /// outside of the page, then this call returns [`EINVAL`].
+ ///
+ /// Like the other `UserSliceReader` methods, data races are allowed on the userspace address.
+ /// However, they are not allowed on the page you are copying into.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this call does not race with a read or write to the same page that
+ /// overlaps with this write.
+ pub unsafe fn copy_from_user_slice_raw(
+ &self,
+ reader: &mut UserSliceReader,
+ offset: usize,
+ len: usize,
+ ) -> Result {
+ self.with_pointer_into_page(offset, len, move |dst| {
+ // SAFETY: If `with_pointer_into_page` calls into this closure, then it has performed a
+ // bounds check and guarantees that `dst` is valid for `len` bytes. Furthermore, we have
+ // exclusive access to the slice since the caller guarantees that there are no races.
+ reader.read_raw(unsafe { core::slice::from_raw_parts_mut(dst.cast(), len) })
+ })
+ }
+}
+
+impl Drop for Page {
+ fn drop(&mut self) {
+ // SAFETY: By the type invariants, we have ownership of the page and can free it.
+ unsafe { bindings::__free_pages(self.page.as_ptr(), 0) };
+ }
+}
diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs
new file mode 100644
index 000000000000..4c98b5b9aa1e
--- /dev/null
+++ b/rust/kernel/pci.rs
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Abstractions for the PCI bus.
+//!
+//! C header: [`include/linux/pci.h`](srctree/include/linux/pci.h)
+
+use crate::{
+ alloc::flags::*,
+ bindings, container_of, device,
+ device_id::RawDeviceId,
+ devres::Devres,
+ driver,
+ error::{to_result, Result},
+ io::Io,
+ io::IoRaw,
+ str::CStr,
+ types::{ARef, ForeignOwnable, Opaque},
+ ThisModule,
+};
+use core::{ops::Deref, ptr::addr_of_mut};
+use kernel::prelude::*;
+
+/// An adapter for the registration of PCI drivers.
+pub struct Adapter<T: Driver>(T);
+
+// SAFETY: A call to `unregister` for a given instance of `RegType` is guaranteed to be valid if
+// a preceding call to `register` has been successful.
+unsafe impl<T: Driver + 'static> driver::RegistrationOps for Adapter<T> {
+ type RegType = bindings::pci_driver;
+
+ unsafe fn register(
+ pdrv: &Opaque<Self::RegType>,
+ name: &'static CStr,
+ module: &'static ThisModule,
+ ) -> Result {
+ // SAFETY: It's safe to set the fields of `struct pci_driver` on initialization.
+ unsafe {
+ (*pdrv.get()).name = name.as_char_ptr();
+ (*pdrv.get()).probe = Some(Self::probe_callback);
+ (*pdrv.get()).remove = Some(Self::remove_callback);
+ (*pdrv.get()).id_table = T::ID_TABLE.as_ptr();
+ }
+
+ // SAFETY: `pdrv` is guaranteed to be a valid `RegType`.
+ to_result(unsafe {
+ bindings::__pci_register_driver(pdrv.get(), module.0, name.as_char_ptr())
+ })
+ }
+
+ unsafe fn unregister(pdrv: &Opaque<Self::RegType>) {
+ // SAFETY: `pdrv` is guaranteed to be a valid `RegType`.
+ unsafe { bindings::pci_unregister_driver(pdrv.get()) }
+ }
+}
+
+impl<T: Driver + 'static> Adapter<T> {
+ extern "C" fn probe_callback(
+ pdev: *mut bindings::pci_dev,
+ id: *const bindings::pci_device_id,
+ ) -> kernel::ffi::c_int {
+ // SAFETY: The PCI bus only ever calls the probe callback with a valid pointer to a
+ // `struct pci_dev`.
+ let dev = unsafe { device::Device::get_device(addr_of_mut!((*pdev).dev)) };
+ // SAFETY: `dev` is guaranteed to be embedded in a valid `struct pci_dev` by the call
+ // above.
+ let mut pdev = unsafe { Device::from_dev(dev) };
+
+        // SAFETY: `DeviceId` is a `#[repr(transparent)]` wrapper of `struct pci_device_id` and
+ // does not add additional invariants, so it's safe to transmute.
+ let id = unsafe { &*id.cast::<DeviceId>() };
+ let info = T::ID_TABLE.info(id.index());
+
+ match T::probe(&mut pdev, info) {
+ Ok(data) => {
+ // Let the `struct pci_dev` own a reference of the driver's private data.
+ // SAFETY: By the type invariant `pdev.as_raw` returns a valid pointer to a
+ // `struct pci_dev`.
+ unsafe { bindings::pci_set_drvdata(pdev.as_raw(), data.into_foreign() as _) };
+ }
+ Err(err) => return Error::to_errno(err),
+ }
+
+ 0
+ }
+
+ extern "C" fn remove_callback(pdev: *mut bindings::pci_dev) {
+ // SAFETY: The PCI bus only ever calls the remove callback with a valid pointer to a
+ // `struct pci_dev`.
+ let ptr = unsafe { bindings::pci_get_drvdata(pdev) };
+
+ // SAFETY: `remove_callback` is only ever called after a successful call to
+ // `probe_callback`, hence it's guaranteed that `ptr` points to a valid and initialized
+ // `KBox<T>` pointer created through `KBox::into_foreign`.
+ let _ = unsafe { KBox::<T>::from_foreign(ptr) };
+ }
+}
+
+/// Declares a kernel module that exposes a single PCI driver.
+///
+/// # Example
+///
+///```ignore
+/// kernel::module_pci_driver! {
+/// type: MyDriver,
+/// name: "Module name",
+/// author: "Author name",
+/// description: "Description",
+/// license: "GPL v2",
+/// }
+///```
+#[macro_export]
+macro_rules! module_pci_driver {
+($($f:tt)*) => {
+ $crate::module_driver!(<T>, $crate::pci::Adapter<T>, { $($f)* });
+};
+}
+
+/// Abstraction for `bindings::pci_device_id`.
+#[repr(transparent)]
+#[derive(Clone, Copy)]
+pub struct DeviceId(bindings::pci_device_id);
+
+impl DeviceId {
+ const PCI_ANY_ID: u32 = !0;
+
+ /// Equivalent to C's `PCI_DEVICE` macro.
+ ///
+ /// Create a new `pci::DeviceId` from a vendor and device ID number.
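+    ///
+    /// A minimal sketch; the device ID `0x0001` is an arbitrary illustrative value:
+    ///
+    /// ```
+    /// # use kernel::{bindings, pci};
+    /// // Match the Red Hat vendor ID with an arbitrary device ID; subvendor and subdevice
+    /// // default to `PCI_ANY_ID`.
+    /// const ID: pci::DeviceId = pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_REDHAT, 0x0001);
+    /// ```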
+ pub const fn from_id(vendor: u32, device: u32) -> Self {
+ Self(bindings::pci_device_id {
+ vendor,
+ device,
+ subvendor: DeviceId::PCI_ANY_ID,
+ subdevice: DeviceId::PCI_ANY_ID,
+ class: 0,
+ class_mask: 0,
+ driver_data: 0,
+ override_only: 0,
+ })
+ }
+
+ /// Equivalent to C's `PCI_DEVICE_CLASS` macro.
+ ///
+ /// Create a new `pci::DeviceId` from a class number and mask.
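+    ///
+    /// A minimal sketch; `0x010802` is the NVMe class code and `!0` compares the full class
+    /// value (illustrative values, not taken from this patch):
+    ///
+    /// ```
+    /// # use kernel::pci;
+    /// // Match every NVMe device, regardless of vendor.
+    /// const CLASS_ID: pci::DeviceId = pci::DeviceId::from_class(0x010802, !0);
+    /// ```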
+ pub const fn from_class(class: u32, class_mask: u32) -> Self {
+ Self(bindings::pci_device_id {
+ vendor: DeviceId::PCI_ANY_ID,
+ device: DeviceId::PCI_ANY_ID,
+ subvendor: DeviceId::PCI_ANY_ID,
+ subdevice: DeviceId::PCI_ANY_ID,
+ class,
+ class_mask,
+ driver_data: 0,
+ override_only: 0,
+ })
+ }
+}
+
+// SAFETY:
+// * `DeviceId` is a `#[repr(transparent)]` wrapper of `pci_device_id` and does not add
+// additional invariants, so it's safe to transmute to `RawType`.
+// * `DRIVER_DATA_OFFSET` is the offset to the `driver_data` field.
+unsafe impl RawDeviceId for DeviceId {
+ type RawType = bindings::pci_device_id;
+
+ const DRIVER_DATA_OFFSET: usize = core::mem::offset_of!(bindings::pci_device_id, driver_data);
+
+ fn index(&self) -> usize {
+ self.0.driver_data as _
+ }
+}
+
+/// The `IdTable` type for PCI.
+pub type IdTable<T> = &'static dyn kernel::device_id::IdTable<DeviceId, T>;
+
+/// Create a PCI `IdTable` with its alias for modpost.
+#[macro_export]
+macro_rules! pci_device_table {
+ ($table_name:ident, $module_table_name:ident, $id_info_type: ty, $table_data: expr) => {
+ const $table_name: $crate::device_id::IdArray<
+ $crate::pci::DeviceId,
+ $id_info_type,
+ { $table_data.len() },
+ > = $crate::device_id::IdArray::new($table_data);
+
+ $crate::module_device_table!("pci", $module_table_name, $table_name);
+ };
+}
+
+/// The PCI driver trait.
+///
+/// # Example
+///
+///```
+/// # use kernel::{bindings, pci};
+///
+/// struct MyDriver;
+///
+/// kernel::pci_device_table!(
+/// PCI_TABLE,
+/// MODULE_PCI_TABLE,
+/// <MyDriver as pci::Driver>::IdInfo,
+/// [
+/// (pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_REDHAT, bindings::PCI_ANY_ID as _), ())
+/// ]
+/// );
+///
+/// impl pci::Driver for MyDriver {
+/// type IdInfo = ();
+/// const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE;
+///
+/// fn probe(
+/// _pdev: &mut pci::Device,
+/// _id_info: &Self::IdInfo,
+/// ) -> Result<Pin<KBox<Self>>> {
+/// Err(ENODEV)
+/// }
+/// }
+///```
+/// Drivers must implement this trait in order to get a PCI driver registered.
+pub trait Driver {
+ /// The type holding information about each device id supported by the driver.
+ ///
+ /// TODO: Use associated_type_defaults once stabilized:
+ ///
+ /// type IdInfo: 'static = ();
+ type IdInfo: 'static;
+
+ /// The table of device ids supported by the driver.
+ const ID_TABLE: IdTable<Self::IdInfo>;
+
+ /// PCI driver probe.
+ ///
+    /// Called when a new PCI device is added or discovered.
+ /// Implementers should attempt to initialize the device here.
+ fn probe(dev: &mut Device, id_info: &Self::IdInfo) -> Result<Pin<KBox<Self>>>;
+}
+
+/// The PCI device representation.
+///
+/// A PCI device is based on an always reference-counted `device::Device` instance. Cloning a PCI
+/// device hence also increments the base device's reference count.
+///
+/// # Invariants
+///
+/// `Device` holds a valid reference of `ARef<device::Device>` whose underlying `struct device` is a
+/// member of a `struct pci_dev`.
+#[derive(Clone)]
+pub struct Device(ARef<device::Device>);
+
+/// A PCI BAR to perform I/O operations on.
+///
+/// # Invariants
+///
+/// `Bar` always holds an `IoRaw` instance that holds a valid pointer to the start of the I/O
+/// memory mapped PCI BAR, as well as its size.
+pub struct Bar<const SIZE: usize = 0> {
+ pdev: Device,
+ io: IoRaw<SIZE>,
+ num: i32,
+}
+
+impl<const SIZE: usize> Bar<SIZE> {
+ fn new(pdev: Device, num: u32, name: &CStr) -> Result<Self> {
+ let len = pdev.resource_len(num)?;
+ if len == 0 {
+ return Err(ENOMEM);
+ }
+
+ // Convert to `i32`, since that's what all the C bindings use.
+ let num = i32::try_from(num)?;
+
+ // SAFETY:
+ // `pdev` is valid by the invariants of `Device`.
+ // `num` is checked for validity by a previous call to `Device::resource_len`.
+ // `name` is always valid.
+ let ret = unsafe { bindings::pci_request_region(pdev.as_raw(), num, name.as_char_ptr()) };
+ if ret != 0 {
+ return Err(EBUSY);
+ }
+
+ // SAFETY:
+ // `pdev` is valid by the invariants of `Device`.
+ // `num` is checked for validity by a previous call to `Device::resource_len`.
+ // `name` is always valid.
+ let ioptr: usize = unsafe { bindings::pci_iomap(pdev.as_raw(), num, 0) } as usize;
+ if ioptr == 0 {
+ // SAFETY:
+ // `pdev` valid by the invariants of `Device`.
+ // `num` is checked for validity by a previous call to `Device::resource_len`.
+ unsafe { bindings::pci_release_region(pdev.as_raw(), num) };
+ return Err(ENOMEM);
+ }
+
+ let io = match IoRaw::new(ioptr, len as usize) {
+ Ok(io) => io,
+ Err(err) => {
+ // SAFETY:
+ // `pdev` is valid by the invariants of `Device`.
+ // `ioptr` is guaranteed to be the start of a valid I/O mapped memory region.
+ // `num` is checked for validity by a previous call to `Device::resource_len`.
+ unsafe { Self::do_release(&pdev, ioptr, num) };
+ return Err(err);
+ }
+ };
+
+ Ok(Bar { pdev, io, num })
+ }
+
+ /// # Safety
+ ///
+ /// `ioptr` must be a valid pointer to the memory mapped PCI bar number `num`.
+ unsafe fn do_release(pdev: &Device, ioptr: usize, num: i32) {
+ // SAFETY:
+ // `pdev` is valid by the invariants of `Device`.
+ // `ioptr` is valid by the safety requirements.
+ // `num` is valid by the safety requirements.
+ unsafe {
+ bindings::pci_iounmap(pdev.as_raw(), ioptr as _);
+ bindings::pci_release_region(pdev.as_raw(), num);
+ }
+ }
+
+ fn release(&self) {
+ // SAFETY: The safety requirements are guaranteed by the type invariant of `self.pdev`.
+ unsafe { Self::do_release(&self.pdev, self.io.addr(), self.num) };
+ }
+}
+
+impl Bar {
+ fn index_is_valid(index: u32) -> bool {
+ // A `struct pci_dev` owns an array of resources with at most `PCI_NUM_RESOURCES` entries.
+ index < bindings::PCI_NUM_RESOURCES
+ }
+}
+
+impl<const SIZE: usize> Drop for Bar<SIZE> {
+ fn drop(&mut self) {
+ self.release();
+ }
+}
+
+impl<const SIZE: usize> Deref for Bar<SIZE> {
+ type Target = Io<SIZE>;
+
+ fn deref(&self) -> &Self::Target {
+ // SAFETY: By the type invariant of `Self`, the MMIO range in `self.io` is properly mapped.
+ unsafe { Io::from_raw(&self.io) }
+ }
+}
+
+impl Device {
+ /// Create a PCI Device instance from an existing `device::Device`.
+ ///
+ /// # Safety
+ ///
+ /// `dev` must be an `ARef<device::Device>` whose underlying `bindings::device` is a member of
+ /// a `bindings::pci_dev`.
+ pub unsafe fn from_dev(dev: ARef<device::Device>) -> Self {
+ Self(dev)
+ }
+
+ fn as_raw(&self) -> *mut bindings::pci_dev {
+ // SAFETY: By the type invariant `self.0.as_raw` is a pointer to the `struct device`
+ // embedded in `struct pci_dev`.
+ unsafe { container_of!(self.0.as_raw(), bindings::pci_dev, dev) as _ }
+ }
+
+ /// Returns the PCI vendor ID.
+ pub fn vendor_id(&self) -> u16 {
+ // SAFETY: `self.as_raw` is a valid pointer to a `struct pci_dev`.
+ unsafe { (*self.as_raw()).vendor }
+ }
+
+ /// Returns the PCI device ID.
+ pub fn device_id(&self) -> u16 {
+ // SAFETY: `self.as_raw` is a valid pointer to a `struct pci_dev`.
+ unsafe { (*self.as_raw()).device }
+ }
+
+ /// Enable memory resources for this device.
+ pub fn enable_device_mem(&self) -> Result {
+ // SAFETY: `self.as_raw` is guaranteed to be a pointer to a valid `struct pci_dev`.
+ let ret = unsafe { bindings::pci_enable_device_mem(self.as_raw()) };
+ if ret != 0 {
+ Err(Error::from_errno(ret))
+ } else {
+ Ok(())
+ }
+ }
+
+ /// Enable bus-mastering for this device.
+ pub fn set_master(&self) {
+ // SAFETY: `self.as_raw` is guaranteed to be a pointer to a valid `struct pci_dev`.
+ unsafe { bindings::pci_set_master(self.as_raw()) };
+ }
+
+ /// Returns the size of the given PCI bar resource.
+ pub fn resource_len(&self, bar: u32) -> Result<bindings::resource_size_t> {
+ if !Bar::index_is_valid(bar) {
+ return Err(EINVAL);
+ }
+
+ // SAFETY:
+ // - `bar` is a valid bar number, as guaranteed by the above call to `Bar::index_is_valid`,
+ // - by its type invariant `self.as_raw` is always a valid pointer to a `struct pci_dev`.
+ Ok(unsafe { bindings::pci_resource_len(self.as_raw(), bar.try_into()?) })
+ }
+
+    /// Maps an entire PCI BAR after performing a region request on it. Bounds checks for I/O
+    /// operations on offsets (plus the requested type size) smaller than `SIZE` can be performed
+    /// at compile time.
+ pub fn iomap_region_sized<const SIZE: usize>(
+ &self,
+ bar: u32,
+ name: &CStr,
+ ) -> Result<Devres<Bar<SIZE>>> {
+ let bar = Bar::<SIZE>::new(self.clone(), bar, name)?;
+ let devres = Devres::new(self.as_ref(), bar, GFP_KERNEL)?;
+
+ Ok(devres)
+ }
+
+    /// Maps an entire PCI BAR after performing a region request on it.
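+    ///
+    /// A hedged sketch of typical use from a driver's `probe()`; the driver name and BAR number
+    /// are illustrative assumptions:
+    ///
+    /// ```ignore
+    /// fn probe(pdev: &mut pci::Device, _info: &Self::IdInfo) -> Result<Pin<KBox<Self>>> {
+    ///     pdev.enable_device_mem()?;
+    ///     pdev.set_master();
+    ///
+    ///     // Request and map BAR 0; the mapping and the region request are released
+    ///     // automatically when the returned `Devres<Bar>` is dropped.
+    ///     let bar = pdev.iomap_region(0, c_str!("my_driver"))?;
+    ///
+    ///     // MMIO access through the BAR; `try_read32` performs runtime bounds checks.
+    ///     let _reg = bar.try_access().ok_or(ENXIO)?.try_read32(0x0)?;
+    ///
+    ///     // ...
+    /// }
+    /// ```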
+ pub fn iomap_region(&self, bar: u32, name: &CStr) -> Result<Devres<Bar>> {
+ self.iomap_region_sized::<0>(bar, name)
+ }
+}
+
+impl AsRef<device::Device> for Device {
+ fn as_ref(&self) -> &device::Device {
+ &self.0
+ }
+}
diff --git a/rust/kernel/pid_namespace.rs b/rust/kernel/pid_namespace.rs
new file mode 100644
index 000000000000..0e93808e4639
--- /dev/null
+++ b/rust/kernel/pid_namespace.rs
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (c) 2024 Christian Brauner <brauner@kernel.org>
+
+//! Pid namespaces.
+//!
+//! C header: [`include/linux/pid_namespace.h`](srctree/include/linux/pid_namespace.h) and
+//! [`include/linux/pid.h`](srctree/include/linux/pid.h)
+
+use crate::{
+ bindings,
+ types::{AlwaysRefCounted, Opaque},
+};
+use core::ptr;
+
+/// Wraps the kernel's `struct pid_namespace`. Thread safe.
+///
+/// This structure represents the Rust abstraction for a C `struct pid_namespace`. It abstracts
+/// the usage of an already existing C `struct pid_namespace` that Rust code gets passed from the
+/// C side.
+#[repr(transparent)]
+pub struct PidNamespace {
+ inner: Opaque<bindings::pid_namespace>,
+}
+
+impl PidNamespace {
+ /// Returns a raw pointer to the inner C struct.
+ #[inline]
+ pub fn as_ptr(&self) -> *mut bindings::pid_namespace {
+ self.inner.get()
+ }
+
+ /// Creates a reference to a [`PidNamespace`] from a valid pointer.
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure that `ptr` is valid and remains valid for the lifetime of the
+ /// returned [`PidNamespace`] reference.
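+    ///
+    /// A hedged sketch of the intended pattern when such a pointer is received from the C side;
+    /// the callback is illustrative, not part of this patch:
+    ///
+    /// ```ignore
+    /// unsafe extern "C" fn on_pid_ns(ns: *const bindings::pid_namespace) {
+    ///     // SAFETY: The C caller guarantees that `ns` is valid for the duration of this call.
+    ///     let ns = unsafe { PidNamespace::from_ptr(ns) };
+    ///     // Use `ns` only within this scope, or take a reference count to keep it alive longer.
+    /// }
+    /// ```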
+ pub unsafe fn from_ptr<'a>(ptr: *const bindings::pid_namespace) -> &'a Self {
+ // SAFETY: The safety requirements guarantee the validity of the dereference, while the
+ // `PidNamespace` type being transparent makes the cast ok.
+ unsafe { &*ptr.cast() }
+ }
+}
+
+// SAFETY: Instances of `PidNamespace` are always reference-counted.
+unsafe impl AlwaysRefCounted for PidNamespace {
+ #[inline]
+ fn inc_ref(&self) {
+ // SAFETY: The existence of a shared reference means that the refcount is nonzero.
+ unsafe { bindings::get_pid_ns(self.as_ptr()) };
+ }
+
+ #[inline]
+ unsafe fn dec_ref(obj: ptr::NonNull<PidNamespace>) {
+ // SAFETY: The safety requirements guarantee that the refcount is non-zero.
+ unsafe { bindings::put_pid_ns(obj.cast().as_ptr()) }
+ }
+}
+
+// SAFETY:
+// - `PidNamespace::dec_ref` can be called from any thread.
+// - It is okay to send ownership of `PidNamespace` across thread boundaries.
+unsafe impl Send for PidNamespace {}
+
+// SAFETY: It's OK to access `PidNamespace` through shared references from other threads because
+// we're either accessing properties that don't change or that are properly synchronised by C code.
+unsafe impl Sync for PidNamespace {}
diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs
new file mode 100644
index 000000000000..50e6b0421813
--- /dev/null
+++ b/rust/kernel/platform.rs
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Abstractions for the platform bus.
+//!
+//! C header: [`include/linux/platform_device.h`](srctree/include/linux/platform_device.h)
+
+use crate::{
+ bindings, container_of, device, driver,
+ error::{to_result, Result},
+ of,
+ prelude::*,
+ str::CStr,
+ types::{ARef, ForeignOwnable, Opaque},
+ ThisModule,
+};
+
+use core::ptr::addr_of_mut;
+
+/// An adapter for the registration of platform drivers.
+pub struct Adapter<T: Driver>(T);
+
+// SAFETY: A call to `unregister` for a given instance of `RegType` is guaranteed to be valid if
+// a preceding call to `register` has been successful.
+unsafe impl<T: Driver + 'static> driver::RegistrationOps for Adapter<T> {
+ type RegType = bindings::platform_driver;
+
+ unsafe fn register(
+ pdrv: &Opaque<Self::RegType>,
+ name: &'static CStr,
+ module: &'static ThisModule,
+ ) -> Result {
+ let of_table = match T::OF_ID_TABLE {
+ Some(table) => table.as_ptr(),
+ None => core::ptr::null(),
+ };
+
+ // SAFETY: It's safe to set the fields of `struct platform_driver` on initialization.
+ unsafe {
+ (*pdrv.get()).driver.name = name.as_char_ptr();
+ (*pdrv.get()).probe = Some(Self::probe_callback);
+ (*pdrv.get()).remove = Some(Self::remove_callback);
+ (*pdrv.get()).driver.of_match_table = of_table;
+ }
+
+ // SAFETY: `pdrv` is guaranteed to be a valid `RegType`.
+ to_result(unsafe { bindings::__platform_driver_register(pdrv.get(), module.0) })
+ }
+
+ unsafe fn unregister(pdrv: &Opaque<Self::RegType>) {
+ // SAFETY: `pdrv` is guaranteed to be a valid `RegType`.
+ unsafe { bindings::platform_driver_unregister(pdrv.get()) };
+ }
+}
+
+impl<T: Driver + 'static> Adapter<T> {
+ extern "C" fn probe_callback(pdev: *mut bindings::platform_device) -> kernel::ffi::c_int {
+ // SAFETY: The platform bus only ever calls the probe callback with a valid `pdev`.
+ let dev = unsafe { device::Device::get_device(addr_of_mut!((*pdev).dev)) };
+ // SAFETY: `dev` is guaranteed to be embedded in a valid `struct platform_device` by the
+ // call above.
+ let mut pdev = unsafe { Device::from_dev(dev) };
+
+ let info = <Self as driver::Adapter>::id_info(pdev.as_ref());
+ match T::probe(&mut pdev, info) {
+ Ok(data) => {
+ // Let the `struct platform_device` own a reference of the driver's private data.
+ // SAFETY: By the type invariant `pdev.as_raw` returns a valid pointer to a
+ // `struct platform_device`.
+ unsafe { bindings::platform_set_drvdata(pdev.as_raw(), data.into_foreign() as _) };
+ }
+ Err(err) => return Error::to_errno(err),
+ }
+
+ 0
+ }
+
+ extern "C" fn remove_callback(pdev: *mut bindings::platform_device) {
+ // SAFETY: `pdev` is a valid pointer to a `struct platform_device`.
+ let ptr = unsafe { bindings::platform_get_drvdata(pdev) };
+
+ // SAFETY: `remove_callback` is only ever called after a successful call to
+ // `probe_callback`, hence it's guaranteed that `ptr` points to a valid and initialized
+ // `KBox<T>` pointer created through `KBox::into_foreign`.
+ let _ = unsafe { KBox::<T>::from_foreign(ptr) };
+ }
+}
+
+impl<T: Driver + 'static> driver::Adapter for Adapter<T> {
+ type IdInfo = T::IdInfo;
+
+ fn of_id_table() -> Option<of::IdTable<Self::IdInfo>> {
+ T::OF_ID_TABLE
+ }
+}
+
+/// Declares a kernel module that exposes a single platform driver.
+///
+/// # Examples
+///
+/// ```ignore
+/// kernel::module_platform_driver! {
+/// type: MyDriver,
+/// name: "Module name",
+/// author: "Author name",
+/// description: "Description",
+/// license: "GPL v2",
+/// }
+/// ```
+#[macro_export]
+macro_rules! module_platform_driver {
+ ($($f:tt)*) => {
+ $crate::module_driver!(<T>, $crate::platform::Adapter<T>, { $($f)* });
+ };
+}
+
+/// The platform driver trait.
+///
+/// Drivers must implement this trait in order to get a platform driver registered.
+///
+/// # Example
+///
+///```
+/// # use kernel::{bindings, c_str, of, platform};
+///
+/// struct MyDriver;
+///
+/// kernel::of_device_table!(
+/// OF_TABLE,
+/// MODULE_OF_TABLE,
+/// <MyDriver as platform::Driver>::IdInfo,
+/// [
+/// (of::DeviceId::new(c_str!("test,device")), ())
+/// ]
+/// );
+///
+/// impl platform::Driver for MyDriver {
+/// type IdInfo = ();
+/// const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE);
+///
+/// fn probe(
+/// _pdev: &mut platform::Device,
+/// _id_info: Option<&Self::IdInfo>,
+/// ) -> Result<Pin<KBox<Self>>> {
+/// Err(ENODEV)
+/// }
+/// }
+///```
+pub trait Driver {
+ /// The type holding driver private data about each device id supported by the driver.
+ ///
+ /// TODO: Use associated_type_defaults once stabilized:
+ ///
+ /// type IdInfo: 'static = ();
+ type IdInfo: 'static;
+
+ /// The table of OF device ids supported by the driver.
+ const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>>;
+
+ /// Platform driver probe.
+ ///
+ /// Called when a new platform device is added or discovered.
+ /// Implementers should attempt to initialize the device here.
+ fn probe(dev: &mut Device, id_info: Option<&Self::IdInfo>) -> Result<Pin<KBox<Self>>>;
+}
+
+/// The platform device representation.
+///
+/// A platform device is based on an always reference-counted `device::Device` instance. Cloning a
+/// platform device hence also increments the base device's reference count.
+///
+/// # Invariants
+///
+/// `Device` holds a valid reference of `ARef<device::Device>` whose underlying `struct device` is a
+/// member of a `struct platform_device`.
+#[derive(Clone)]
+pub struct Device(ARef<device::Device>);
+
+impl Device {
+    /// Convert a raw kernel device into a `Device`.
+ ///
+ /// # Safety
+ ///
+    /// `dev` must be an `ARef<device::Device>` whose underlying `bindings::device` is a member of a
+ /// `bindings::platform_device`.
+ unsafe fn from_dev(dev: ARef<device::Device>) -> Self {
+ Self(dev)
+ }
+
+ fn as_raw(&self) -> *mut bindings::platform_device {
+ // SAFETY: By the type invariant `self.0.as_raw` is a pointer to the `struct device`
+ // embedded in `struct platform_device`.
+ unsafe { container_of!(self.0.as_raw(), bindings::platform_device, dev) }.cast_mut()
+ }
+}
+
+impl AsRef<device::Device> for Device {
+ fn as_ref(&self) -> &device::Device {
+ &self.0
+ }
+}
diff --git a/rust/kernel/prelude.rs b/rust/kernel/prelude.rs
index ae21600970b3..dde2e0649790 100644
--- a/rust/kernel/prelude.rs
+++ b/rust/kernel/prelude.rs
@@ -14,17 +14,18 @@
#[doc(no_inline)]
pub use core::pin::Pin;
-#[doc(no_inline)]
-pub use alloc::{boxed::Box, vec::Vec};
+pub use crate::alloc::{flags::*, Box, KBox, KVBox, KVVec, KVec, VBox, VVec, Vec};
#[doc(no_inline)]
pub use macros::{module, pin_data, pinned_drop, vtable, Zeroable};
-pub use super::build_assert;
+pub use super::{build_assert, build_error};
// `super::std_vendor` is hidden, which makes the macro inline for some reason.
#[doc(no_inline)]
pub use super::dbg;
+pub use super::fmt;
+pub use super::{dev_alert, dev_crit, dev_dbg, dev_emerg, dev_err, dev_info, dev_notice, dev_warn};
pub use super::{pr_alert, pr_crit, pr_debug, pr_emerg, pr_err, pr_info, pr_notice, pr_warn};
pub use super::{init, pin_init, try_init, try_pin_init};
@@ -35,6 +36,6 @@ pub use super::error::{code::*, Error, Result};
pub use super::{str::CStr, ThisModule};
-pub use super::init::{InPlaceInit, Init, PinInit};
+pub use super::init::{InPlaceInit, InPlaceWrite, Init, PinInit};
pub use super::current;
diff --git a/rust/kernel/print.rs b/rust/kernel/print.rs
index 9b13aca832c2..b19ee490be58 100644
--- a/rust/kernel/print.rs
+++ b/rust/kernel/print.rs
@@ -4,7 +4,7 @@
//!
//! C header: [`include/linux/printk.h`](srctree/include/linux/printk.h)
//!
-//! Reference: <https://www.kernel.org/doc/html/latest/core-api/printk-basics.html>
+//! Reference: <https://docs.kernel.org/core-api/printk-basics.html>
use core::{
ffi::{c_char, c_void},
@@ -13,10 +13,8 @@ use core::{
use crate::str::RawFormatter;
-#[cfg(CONFIG_PRINTK)]
-use crate::bindings;
-
// Called from `vsprintf` with format specifier `%pA`.
+#[expect(clippy::missing_safety_doc)]
#[no_mangle]
unsafe extern "C" fn rust_fmt_argument(
buf: *mut c_char,
@@ -26,6 +24,7 @@ unsafe extern "C" fn rust_fmt_argument(
use fmt::Write;
// SAFETY: The C contract guarantees that `buf` is valid if it's less than `end`.
let mut w = unsafe { RawFormatter::from_ptrs(buf.cast(), end.cast()) };
+ // SAFETY: TODO.
let _ = w.write_fmt(unsafe { *(ptr as *const fmt::Arguments<'_>) });
w.pos().cast()
}
@@ -35,8 +34,6 @@ unsafe extern "C" fn rust_fmt_argument(
/// Public but hidden since it should only be used from public macros.
#[doc(hidden)]
pub mod format_strings {
- use crate::bindings;
-
/// The length we copy from the `KERN_*` kernel prefixes.
const LENGTH_PREFIX: usize = 2;
@@ -107,9 +104,10 @@ pub unsafe fn call_printk(
) {
// `_printk` does not seem to fail in any path.
#[cfg(CONFIG_PRINTK)]
+ // SAFETY: TODO.
unsafe {
bindings::_printk(
- format_string.as_ptr() as _,
+ format_string.as_ptr(),
module_name.as_ptr(),
&args as *const _ as *const c_void,
);
@@ -130,7 +128,7 @@ pub fn call_printk_cont(args: fmt::Arguments<'_>) {
#[cfg(CONFIG_PRINTK)]
unsafe {
bindings::_printk(
- format_strings::CONT.as_ptr() as _,
+ format_strings::CONT.as_ptr(),
&args as *const _ as *const c_void,
);
}
@@ -142,7 +140,7 @@ pub fn call_printk_cont(args: fmt::Arguments<'_>) {
#[doc(hidden)]
#[cfg(not(testlib))]
#[macro_export]
-#[allow(clippy::crate_in_macro_def)]
+#[expect(clippy::crate_in_macro_def)]
macro_rules! print_macro (
// The non-continuation cases (most of them, e.g. `INFO`).
($format_string:path, false, $($arg:tt)+) => (
@@ -202,7 +200,7 @@ macro_rules! print_macro (
/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and
/// `alloc::format!` for information about the formatting syntax.
///
-/// [`pr_emerg`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_emerg
+/// [`pr_emerg`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_emerg
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
///
/// # Examples
@@ -226,7 +224,7 @@ macro_rules! pr_emerg (
/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and
/// `alloc::format!` for information about the formatting syntax.
///
-/// [`pr_alert`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_alert
+/// [`pr_alert`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_alert
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
///
/// # Examples
@@ -250,7 +248,7 @@ macro_rules! pr_alert (
/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and
/// `alloc::format!` for information about the formatting syntax.
///
-/// [`pr_crit`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_crit
+/// [`pr_crit`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_crit
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
///
/// # Examples
@@ -274,7 +272,7 @@ macro_rules! pr_crit (
/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and
/// `alloc::format!` for information about the formatting syntax.
///
-/// [`pr_err`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_err
+/// [`pr_err`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_err
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
///
/// # Examples
@@ -298,7 +296,7 @@ macro_rules! pr_err (
/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and
/// `alloc::format!` for information about the formatting syntax.
///
-/// [`pr_warn`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_warn
+/// [`pr_warn`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_warn
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
///
/// # Examples
@@ -322,7 +320,7 @@ macro_rules! pr_warn (
/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and
/// `alloc::format!` for information about the formatting syntax.
///
-/// [`pr_notice`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_notice
+/// [`pr_notice`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_notice
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
///
/// # Examples
@@ -346,7 +344,7 @@ macro_rules! pr_notice (
/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and
/// `alloc::format!` for information about the formatting syntax.
///
-/// [`pr_info`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_info
+/// [`pr_info`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_info
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
///
/// # Examples
@@ -372,7 +370,7 @@ macro_rules! pr_info (
/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and
/// `alloc::format!` for information about the formatting syntax.
///
-/// [`pr_debug`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_debug
+/// [`pr_debug`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_debug
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
///
/// # Examples
@@ -400,7 +398,7 @@ macro_rules! pr_debug (
/// `alloc::format!` for information about the formatting syntax.
///
/// [`pr_info!`]: crate::pr_info!
-/// [`pr_cont`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_cont
+/// [`pr_cont`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_cont
/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
///
/// # Examples
diff --git a/rust/kernel/rbtree.rs b/rust/kernel/rbtree.rs
new file mode 100644
index 000000000000..0d1e75810664
--- /dev/null
+++ b/rust/kernel/rbtree.rs
@@ -0,0 +1,1286 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Red-black trees.
+//!
+//! C header: [`include/linux/rbtree.h`](srctree/include/linux/rbtree.h)
+//!
+//! Reference: <https://docs.kernel.org/core-api/rbtree.html>
+
+use crate::{alloc::Flags, bindings, container_of, error::Result, prelude::*};
+use core::{
+ cmp::{Ord, Ordering},
+ marker::PhantomData,
+ mem::MaybeUninit,
+ ptr::{addr_of_mut, from_mut, NonNull},
+};
+
+/// A red-black tree with owned nodes.
+///
+/// It is backed by the kernel C red-black trees.
+///
+/// # Examples
+///
+/// In the example below we do several operations on a tree. We note that insertions may fail if
+/// the system is out of memory.
+///
+/// ```
+/// use kernel::{alloc::flags, rbtree::{RBTree, RBTreeNode, RBTreeNodeReservation}};
+///
+/// // Create a new tree.
+/// let mut tree = RBTree::new();
+///
+/// // Insert three elements.
+/// tree.try_create_and_insert(20, 200, flags::GFP_KERNEL)?;
+/// tree.try_create_and_insert(10, 100, flags::GFP_KERNEL)?;
+/// tree.try_create_and_insert(30, 300, flags::GFP_KERNEL)?;
+///
+/// // Check the nodes we just inserted.
+/// {
+/// assert_eq!(tree.get(&10), Some(&100));
+/// assert_eq!(tree.get(&20), Some(&200));
+/// assert_eq!(tree.get(&30), Some(&300));
+/// }
+///
+/// // Iterate over the nodes we just inserted.
+/// {
+/// let mut iter = tree.iter();
+/// assert_eq!(iter.next(), Some((&10, &100)));
+/// assert_eq!(iter.next(), Some((&20, &200)));
+/// assert_eq!(iter.next(), Some((&30, &300)));
+/// assert!(iter.next().is_none());
+/// }
+///
+/// // Print all elements.
+/// for (key, value) in &tree {
+/// pr_info!("{} = {}\n", key, value);
+/// }
+///
+/// // Replace one of the elements.
+/// tree.try_create_and_insert(10, 1000, flags::GFP_KERNEL)?;
+///
+/// // Check that the tree reflects the replacement.
+/// {
+/// let mut iter = tree.iter();
+/// assert_eq!(iter.next(), Some((&10, &1000)));
+/// assert_eq!(iter.next(), Some((&20, &200)));
+/// assert_eq!(iter.next(), Some((&30, &300)));
+/// assert!(iter.next().is_none());
+/// }
+///
+/// // Change the value of one of the elements.
+/// *tree.get_mut(&30).unwrap() = 3000;
+///
+/// // Check that the tree reflects the update.
+/// {
+/// let mut iter = tree.iter();
+/// assert_eq!(iter.next(), Some((&10, &1000)));
+/// assert_eq!(iter.next(), Some((&20, &200)));
+/// assert_eq!(iter.next(), Some((&30, &3000)));
+/// assert!(iter.next().is_none());
+/// }
+///
+/// // Remove an element.
+/// tree.remove(&10);
+///
+/// // Check that the tree reflects the removal.
+/// {
+/// let mut iter = tree.iter();
+/// assert_eq!(iter.next(), Some((&20, &200)));
+/// assert_eq!(iter.next(), Some((&30, &3000)));
+/// assert!(iter.next().is_none());
+/// }
+///
+/// # Ok::<(), Error>(())
+/// ```
+///
+/// In the example below, we first allocate a node, acquire a spinlock, then insert the node into
+/// the tree. This is useful when the insertion context does not allow sleeping, for example, when
+/// holding a spinlock.
+///
+/// ```
+/// use kernel::{alloc::flags, rbtree::{RBTree, RBTreeNode}, sync::SpinLock};
+///
+/// fn insert_test(tree: &SpinLock<RBTree<u32, u32>>) -> Result {
+/// // Pre-allocate node. This may fail (as it allocates memory).
+/// let node = RBTreeNode::new(10, 100, flags::GFP_KERNEL)?;
+///
+/// // Insert node while holding the lock. It is guaranteed to succeed with no allocation
+/// // attempts.
+/// let mut guard = tree.lock();
+/// guard.insert(node);
+/// Ok(())
+/// }
+/// ```
+///
+/// In the example below, we reuse an existing node allocation from an element we removed.
+///
+/// ```
+/// use kernel::{alloc::flags, rbtree::{RBTree, RBTreeNodeReservation}};
+///
+/// // Create a new tree.
+/// let mut tree = RBTree::new();
+///
+/// // Insert three elements.
+/// tree.try_create_and_insert(20, 200, flags::GFP_KERNEL)?;
+/// tree.try_create_and_insert(10, 100, flags::GFP_KERNEL)?;
+/// tree.try_create_and_insert(30, 300, flags::GFP_KERNEL)?;
+///
+/// // Check the nodes we just inserted.
+/// {
+/// let mut iter = tree.iter();
+/// assert_eq!(iter.next(), Some((&10, &100)));
+/// assert_eq!(iter.next(), Some((&20, &200)));
+/// assert_eq!(iter.next(), Some((&30, &300)));
+/// assert!(iter.next().is_none());
+/// }
+///
+/// // Remove a node, getting back ownership of it.
+/// let existing = tree.remove(&30);
+///
+/// // Check that the tree reflects the removal.
+/// {
+/// let mut iter = tree.iter();
+/// assert_eq!(iter.next(), Some((&10, &100)));
+/// assert_eq!(iter.next(), Some((&20, &200)));
+/// assert!(iter.next().is_none());
+/// }
+///
+/// // Create a preallocated reservation that we can re-use later.
+/// let reservation = RBTreeNodeReservation::new(flags::GFP_KERNEL)?;
+///
+/// // Insert a new node into the tree, reusing the previous allocation. This is guaranteed to
+/// // succeed (no memory allocations).
+/// tree.insert(reservation.into_node(15, 150));
+///
+/// // Check that the tree reflects the new insertion.
+/// {
+/// let mut iter = tree.iter();
+/// assert_eq!(iter.next(), Some((&10, &100)));
+/// assert_eq!(iter.next(), Some((&15, &150)));
+/// assert_eq!(iter.next(), Some((&20, &200)));
+/// assert!(iter.next().is_none());
+/// }
+///
+/// # Ok::<(), Error>(())
+/// ```
+///
+/// # Invariants
+///
+/// Non-null parent/children pointers stored in instances of the `rb_node` C struct are always
+/// valid, and pointing to a field of our internal representation of a node.
+pub struct RBTree<K, V> {
+ root: bindings::rb_root,
+ _p: PhantomData<Node<K, V>>,
+}
+
+// SAFETY: An [`RBTree`] allows the same kinds of access to its values that a struct allows to its
+// fields, so we use the same Send condition as would be used for a struct with K and V fields.
+unsafe impl<K: Send, V: Send> Send for RBTree<K, V> {}
+
+// SAFETY: An [`RBTree`] allows the same kinds of access to its values that a struct allows to its
+// fields, so we use the same Sync condition as would be used for a struct with K and V fields.
+unsafe impl<K: Sync, V: Sync> Sync for RBTree<K, V> {}
+
+impl<K, V> RBTree<K, V> {
+ /// Creates a new and empty tree.
+ pub fn new() -> Self {
+ Self {
+ // INVARIANT: There are no nodes in the tree, so the invariant holds vacuously.
+ root: bindings::rb_root::default(),
+ _p: PhantomData,
+ }
+ }
+
+ /// Returns an iterator over the tree nodes, sorted by key.
+ pub fn iter(&self) -> Iter<'_, K, V> {
+ Iter {
+ _tree: PhantomData,
+ // INVARIANT:
+ // - `self.root` is a valid pointer to a tree root.
+ // - `bindings::rb_first` produces a valid pointer to a node given `root` is valid.
+ iter_raw: IterRaw {
+ // SAFETY: by the invariants, all pointers are valid.
+ next: unsafe { bindings::rb_first(&self.root) },
+ _phantom: PhantomData,
+ },
+ }
+ }
+
+ /// Returns a mutable iterator over the tree nodes, sorted by key.
+ pub fn iter_mut(&mut self) -> IterMut<'_, K, V> {
+ IterMut {
+ _tree: PhantomData,
+ // INVARIANT:
+ // - `self.root` is a valid pointer to a tree root.
+ // - `bindings::rb_first` produces a valid pointer to a node given `root` is valid.
+ iter_raw: IterRaw {
+ // SAFETY: by the invariants, all pointers are valid.
+ next: unsafe { bindings::rb_first(from_mut(&mut self.root)) },
+ _phantom: PhantomData,
+ },
+ }
+ }
+
+ /// Returns an iterator over the keys of the nodes in the tree, in sorted order.
+ pub fn keys(&self) -> impl Iterator<Item = &'_ K> {
+ self.iter().map(|(k, _)| k)
+ }
+
+ /// Returns an iterator over the values of the nodes in the tree, sorted by key.
+ pub fn values(&self) -> impl Iterator<Item = &'_ V> {
+ self.iter().map(|(_, v)| v)
+ }
+
+ /// Returns a mutable iterator over the values of the nodes in the tree, sorted by key.
+ pub fn values_mut(&mut self) -> impl Iterator<Item = &'_ mut V> {
+ self.iter_mut().map(|(_, v)| v)
+ }
+
+ /// Returns a cursor over the tree nodes, starting with the smallest key.
+ pub fn cursor_front(&mut self) -> Option<Cursor<'_, K, V>> {
+ let root = addr_of_mut!(self.root);
+ // SAFETY: `self.root` is always a valid root node
+ let current = unsafe { bindings::rb_first(root) };
+ NonNull::new(current).map(|current| {
+ // INVARIANT:
+ // - `current` is a valid node in the [`RBTree`] pointed to by `self`.
+ Cursor {
+ current,
+ tree: self,
+ }
+ })
+ }
+
+ /// Returns a cursor over the tree nodes, starting with the largest key.
+ pub fn cursor_back(&mut self) -> Option<Cursor<'_, K, V>> {
+ let root = addr_of_mut!(self.root);
+ // SAFETY: `self.root` is always a valid root node
+ let current = unsafe { bindings::rb_last(root) };
+ NonNull::new(current).map(|current| {
+ // INVARIANT:
+ // - `current` is a valid node in the [`RBTree`] pointed to by `self`.
+ Cursor {
+ current,
+ tree: self,
+ }
+ })
+ }
+}
+
+impl<K, V> RBTree<K, V>
+where
+ K: Ord,
+{
+ /// Tries to insert a new value into the tree.
+ ///
+ /// It overwrites a node if one already exists with the same key and returns it (containing the
+ /// key/value pair). Returns [`None`] if a node with the same key didn't already exist.
+ ///
+ /// Returns an error if it cannot allocate memory for the new node.
+ pub fn try_create_and_insert(
+ &mut self,
+ key: K,
+ value: V,
+ flags: Flags,
+ ) -> Result<Option<RBTreeNode<K, V>>> {
+ Ok(self.insert(RBTreeNode::new(key, value, flags)?))
+ }
+
+ /// Inserts a new node into the tree.
+ ///
+ /// It overwrites a node if one already exists with the same key and returns it (containing the
+ /// key/value pair). Returns [`None`] if a node with the same key didn't already exist.
+ ///
+ /// This function always succeeds.
+ pub fn insert(&mut self, node: RBTreeNode<K, V>) -> Option<RBTreeNode<K, V>> {
+ match self.raw_entry(&node.node.key) {
+ RawEntry::Occupied(entry) => Some(entry.replace(node)),
+ RawEntry::Vacant(entry) => {
+ entry.insert(node);
+ None
+ }
+ }
+ }
+
+ fn raw_entry(&mut self, key: &K) -> RawEntry<'_, K, V> {
+ let raw_self: *mut RBTree<K, V> = self;
+ // The returned `RawEntry` is used to call either `rb_link_node` or `rb_replace_node`.
+ // The parameters of `bindings::rb_link_node` are as follows:
+ // - `node`: A pointer to an uninitialized node being inserted.
+ // - `parent`: A pointer to an existing node in the tree. One of its child pointers must be
+ // null, and `node` will become a child of `parent` by replacing that child pointer
+ // with a pointer to `node`.
+ // - `rb_link`: A pointer to either the left-child or right-child field of `parent`. This
+ // specifies which child of `parent` should hold `node` after this call. The
+ // value of `*rb_link` must be null before the call to `rb_link_node`. If the
+ // red/black tree is empty, then it’s also possible for `parent` to be null. In
+ // this case, `rb_link` is a pointer to the `root` field of the red/black tree.
+ //
+ // We will traverse the tree looking for a node that has a null pointer as its child,
+ // representing an empty subtree where we can insert our new node. We need to make sure
+ // that we preserve the ordering of the nodes in the tree. In each iteration of the loop
+ // we store `parent` and `child_field_of_parent`, and the new `node` will go somewhere
+ // in the subtree of `parent` that `child_field_of_parent` points at. Once
+ // we find an empty subtree, we can insert the new node using `rb_link_node`.
+ let mut parent = core::ptr::null_mut();
+ let mut child_field_of_parent: &mut *mut bindings::rb_node =
+ // SAFETY: `raw_self` is a valid pointer to the `RBTree` (created from `self` above).
+ unsafe { &mut (*raw_self).root.rb_node };
+ while !(*child_field_of_parent).is_null() {
+ let curr = *child_field_of_parent;
+ // SAFETY: All links fields we create are in a `Node<K, V>`.
+ let node = unsafe { container_of!(curr, Node<K, V>, links) };
+
+ // SAFETY: `node` is a non-null node so it is valid by the type invariants.
+ match key.cmp(unsafe { &(*node).key }) {
+ // SAFETY: `curr` is a non-null node so it is valid by the type invariants.
+ Ordering::Less => child_field_of_parent = unsafe { &mut (*curr).rb_left },
+ // SAFETY: `curr` is a non-null node so it is valid by the type invariants.
+ Ordering::Greater => child_field_of_parent = unsafe { &mut (*curr).rb_right },
+ Ordering::Equal => {
+ return RawEntry::Occupied(OccupiedEntry {
+ rbtree: self,
+ node_links: curr,
+ })
+ }
+ }
+ parent = curr;
+ }
+
+ RawEntry::Vacant(RawVacantEntry {
+ rbtree: raw_self,
+ parent,
+ child_field_of_parent,
+ _phantom: PhantomData,
+ })
+ }
+
+ /// Gets the given key's corresponding entry in the map for in-place manipulation.
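+    ///
+    /// A hedged sketch of the intended pattern, mirroring the `std` entry API; the `Entry`
+    /// variants are defined further below in this file:
+    ///
+    /// ```ignore
+    /// match tree.entry(10) {
+    ///     Entry::Occupied(entry) => {
+    ///         // Update the existing value in place.
+    ///     }
+    ///     Entry::Vacant(entry) => {
+    ///         // Insert a new node, e.g. from a pre-allocated reservation.
+    ///     }
+    /// }
+    /// ```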
+ pub fn entry(&mut self, key: K) -> Entry<'_, K, V> {
+ match self.raw_entry(&key) {
+ RawEntry::Occupied(entry) => Entry::Occupied(entry),
+ RawEntry::Vacant(entry) => Entry::Vacant(VacantEntry { raw: entry, key }),
+ }
+ }
+
+ /// Used for accessing the given node, if it exists.
+ pub fn find_mut(&mut self, key: &K) -> Option<OccupiedEntry<'_, K, V>> {
+ match self.raw_entry(key) {
+ RawEntry::Occupied(entry) => Some(entry),
+ RawEntry::Vacant(_entry) => None,
+ }
+ }
+
+ /// Returns a reference to the value corresponding to the key.
+ pub fn get(&self, key: &K) -> Option<&V> {
+ let mut node = self.root.rb_node;
+ while !node.is_null() {
+ // SAFETY: By the type invariant of `Self`, all non-null `rb_node` pointers stored in `self`
+ // point to the links field of `Node<K, V>` objects.
+ let this = unsafe { container_of!(node, Node<K, V>, links) };
+ // SAFETY: `this` is a non-null node so it is valid by the type invariants.
+ node = match key.cmp(unsafe { &(*this).key }) {
+ // SAFETY: `node` is a non-null node so it is valid by the type invariants.
+ Ordering::Less => unsafe { (*node).rb_left },
+ // SAFETY: `node` is a non-null node so it is valid by the type invariants.
+ Ordering::Greater => unsafe { (*node).rb_right },
+ // SAFETY: `node` is a non-null node so it is valid by the type invariants.
+ Ordering::Equal => return Some(unsafe { &(*this).value }),
+ }
+ }
+ None
+ }
+
+ /// Returns a mutable reference to the value corresponding to the key.
+ pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
+ self.find_mut(key).map(|node| node.into_mut())
+ }
+
+ /// Removes the node with the given key from the tree.
+ ///
+ /// It returns the node that was removed if one exists, or [`None`] otherwise.
+ pub fn remove_node(&mut self, key: &K) -> Option<RBTreeNode<K, V>> {
+ self.find_mut(key).map(OccupiedEntry::remove_node)
+ }
+
+ /// Removes the node with the given key from the tree.
+ ///
+ /// It returns the value that was removed if one exists, or [`None`] otherwise.
+ pub fn remove(&mut self, key: &K) -> Option<V> {
+ self.find_mut(key).map(OccupiedEntry::remove)
+ }
+
+ /// Returns a cursor over the tree nodes based on the given key.
+ ///
+ /// If the given key exists, the cursor starts there.
+ /// Otherwise it starts with the first larger key in sort order.
+ /// If there is no larger key, it returns [`None`].
+ pub fn cursor_lower_bound(&mut self, key: &K) -> Option<Cursor<'_, K, V>>
+ where
+ K: Ord,
+ {
+ let mut node = self.root.rb_node;
+ let mut best_match: Option<NonNull<Node<K, V>>> = None;
+ while !node.is_null() {
+ // SAFETY: By the type invariant of `Self`, all non-null `rb_node` pointers stored in `self`
+ // point to the links field of `Node<K, V>` objects.
+ let this = unsafe { container_of!(node, Node<K, V>, links) }.cast_mut();
+ // SAFETY: `this` is a non-null node so it is valid by the type invariants.
+ let this_key = unsafe { &(*this).key };
+ // SAFETY: `node` is a non-null node so it is valid by the type invariants.
+ let left_child = unsafe { (*node).rb_left };
+ // SAFETY: `node` is a non-null node so it is valid by the type invariants.
+ let right_child = unsafe { (*node).rb_right };
+ match key.cmp(this_key) {
+ Ordering::Equal => {
+ best_match = NonNull::new(this);
+ break;
+ }
+ Ordering::Greater => {
+ node = right_child;
+ }
+ Ordering::Less => {
+ let is_better_match = match best_match {
+ None => true,
+ Some(best) => {
+ // SAFETY: `best` is a non-null node so it is valid by the type invariants.
+ let best_key = unsafe { &(*best.as_ptr()).key };
+ best_key > this_key
+ }
+ };
+ if is_better_match {
+ best_match = NonNull::new(this);
+ }
+ node = left_child;
+ }
+ };
+ }
+
+ let best = best_match?;
+
+ // SAFETY: `best` is a non-null node so it is valid by the type invariants.
+ let links = unsafe { addr_of_mut!((*best.as_ptr()).links) };
+
+ NonNull::new(links).map(|current| {
+ // INVARIANT:
+ // - `current` is a valid node in the [`RBTree`] pointed to by `self`.
+ Cursor {
+ current,
+ tree: self,
+ }
+ })
+ }
+}
+
+impl<K, V> Default for RBTree<K, V> {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl<K, V> Drop for RBTree<K, V> {
+ fn drop(&mut self) {
+ // SAFETY: `root` is valid as it's embedded in `self` and we have a valid `self`.
+ let mut next = unsafe { bindings::rb_first_postorder(&self.root) };
+
+ // INVARIANT: The loop invariant is that all tree nodes from `next` in postorder are valid.
+ while !next.is_null() {
+ // SAFETY: All links fields we create are in a `Node<K, V>`.
+ let this = unsafe { container_of!(next, Node<K, V>, links) };
+
+ // Find out what the next node is before disposing of the current one.
+ // SAFETY: `next` and all nodes in postorder are still valid.
+ next = unsafe { bindings::rb_next_postorder(next) };
+
+ // INVARIANT: This is the destructor, so we break the type invariant during clean-up,
+ // but it is not observable. The loop invariant is still maintained.
+
+ // SAFETY: `this` is valid per the loop invariant.
+ unsafe { drop(KBox::from_raw(this.cast_mut())) };
+ }
+ }
+}
+
+/// A bidirectional cursor over the tree nodes, sorted by key.
+///
+/// # Examples
+///
+/// In the following example, we obtain a cursor to the first element in the tree.
+/// The cursor allows us to iterate bidirectionally over key/value pairs in the tree.
+///
+/// ```
+/// use kernel::{alloc::flags, rbtree::RBTree};
+///
+/// // Create a new tree.
+/// let mut tree = RBTree::new();
+///
+/// // Insert three elements.
+/// tree.try_create_and_insert(10, 100, flags::GFP_KERNEL)?;
+/// tree.try_create_and_insert(20, 200, flags::GFP_KERNEL)?;
+/// tree.try_create_and_insert(30, 300, flags::GFP_KERNEL)?;
+///
+/// // Get a cursor to the first element.
+/// let mut cursor = tree.cursor_front().unwrap();
+/// let mut current = cursor.current();
+/// assert_eq!(current, (&10, &100));
+///
+/// // Move the cursor, updating it to the 2nd element.
+/// cursor = cursor.move_next().unwrap();
+/// current = cursor.current();
+/// assert_eq!(current, (&20, &200));
+///
+/// // Peek at the next element without impacting the cursor.
+/// let next = cursor.peek_next().unwrap();
+/// assert_eq!(next, (&30, &300));
+/// current = cursor.current();
+/// assert_eq!(current, (&20, &200));
+///
+/// // Moving past the last element causes the cursor to return [`None`].
+/// cursor = cursor.move_next().unwrap();
+/// current = cursor.current();
+/// assert_eq!(current, (&30, &300));
+/// let cursor = cursor.move_next();
+/// assert!(cursor.is_none());
+///
+/// # Ok::<(), Error>(())
+/// ```
+///
+/// A cursor can also be obtained at the last element in the tree.
+///
+/// ```
+/// use kernel::{alloc::flags, rbtree::RBTree};
+///
+/// // Create a new tree.
+/// let mut tree = RBTree::new();
+///
+/// // Insert three elements.
+/// tree.try_create_and_insert(10, 100, flags::GFP_KERNEL)?;
+/// tree.try_create_and_insert(20, 200, flags::GFP_KERNEL)?;
+/// tree.try_create_and_insert(30, 300, flags::GFP_KERNEL)?;
+///
+/// let mut cursor = tree.cursor_back().unwrap();
+/// let current = cursor.current();
+/// assert_eq!(current, (&30, &300));
+///
+/// # Ok::<(), Error>(())
+/// ```
+///
+/// Obtaining a cursor returns [`None`] if the tree is empty.
+///
+/// ```
+/// use kernel::rbtree::RBTree;
+///
+/// let mut tree: RBTree<u16, u16> = RBTree::new();
+/// assert!(tree.cursor_front().is_none());
+///
+/// # Ok::<(), Error>(())
+/// ```
+///
+/// [`RBTree::cursor_lower_bound`] can be used to start at an arbitrary node in the tree.
+///
+/// ```
+/// use kernel::{alloc::flags, rbtree::RBTree};
+///
+/// // Create a new tree.
+/// let mut tree = RBTree::new();
+///
+/// // Insert five elements.
+/// tree.try_create_and_insert(10, 100, flags::GFP_KERNEL)?;
+/// tree.try_create_and_insert(20, 200, flags::GFP_KERNEL)?;
+/// tree.try_create_and_insert(30, 300, flags::GFP_KERNEL)?;
+/// tree.try_create_and_insert(40, 400, flags::GFP_KERNEL)?;
+/// tree.try_create_and_insert(50, 500, flags::GFP_KERNEL)?;
+///
+/// // If the provided key exists, a cursor to that key is returned.
+/// let cursor = tree.cursor_lower_bound(&20).unwrap();
+/// let current = cursor.current();
+/// assert_eq!(current, (&20, &200));
+///
+/// // If the provided key doesn't exist, a cursor to the first larger element in sort order is returned.
+/// let cursor = tree.cursor_lower_bound(&25).unwrap();
+/// let current = cursor.current();
+/// assert_eq!(current, (&30, &300));
+///
+/// // If there is no larger key, [`None`] is returned.
+/// let cursor = tree.cursor_lower_bound(&55);
+/// assert!(cursor.is_none());
+///
+/// # Ok::<(), Error>(())
+/// ```
+///
+/// The cursor allows mutation of values in the tree.
+///
+/// ```
+/// use kernel::{alloc::flags, rbtree::RBTree};
+///
+/// // Create a new tree.
+/// let mut tree = RBTree::new();
+///
+/// // Insert three elements.
+/// tree.try_create_and_insert(10, 100, flags::GFP_KERNEL)?;
+/// tree.try_create_and_insert(20, 200, flags::GFP_KERNEL)?;
+/// tree.try_create_and_insert(30, 300, flags::GFP_KERNEL)?;
+///
+/// // Retrieve a cursor.
+/// let mut cursor = tree.cursor_front().unwrap();
+///
+/// // Get a mutable reference to the current value.
+/// let (k, v) = cursor.current_mut();
+/// *v = 1000;
+///
+/// // The updated value is reflected in the tree.
+/// let updated = tree.get(&10).unwrap();
+/// assert_eq!(updated, &1000);
+///
+/// # Ok::<(), Error>(())
+/// ```
+///
+/// It also allows node removal. The following examples demonstrate the behavior of removing the current node.
+///
+/// ```
+/// use kernel::{alloc::flags, rbtree::RBTree};
+///
+/// // Create a new tree.
+/// let mut tree = RBTree::new();
+///
+/// // Insert three elements.
+/// tree.try_create_and_insert(10, 100, flags::GFP_KERNEL)?;
+/// tree.try_create_and_insert(20, 200, flags::GFP_KERNEL)?;
+/// tree.try_create_and_insert(30, 300, flags::GFP_KERNEL)?;
+///
+/// // Remove the first element.
+/// let mut cursor = tree.cursor_front().unwrap();
+/// let mut current = cursor.current();
+/// assert_eq!(current, (&10, &100));
+/// cursor = cursor.remove_current().0.unwrap();
+///
+/// // If a node exists after the current element, it is returned.
+/// current = cursor.current();
+/// assert_eq!(current, (&20, &200));
+///
+/// // Get a cursor to the last element, and remove it.
+/// cursor = tree.cursor_back().unwrap();
+/// current = cursor.current();
+/// assert_eq!(current, (&30, &300));
+///
+/// // Since there is no next node, the previous node is returned.
+/// cursor = cursor.remove_current().0.unwrap();
+/// current = cursor.current();
+/// assert_eq!(current, (&20, &200));
+///
+/// // Removing the last element in the tree returns [`None`].
+/// assert!(cursor.remove_current().0.is_none());
+///
+/// # Ok::<(), Error>(())
+/// ```
+///
+/// Nodes adjacent to the current node can also be removed.
+///
+/// ```
+/// use kernel::{alloc::flags, rbtree::RBTree};
+///
+/// // Create a new tree.
+/// let mut tree = RBTree::new();
+///
+/// // Insert three elements.
+/// tree.try_create_and_insert(10, 100, flags::GFP_KERNEL)?;
+/// tree.try_create_and_insert(20, 200, flags::GFP_KERNEL)?;
+/// tree.try_create_and_insert(30, 300, flags::GFP_KERNEL)?;
+///
+/// // Get a cursor to the first element.
+/// let mut cursor = tree.cursor_front().unwrap();
+/// let mut current = cursor.current();
+/// assert_eq!(current, (&10, &100));
+///
+/// // Calling `remove_prev` from the first element returns [`None`].
+/// assert!(cursor.remove_prev().is_none());
+///
+/// // Get a cursor to the last element.
+/// cursor = tree.cursor_back().unwrap();
+/// current = cursor.current();
+/// assert_eq!(current, (&30, &300));
+///
+/// // Calling `remove_prev` removes and returns the middle element.
+/// assert_eq!(cursor.remove_prev().unwrap().to_key_value(), (20, 200));
+///
+/// // Calling `remove_next` from the last element returns [`None`].
+/// assert!(cursor.remove_next().is_none());
+///
+/// // Move to the first element
+/// cursor = cursor.move_prev().unwrap();
+/// current = cursor.current();
+/// assert_eq!(current, (&10, &100));
+///
+/// // Calling `remove_next` removes and returns the last element.
+/// assert_eq!(cursor.remove_next().unwrap().to_key_value(), (30, 300));
+///
+/// # Ok::<(), Error>(())
+/// ```
+///
+/// # Invariants
+/// - `current` points to a node that is in the same [`RBTree`] as `tree`.
+pub struct Cursor<'a, K, V> {
+ tree: &'a mut RBTree<K, V>,
+ current: NonNull<bindings::rb_node>,
+}
+
+// SAFETY: The [`Cursor`] has exclusive access to both `K` and `V`, so it is sufficient to require them to be `Send`.
+// The cursor only gives out immutable references to the keys, but since it has exclusive access to those same
+// keys, `Send` is sufficient. `Sync` would be okay, but it is more restrictive to the user.
+unsafe impl<'a, K: Send, V: Send> Send for Cursor<'a, K, V> {}
+
+// SAFETY: The [`Cursor`] gives out immutable references to K and mutable references to V,
+// so it has the same thread safety requirements as mutable references.
+unsafe impl<'a, K: Sync, V: Sync> Sync for Cursor<'a, K, V> {}
+
+impl<'a, K, V> Cursor<'a, K, V> {
+    /// The current node.
+ pub fn current(&self) -> (&K, &V) {
+ // SAFETY:
+ // - `self.current` is a valid node by the type invariants.
+ // - We have an immutable reference by the function signature.
+ unsafe { Self::to_key_value(self.current) }
+ }
+
+    /// Returns the key and value of the current node, with mutable access to the value.
+ pub fn current_mut(&mut self) -> (&K, &mut V) {
+ // SAFETY:
+ // - `self.current` is a valid node by the type invariants.
+        // - We have a mutable reference by the function signature.
+ unsafe { Self::to_key_value_mut(self.current) }
+ }
+
+ /// Remove the current node from the tree.
+ ///
+ /// Returns a tuple where the first element is a cursor to the next node, if it exists,
+ /// else the previous node, else [`None`] (if the tree becomes empty). The second element
+ /// is the removed node.
+ pub fn remove_current(self) -> (Option<Self>, RBTreeNode<K, V>) {
+ let prev = self.get_neighbor_raw(Direction::Prev);
+ let next = self.get_neighbor_raw(Direction::Next);
+ // SAFETY: By the type invariant of `Self`, all non-null `rb_node` pointers stored in `self`
+ // point to the links field of `Node<K, V>` objects.
+ let this = unsafe { container_of!(self.current.as_ptr(), Node<K, V>, links) }.cast_mut();
+ // SAFETY: `this` is valid by the type invariants as described above.
+ let node = unsafe { KBox::from_raw(this) };
+ let node = RBTreeNode { node };
+ // SAFETY: The reference to the tree used to create the cursor outlives the cursor, so
+ // the tree cannot change. By the tree invariant, all nodes are valid.
+ unsafe { bindings::rb_erase(&mut (*this).links, addr_of_mut!(self.tree.root)) };
+
+ let current = match (prev, next) {
+ (_, Some(next)) => next,
+ (Some(prev), None) => prev,
+ (None, None) => {
+ return (None, node);
+ }
+ };
+
+ (
+ // INVARIANT:
+ // - `current` is a valid node in the [`RBTree`] pointed to by `self.tree`.
+ Some(Self {
+ current,
+ tree: self.tree,
+ }),
+ node,
+ )
+ }
+
+ /// Remove the previous node, returning it if it exists.
+ pub fn remove_prev(&mut self) -> Option<RBTreeNode<K, V>> {
+ self.remove_neighbor(Direction::Prev)
+ }
+
+ /// Remove the next node, returning it if it exists.
+ pub fn remove_next(&mut self) -> Option<RBTreeNode<K, V>> {
+ self.remove_neighbor(Direction::Next)
+ }
+
+ fn remove_neighbor(&mut self, direction: Direction) -> Option<RBTreeNode<K, V>> {
+ if let Some(neighbor) = self.get_neighbor_raw(direction) {
+ let neighbor = neighbor.as_ptr();
+ // SAFETY: The reference to the tree used to create the cursor outlives the cursor, so
+ // the tree cannot change. By the tree invariant, all nodes are valid.
+ unsafe { bindings::rb_erase(neighbor, addr_of_mut!(self.tree.root)) };
+ // SAFETY: By the type invariant of `Self`, all non-null `rb_node` pointers stored in `self`
+ // point to the links field of `Node<K, V>` objects.
+ let this = unsafe { container_of!(neighbor, Node<K, V>, links) }.cast_mut();
+ // SAFETY: `this` is valid by the type invariants as described above.
+ let node = unsafe { KBox::from_raw(this) };
+ return Some(RBTreeNode { node });
+ }
+ None
+ }
+
+ /// Move the cursor to the previous node, returning [`None`] if it doesn't exist.
+ pub fn move_prev(self) -> Option<Self> {
+ self.mv(Direction::Prev)
+ }
+
+ /// Move the cursor to the next node, returning [`None`] if it doesn't exist.
+ pub fn move_next(self) -> Option<Self> {
+ self.mv(Direction::Next)
+ }
+
+ fn mv(self, direction: Direction) -> Option<Self> {
+ // INVARIANT:
+ // - `neighbor` is a valid node in the [`RBTree`] pointed to by `self.tree`.
+ self.get_neighbor_raw(direction).map(|neighbor| Self {
+ tree: self.tree,
+ current: neighbor,
+ })
+ }
+
+ /// Access the previous node without moving the cursor.
+ pub fn peek_prev(&self) -> Option<(&K, &V)> {
+ self.peek(Direction::Prev)
+ }
+
+    /// Access the next node without moving the cursor.
+ pub fn peek_next(&self) -> Option<(&K, &V)> {
+ self.peek(Direction::Next)
+ }
+
+ fn peek(&self, direction: Direction) -> Option<(&K, &V)> {
+ self.get_neighbor_raw(direction).map(|neighbor| {
+ // SAFETY:
+ // - `neighbor` is a valid tree node.
+ // - By the function signature, we have an immutable reference to `self`.
+ unsafe { Self::to_key_value(neighbor) }
+ })
+ }
+
+ /// Access the previous node mutably without moving the cursor.
+ pub fn peek_prev_mut(&mut self) -> Option<(&K, &mut V)> {
+ self.peek_mut(Direction::Prev)
+ }
+
+ /// Access the next node mutably without moving the cursor.
+ pub fn peek_next_mut(&mut self) -> Option<(&K, &mut V)> {
+ self.peek_mut(Direction::Next)
+ }
+
+ fn peek_mut(&mut self, direction: Direction) -> Option<(&K, &mut V)> {
+ self.get_neighbor_raw(direction).map(|neighbor| {
+ // SAFETY:
+ // - `neighbor` is a valid tree node.
+ // - By the function signature, we have a mutable reference to `self`.
+ unsafe { Self::to_key_value_mut(neighbor) }
+ })
+ }
+
+ fn get_neighbor_raw(&self, direction: Direction) -> Option<NonNull<bindings::rb_node>> {
+ // SAFETY: `self.current` is valid by the type invariants.
+ let neighbor = unsafe {
+ match direction {
+ Direction::Prev => bindings::rb_prev(self.current.as_ptr()),
+ Direction::Next => bindings::rb_next(self.current.as_ptr()),
+ }
+ };
+
+ NonNull::new(neighbor)
+ }
+
+ /// # Safety
+ ///
+ /// - `node` must be a valid pointer to a node in an [`RBTree`].
+ /// - The caller has immutable access to `node` for the duration of 'b.
+ unsafe fn to_key_value<'b>(node: NonNull<bindings::rb_node>) -> (&'b K, &'b V) {
+ // SAFETY: the caller guarantees that `node` is a valid pointer in an `RBTree`.
+ let (k, v) = unsafe { Self::to_key_value_raw(node) };
+ // SAFETY: the caller guarantees immutable access to `node`.
+ (k, unsafe { &*v })
+ }
+
+ /// # Safety
+ ///
+ /// - `node` must be a valid pointer to a node in an [`RBTree`].
+ /// - The caller has mutable access to `node` for the duration of 'b.
+ unsafe fn to_key_value_mut<'b>(node: NonNull<bindings::rb_node>) -> (&'b K, &'b mut V) {
+ // SAFETY: the caller guarantees that `node` is a valid pointer in an `RBTree`.
+ let (k, v) = unsafe { Self::to_key_value_raw(node) };
+ // SAFETY: the caller guarantees mutable access to `node`.
+ (k, unsafe { &mut *v })
+ }
+
+ /// # Safety
+ ///
+ /// - `node` must be a valid pointer to a node in an [`RBTree`].
+ /// - The caller has immutable access to the key for the duration of 'b.
+ unsafe fn to_key_value_raw<'b>(node: NonNull<bindings::rb_node>) -> (&'b K, *mut V) {
+ // SAFETY: By the type invariant of `Self`, all non-null `rb_node` pointers stored in `self`
+ // point to the links field of `Node<K, V>` objects.
+ let this = unsafe { container_of!(node.as_ptr(), Node<K, V>, links) }.cast_mut();
+ // SAFETY: The passed `node` is the current node or a non-null neighbor,
+ // thus `this` is valid by the type invariants.
+ let k = unsafe { &(*this).key };
+ // SAFETY: The passed `node` is the current node or a non-null neighbor,
+ // thus `this` is valid by the type invariants.
+ let v = unsafe { addr_of_mut!((*this).value) };
+ (k, v)
+ }
+}
+
+/// Direction for [`Cursor`] operations.
+enum Direction {
+ /// the node immediately before, in sort order
+ Prev,
+ /// the node immediately after, in sort order
+ Next,
+}
+
+impl<'a, K, V> IntoIterator for &'a RBTree<K, V> {
+ type Item = (&'a K, &'a V);
+ type IntoIter = Iter<'a, K, V>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.iter()
+ }
+}
+
+/// An iterator over the nodes of a [`RBTree`].
+///
+/// Instances are created by calling [`RBTree::iter`].
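+///
+/// Iteration yields keys in sorted order; a short sketch:
+///
+/// ```
+/// use kernel::{alloc::flags, rbtree::RBTree};
+///
+/// let mut tree = RBTree::new();
+/// tree.try_create_and_insert(20, 200, flags::GFP_KERNEL)?;
+/// tree.try_create_and_insert(10, 100, flags::GFP_KERNEL)?;
+///
+/// let mut iter = tree.iter();
+/// assert_eq!(iter.next(), Some((&10, &100)));
+/// assert_eq!(iter.next(), Some((&20, &200)));
+/// assert_eq!(iter.next(), None);
+/// # Ok::<(), Error>(())
+/// ```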
+pub struct Iter<'a, K, V> {
+ _tree: PhantomData<&'a RBTree<K, V>>,
+ iter_raw: IterRaw<K, V>,
+}
+
+// SAFETY: The [`Iter`] gives out immutable references to K and V, so it has the same
+// thread safety requirements as immutable references.
+unsafe impl<'a, K: Sync, V: Sync> Send for Iter<'a, K, V> {}
+
+// SAFETY: The [`Iter`] gives out immutable references to K and V, so it has the same
+// thread safety requirements as immutable references.
+unsafe impl<'a, K: Sync, V: Sync> Sync for Iter<'a, K, V> {}
+
+impl<'a, K, V> Iterator for Iter<'a, K, V> {
+ type Item = (&'a K, &'a V);
+
+ fn next(&mut self) -> Option<Self::Item> {
+ // SAFETY: Due to `self._tree`, `k` and `v` are valid for the lifetime of `'a`.
+ self.iter_raw.next().map(|(k, v)| unsafe { (&*k, &*v) })
+ }
+}
+
+impl<'a, K, V> IntoIterator for &'a mut RBTree<K, V> {
+ type Item = (&'a K, &'a mut V);
+ type IntoIter = IterMut<'a, K, V>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.iter_mut()
+ }
+}
+
+/// A mutable iterator over the nodes of a [`RBTree`].
+///
+/// Instances are created by calling [`RBTree::iter_mut`].
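+///
+/// A short sketch of mutable iteration (`RBTree::get` is assumed for the final check):
+///
+/// ```
+/// use kernel::{alloc::flags, rbtree::RBTree};
+///
+/// let mut tree = RBTree::new();
+/// tree.try_create_and_insert(10, 100, flags::GFP_KERNEL)?;
+///
+/// // Values can be mutated in place; keys stay immutable.
+/// for (_key, value) in tree.iter_mut() {
+///     *value += 1;
+/// }
+/// assert_eq!(tree.get(&10), Some(&101));
+/// # Ok::<(), Error>(())
+/// ```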
+pub struct IterMut<'a, K, V> {
+ _tree: PhantomData<&'a mut RBTree<K, V>>,
+ iter_raw: IterRaw<K, V>,
+}
+
+// SAFETY: The [`IterMut`] has exclusive access to both `K` and `V`, so it is sufficient to require them to be `Send`.
+// The iterator only gives out immutable references to the keys, but since the iterator has exclusive access to those same
+// keys, `Send` is sufficient. `Sync` would be okay, but it is more restrictive to the user.
+unsafe impl<'a, K: Send, V: Send> Send for IterMut<'a, K, V> {}
+
+// SAFETY: The [`IterMut`] gives out immutable references to K and mutable references to V, so it has the same
+// thread safety requirements as mutable references.
+unsafe impl<'a, K: Sync, V: Sync> Sync for IterMut<'a, K, V> {}
+
+impl<'a, K, V> Iterator for IterMut<'a, K, V> {
+ type Item = (&'a K, &'a mut V);
+
+ fn next(&mut self) -> Option<Self::Item> {
+ self.iter_raw.next().map(|(k, v)|
+ // SAFETY: Due to `&mut self`, we have exclusive access to `k` and `v`, for the lifetime of `'a`.
+ unsafe { (&*k, &mut *v) })
+ }
+}
+
+/// A raw iterator over the nodes of a [`RBTree`].
+///
+/// # Invariants
+/// - `self.next` is null or a valid pointer.
+/// - If non-null, `self.next` points to a node stored inside of a valid `RBTree`.
+struct IterRaw<K, V> {
+ next: *mut bindings::rb_node,
+ _phantom: PhantomData<fn() -> (K, V)>,
+}
+
+impl<K, V> Iterator for IterRaw<K, V> {
+ type Item = (*mut K, *mut V);
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.next.is_null() {
+ return None;
+ }
+
+ // SAFETY: By the type invariant of `IterRaw`, `self.next` is a valid node in an `RBTree`,
+ // and by the type invariant of `RBTree`, all nodes point to the links field of `Node<K, V>` objects.
+ let cur = unsafe { container_of!(self.next, Node<K, V>, links) }.cast_mut();
+
+ // SAFETY: `self.next` is a valid tree node by the type invariants.
+ self.next = unsafe { bindings::rb_next(self.next) };
+
+ // SAFETY: By the same reasoning above, it is safe to dereference the node.
+ Some(unsafe { (addr_of_mut!((*cur).key), addr_of_mut!((*cur).value)) })
+ }
+}
+
+/// A memory reservation for a red-black tree node.
+///
+/// It contains the memory needed to hold a node that can be inserted into a red-black tree. One
+/// can be obtained by directly allocating it ([`RBTreeNodeReservation::new`]).
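+///
+/// A sketch of the reserve-then-initialise pattern:
+///
+/// ```
+/// use kernel::{alloc::flags, rbtree::{RBTree, RBTreeNodeReservation}};
+///
+/// let mut tree: RBTree<u32, u32> = RBTree::new();
+///
+/// // Allocate up front, e.g. before entering a context that must not fail...
+/// let reservation = RBTreeNodeReservation::new(flags::GFP_KERNEL)?;
+///
+/// // ...then initialise and insert without further allocations or failure paths.
+/// tree.insert(reservation.into_node(10, 100));
+/// # Ok::<(), Error>(())
+/// ```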
+pub struct RBTreeNodeReservation<K, V> {
+ node: KBox<MaybeUninit<Node<K, V>>>,
+}
+
+impl<K, V> RBTreeNodeReservation<K, V> {
+ /// Allocates memory for a node to be eventually initialised and inserted into the tree via a
+ /// call to [`RBTree::insert`].
+ pub fn new(flags: Flags) -> Result<RBTreeNodeReservation<K, V>> {
+ Ok(RBTreeNodeReservation {
+ node: KBox::new_uninit(flags)?,
+ })
+ }
+}
+
+// SAFETY: This doesn't actually contain K or V, and is just a memory allocation. Those can always
+// be moved across threads.
+unsafe impl<K, V> Send for RBTreeNodeReservation<K, V> {}
+
+// SAFETY: This doesn't actually contain K or V, and is just a memory allocation.
+unsafe impl<K, V> Sync for RBTreeNodeReservation<K, V> {}
+
+impl<K, V> RBTreeNodeReservation<K, V> {
+ /// Initialises a node reservation.
+ ///
+ /// It then becomes an [`RBTreeNode`] that can be inserted into a tree.
+ pub fn into_node(self, key: K, value: V) -> RBTreeNode<K, V> {
+ let node = KBox::write(
+ self.node,
+ Node {
+ key,
+ value,
+ links: bindings::rb_node::default(),
+ },
+ );
+ RBTreeNode { node }
+ }
+}
+
+/// A red-black tree node.
+///
+/// The node is fully initialised (with key and value) and can be inserted into a tree without any
+/// extra allocations or failure paths.
+pub struct RBTreeNode<K, V> {
+ node: KBox<Node<K, V>>,
+}
+
+impl<K, V> RBTreeNode<K, V> {
+ /// Allocates and initialises a node that can be inserted into the tree via
+ /// [`RBTree::insert`].
+ pub fn new(key: K, value: V, flags: Flags) -> Result<RBTreeNode<K, V>> {
+ Ok(RBTreeNodeReservation::new(flags)?.into_node(key, value))
+ }
+
+ /// Get the key and value from inside the node.
+ pub fn to_key_value(self) -> (K, V) {
+ let node = KBox::into_inner(self.node);
+
+ (node.key, node.value)
+ }
+}
+
+// SAFETY: If K and V can be sent across threads, then it's also okay to send [`RBTreeNode`] across
+// threads.
+unsafe impl<K: Send, V: Send> Send for RBTreeNode<K, V> {}
+
+// SAFETY: If K and V can be accessed without synchronization, then it's also okay to access
+// [`RBTreeNode`] without synchronization.
+unsafe impl<K: Sync, V: Sync> Sync for RBTreeNode<K, V> {}
+
+impl<K, V> RBTreeNode<K, V> {
+ /// Drop the key and value, but keep the allocation.
+ ///
+ /// It then becomes a reservation that can be re-initialised into a different node (i.e., with
+ /// a different key and/or value).
+ ///
+ /// The existing key and value are dropped in-place as part of this operation, that is, memory
+ /// may be freed (but only for the key/value; memory for the node itself is kept for reuse).
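+    ///
+    /// A sketch of reusing the allocation for a different key/value pair:
+    ///
+    /// ```
+    /// use kernel::{alloc::flags, rbtree::RBTreeNode};
+    ///
+    /// let node = RBTreeNode::new(10, 100, flags::GFP_KERNEL)?;
+    ///
+    /// // Drop the key and value, but keep the memory...
+    /// let reservation = node.into_reservation();
+    ///
+    /// // ...and initialise it again without allocating.
+    /// let node = reservation.into_node(20, 200);
+    /// assert_eq!(node.to_key_value(), (20, 200));
+    /// # Ok::<(), Error>(())
+    /// ```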
+ pub fn into_reservation(self) -> RBTreeNodeReservation<K, V> {
+ RBTreeNodeReservation {
+ node: KBox::drop_contents(self.node),
+ }
+ }
+}
+
+/// A view into a single entry in a map, which may either be vacant or occupied.
+///
+/// This enum is constructed from the [`entry`] method on [`RBTree`].
+///
+/// [`entry`]: fn@RBTree::entry
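+///
+/// A sketch of the usual entry pattern (assuming [`RBTree::entry`] hands out this enum, as
+/// described above):
+///
+/// ```
+/// use kernel::{alloc::flags, rbtree::{Entry, RBTree, RBTreeNodeReservation}};
+///
+/// let mut tree: RBTree<u32, u32> = RBTree::new();
+/// let reservation = RBTreeNodeReservation::new(flags::GFP_KERNEL)?;
+///
+/// match tree.entry(10) {
+///     // No node with key 10 yet: insert one, consuming the reservation.
+///     Entry::Vacant(entry) => {
+///         entry.insert(100, reservation);
+///     }
+///     // A node already exists: update its value in place.
+///     Entry::Occupied(mut entry) => {
+///         *entry.get_mut() += 1;
+///     }
+/// }
+/// # Ok::<(), Error>(())
+/// ```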
+pub enum Entry<'a, K, V> {
+ /// This [`RBTree`] does not have a node with this key.
+ Vacant(VacantEntry<'a, K, V>),
+ /// This [`RBTree`] already has a node with this key.
+ Occupied(OccupiedEntry<'a, K, V>),
+}
+
+/// Like [`Entry`], except that it doesn't have ownership of the key.
+enum RawEntry<'a, K, V> {
+ Vacant(RawVacantEntry<'a, K, V>),
+ Occupied(OccupiedEntry<'a, K, V>),
+}
+
+/// A view into a vacant entry in a [`RBTree`]. It is part of the [`Entry`] enum.
+pub struct VacantEntry<'a, K, V> {
+ key: K,
+ raw: RawVacantEntry<'a, K, V>,
+}
+
+/// Like [`VacantEntry`], but doesn't hold on to the key.
+///
+/// # Invariants
+/// - `parent` may be null if the new node becomes the root.
+/// - `child_field_of_parent` is a valid pointer to the left-child or right-child of `parent`. If `parent` is
+/// null, it is a pointer to the root of the [`RBTree`].
+struct RawVacantEntry<'a, K, V> {
+ rbtree: *mut RBTree<K, V>,
+ /// The node that will become the parent of the new node if we insert one.
+ parent: *mut bindings::rb_node,
+ /// This points to the left-child or right-child field of `parent`, or `root` if `parent` is
+ /// null.
+ child_field_of_parent: *mut *mut bindings::rb_node,
+ _phantom: PhantomData<&'a mut RBTree<K, V>>,
+}
+
+impl<'a, K, V> RawVacantEntry<'a, K, V> {
+ /// Inserts the given node into the [`RBTree`] at this entry.
+ ///
+ /// The `node` must have a key such that inserting it here does not break the ordering of this
+ /// [`RBTree`].
+ fn insert(self, node: RBTreeNode<K, V>) -> &'a mut V {
+ let node = KBox::into_raw(node.node);
+
+        // SAFETY: `node` is valid at least until we call `KBox::from_raw`, which only happens when
+ // the node is removed or replaced.
+ let node_links = unsafe { addr_of_mut!((*node).links) };
+
+ // INVARIANT: We are linking in a new node, which is valid. It remains valid because we
+ // "forgot" it with `Box::into_raw`.
+ // SAFETY: The type invariants of `RawVacantEntry` are exactly the safety requirements of `rb_link_node`.
+ unsafe { bindings::rb_link_node(node_links, self.parent, self.child_field_of_parent) };
+
+ // SAFETY: All pointers are valid. `node` has just been inserted into the tree.
+ unsafe { bindings::rb_insert_color(node_links, addr_of_mut!((*self.rbtree).root)) };
+
+ // SAFETY: The node is valid until we remove it from the tree.
+ unsafe { &mut (*node).value }
+ }
+}
+
+impl<'a, K, V> VacantEntry<'a, K, V> {
+ /// Inserts the given node into the [`RBTree`] at this entry.
+ pub fn insert(self, value: V, reservation: RBTreeNodeReservation<K, V>) -> &'a mut V {
+ self.raw.insert(reservation.into_node(self.key, value))
+ }
+}
+
+/// A view into an occupied entry in a [`RBTree`]. It is part of the [`Entry`] enum.
+///
+/// # Invariants
+/// - `node_links` is a valid, non-null pointer to a tree node in `self.rbtree`
+pub struct OccupiedEntry<'a, K, V> {
+ rbtree: &'a mut RBTree<K, V>,
+ /// The node that this entry corresponds to.
+ node_links: *mut bindings::rb_node,
+}
+
+impl<'a, K, V> OccupiedEntry<'a, K, V> {
+ /// Gets a reference to the value in the entry.
+ pub fn get(&self) -> &V {
+ // SAFETY:
+ // - `self.node_links` is a valid pointer to a node in the tree.
+ // - We have shared access to the underlying tree, and can thus give out a shared reference.
+ unsafe { &(*container_of!(self.node_links, Node<K, V>, links)).value }
+ }
+
+ /// Gets a mutable reference to the value in the entry.
+ pub fn get_mut(&mut self) -> &mut V {
+ // SAFETY:
+ // - `self.node_links` is a valid pointer to a node in the tree.
+ // - We have exclusive access to the underlying tree, and can thus give out a mutable reference.
+ unsafe { &mut (*(container_of!(self.node_links, Node<K, V>, links).cast_mut())).value }
+ }
+
+ /// Converts the entry into a mutable reference to its value.
+ ///
+    /// If you need to keep using the `OccupiedEntry` afterwards, use [`Self::get_mut`] instead.
+ pub fn into_mut(self) -> &'a mut V {
+ // SAFETY:
+ // - `self.node_links` is a valid pointer to a node in the tree.
+ // - This consumes the `&'a mut RBTree<K, V>`, therefore it can give out a mutable reference that lives for `'a`.
+ unsafe { &mut (*(container_of!(self.node_links, Node<K, V>, links).cast_mut())).value }
+ }
+
+ /// Remove this entry from the [`RBTree`].
+ pub fn remove_node(self) -> RBTreeNode<K, V> {
+ // SAFETY: The node is a node in the tree, so it is valid.
+ unsafe { bindings::rb_erase(self.node_links, &mut self.rbtree.root) };
+
+ // INVARIANT: The node is being returned and the caller may free it, however, it was
+ // removed from the tree. So the invariants still hold.
+ RBTreeNode {
+ // SAFETY: The node was a node in the tree, but we removed it, so we can convert it
+ // back into a box.
+ node: unsafe {
+ KBox::from_raw(container_of!(self.node_links, Node<K, V>, links).cast_mut())
+ },
+ }
+ }
+
+ /// Takes the value of the entry out of the map, and returns it.
+ pub fn remove(self) -> V {
+ let rb_node = self.remove_node();
+ let node = KBox::into_inner(rb_node.node);
+
+ node.value
+ }
+
+ /// Swap the current node for the provided node.
+ ///
+ /// The key of both nodes must be equal.
+ fn replace(self, node: RBTreeNode<K, V>) -> RBTreeNode<K, V> {
+ let node = KBox::into_raw(node.node);
+
+        // SAFETY: `node` is valid at least until we call `KBox::from_raw`, which only happens when
+ // the node is removed or replaced.
+ let new_node_links = unsafe { addr_of_mut!((*node).links) };
+
+ // SAFETY: This updates the pointers so that `new_node_links` is in the tree where
+ // `self.node_links` used to be.
+ unsafe {
+ bindings::rb_replace_node(self.node_links, new_node_links, &mut self.rbtree.root)
+ };
+
+ // SAFETY:
+        // - `self.node_links` is a valid pointer to a node in the tree.
+ // - Now that we removed this entry from the tree, we can convert the node to a box.
+ let old_node =
+ unsafe { KBox::from_raw(container_of!(self.node_links, Node<K, V>, links).cast_mut()) };
+
+ RBTreeNode { node: old_node }
+ }
+}
+
+struct Node<K, V> {
+ links: bindings::rb_node,
+ key: K,
+ value: V,
+}
diff --git a/rust/kernel/revocable.rs b/rust/kernel/revocable.rs
new file mode 100644
index 000000000000..1e5a9d25c21b
--- /dev/null
+++ b/rust/kernel/revocable.rs
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Revocable objects.
+//!
+//! The [`Revocable`] type wraps other types and allows access to them to be revoked. The existence
+//! of a [`RevocableGuard`] ensures that objects remain valid.
+
+use crate::{bindings, prelude::*, sync::rcu, types::Opaque};
+use core::{
+ marker::PhantomData,
+ ops::Deref,
+ ptr::drop_in_place,
+ sync::atomic::{AtomicBool, Ordering},
+};
+
+/// An object that can become inaccessible at runtime.
+///
+/// Once access is revoked and all concurrent users complete (i.e., all existing instances of
+/// [`RevocableGuard`] are dropped), the wrapped object is also dropped.
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::revocable::Revocable;
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// fn add_two(v: &Revocable<Example>) -> Option<u32> {
+/// let guard = v.try_access()?;
+/// Some(guard.a + guard.b)
+/// }
+///
+/// let v = KBox::pin_init(Revocable::new(Example { a: 10, b: 20 }), GFP_KERNEL).unwrap();
+/// assert_eq!(add_two(&v), Some(30));
+/// v.revoke();
+/// assert_eq!(add_two(&v), None);
+/// ```
+///
+/// Same example as above, but explicitly using the RCU read-side lock.
+///
+/// ```
+/// # use kernel::revocable::Revocable;
+/// use kernel::sync::rcu;
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// fn add_two(v: &Revocable<Example>) -> Option<u32> {
+/// let guard = rcu::read_lock();
+/// let e = v.try_access_with_guard(&guard)?;
+/// Some(e.a + e.b)
+/// }
+///
+/// let v = KBox::pin_init(Revocable::new(Example { a: 10, b: 20 }), GFP_KERNEL).unwrap();
+/// assert_eq!(add_two(&v), Some(30));
+/// v.revoke();
+/// assert_eq!(add_two(&v), None);
+/// ```
+#[pin_data(PinnedDrop)]
+pub struct Revocable<T> {
+ is_available: AtomicBool,
+ #[pin]
+ data: Opaque<T>,
+}
+
+// SAFETY: `Revocable` is `Send` if the wrapped object is also `Send`. This is because while the
+// functionality exposed by `Revocable` can be accessed from any thread/CPU, it is possible that
+// this isn't supported by the wrapped object.
+unsafe impl<T: Send> Send for Revocable<T> {}
+
+// SAFETY: `Revocable` is `Sync` if the wrapped object is both `Send` and `Sync`. We require `Send`
+// from the wrapped object as well because of `Revocable::revoke`, which can trigger the `Drop`
+// implementation of the wrapped object from an arbitrary thread.
+unsafe impl<T: Sync + Send> Sync for Revocable<T> {}
+
+impl<T> Revocable<T> {
+ /// Creates a new revocable instance of the given data.
+ pub fn new(data: impl PinInit<T>) -> impl PinInit<Self> {
+ pin_init!(Self {
+ is_available: AtomicBool::new(true),
+ data <- Opaque::pin_init(data),
+ })
+ }
+
+ /// Tries to access the revocable wrapped object.
+ ///
+ /// Returns `None` if the object has been revoked and is therefore no longer accessible.
+ ///
+ /// Returns a guard that gives access to the object otherwise; the object is guaranteed to
+ /// remain accessible while the guard is alive. In such cases, callers are not allowed to sleep
+ /// because another CPU may be waiting to complete the revocation of this object.
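+    ///
+    /// A short sketch of guard-based access:
+    ///
+    /// ```
+    /// # use kernel::revocable::Revocable;
+    /// let v = KBox::pin_init(Revocable::new(123u32), GFP_KERNEL)?;
+    ///
+    /// if let Some(guard) = v.try_access() {
+    ///     // The object cannot be dropped while `guard` is alive.
+    ///     assert_eq!(*guard, 123);
+    /// }
+    /// # Ok::<(), Error>(())
+    /// ```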
+ pub fn try_access(&self) -> Option<RevocableGuard<'_, T>> {
+ let guard = rcu::read_lock();
+ if self.is_available.load(Ordering::Relaxed) {
+ // Since `self.is_available` is true, data is initialised and has to remain valid
+ // because the RCU read side lock prevents it from being dropped.
+ Some(RevocableGuard::new(self.data.get(), guard))
+ } else {
+ None
+ }
+ }
+
+ /// Tries to access the revocable wrapped object.
+ ///
+ /// Returns `None` if the object has been revoked and is therefore no longer accessible.
+ ///
+ /// Returns a shared reference to the object otherwise; the object is guaranteed to
+ /// remain accessible while the rcu read side guard is alive. In such cases, callers are not
+ /// allowed to sleep because another CPU may be waiting to complete the revocation of this
+ /// object.
+ pub fn try_access_with_guard<'a>(&'a self, _guard: &'a rcu::Guard) -> Option<&'a T> {
+ if self.is_available.load(Ordering::Relaxed) {
+ // SAFETY: Since `self.is_available` is true, data is initialised and has to remain
+ // valid because the RCU read side lock prevents it from being dropped.
+ Some(unsafe { &*self.data.get() })
+ } else {
+ None
+ }
+ }
+
+ /// # Safety
+ ///
+ /// Callers must ensure that there are no more concurrent users of the revocable object.
+ unsafe fn revoke_internal<const SYNC: bool>(&self) {
+ if self.is_available.swap(false, Ordering::Relaxed) {
+ if SYNC {
+ // SAFETY: Just an FFI call, there are no further requirements.
+ unsafe { bindings::synchronize_rcu() };
+ }
+
+ // SAFETY: We know `self.data` is valid because only one CPU can succeed the
+            // `swap` above that takes `is_available` from `true` to `false`.
+ unsafe { drop_in_place(self.data.get()) };
+ }
+ }
+
+ /// Revokes access to and drops the wrapped object.
+ ///
+    /// Access to the object is revoked immediately for new callers of [`Revocable::try_access`],
+ /// expecting that there are no concurrent users of the object.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that there are no more concurrent users of the revocable object.
+ pub unsafe fn revoke_nosync(&self) {
+ // SAFETY: By the safety requirement of this function, the caller ensures that nobody is
+ // accessing the data anymore and hence we don't have to wait for the grace period to
+ // finish.
+ unsafe { self.revoke_internal::<false>() }
+ }
+
+ /// Revokes access to and drops the wrapped object.
+ ///
+    /// Access to the object is revoked immediately for new callers of [`Revocable::try_access`].
+ ///
+ /// If there are concurrent users of the object (i.e., ones that called
+ /// [`Revocable::try_access`] beforehand and still haven't dropped the returned guard), this
+ /// function waits for the concurrent access to complete before dropping the wrapped object.
+ pub fn revoke(&self) {
+ // SAFETY: By passing `true` we ask `revoke_internal` to wait for the grace period to
+ // finish.
+ unsafe { self.revoke_internal::<true>() }
+ }
+}
+
+#[pinned_drop]
+impl<T> PinnedDrop for Revocable<T> {
+ fn drop(self: Pin<&mut Self>) {
+ // Drop only if the data hasn't been revoked yet (in which case it has already been
+ // dropped).
+ // SAFETY: We are not moving out of `p`, only dropping in place
+ let p = unsafe { self.get_unchecked_mut() };
+ if *p.is_available.get_mut() {
+ // SAFETY: We know `self.data` is valid because no other CPU has changed
+ // `is_available` to `false` yet, and no other CPU can do it anymore because this CPU
+ // holds the only reference (mutable) to `self` now.
+ unsafe { drop_in_place(p.data.get()) };
+ }
+ }
+}
+
+/// A guard that allows access to a revocable object and keeps it alive.
+///
+/// CPUs may not sleep while holding on to [`RevocableGuard`] because it's in atomic context
+/// holding the RCU read-side lock.
+///
+/// # Invariants
+///
+/// The RCU read-side lock is held while the guard is alive.
+pub struct RevocableGuard<'a, T> {
+ data_ref: *const T,
+ _rcu_guard: rcu::Guard,
+ _p: PhantomData<&'a ()>,
+}
+
+impl<T> RevocableGuard<'_, T> {
+ fn new(data_ref: *const T, rcu_guard: rcu::Guard) -> Self {
+ Self {
+ data_ref,
+ _rcu_guard: rcu_guard,
+ _p: PhantomData,
+ }
+ }
+}
+
+impl<T> Deref for RevocableGuard<'_, T> {
+ type Target = T;
+
+ fn deref(&self) -> &Self::Target {
+ // SAFETY: By the type invariants, we hold the rcu read-side lock, so the object is
+ // guaranteed to remain valid.
+ unsafe { &*self.data_ref }
+ }
+}
diff --git a/rust/kernel/security.rs b/rust/kernel/security.rs
new file mode 100644
index 000000000000..25d2b1ac3833
--- /dev/null
+++ b/rust/kernel/security.rs
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 Google LLC.
+
+//! Linux Security Modules (LSM).
+//!
+//! C header: [`include/linux/security.h`](srctree/include/linux/security.h).
+
+use crate::{
+ bindings,
+ error::{to_result, Result},
+};
+
+/// A security context string.
+///
+/// # Invariants
+///
+/// The `ctx` field corresponds to a valid security context as returned by a successful call to
+/// `security_secid_to_secctx`, that has not yet been destroyed by `security_release_secctx`.
+pub struct SecurityCtx {
+ ctx: bindings::lsm_context,
+}
+
+impl SecurityCtx {
+ /// Get the security context given its id.
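+    ///
+    /// A sketch of typical use; `secid` must come from a real LSM source, so this is not a
+    /// runnable doctest:
+    ///
+    /// ```rust,ignore
+    /// let ctx = SecurityCtx::from_secid(secid)?;
+    /// if !ctx.is_empty() {
+    ///     pr_info!("security context: {:?}\n", ctx.as_bytes());
+    /// }
+    /// ```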
+ pub fn from_secid(secid: u32) -> Result<Self> {
+ // SAFETY: `struct lsm_context` can be initialized to all zeros.
+ let mut ctx: bindings::lsm_context = unsafe { core::mem::zeroed() };
+
+ // SAFETY: Just a C FFI call. The pointer is valid for writes.
+ to_result(unsafe { bindings::security_secid_to_secctx(secid, &mut ctx) })?;
+
+ // INVARIANT: If the above call did not fail, then we have a valid security context.
+ Ok(Self { ctx })
+ }
+
+ /// Returns whether the security context is empty.
+ pub fn is_empty(&self) -> bool {
+ self.ctx.len == 0
+ }
+
+ /// Returns the length of this security context.
+ pub fn len(&self) -> usize {
+ self.ctx.len as usize
+ }
+
+ /// Returns the bytes for this security context.
+ pub fn as_bytes(&self) -> &[u8] {
+ let ptr = self.ctx.context;
+ if ptr.is_null() {
+ debug_assert_eq!(self.len(), 0);
+ // We can't pass a null pointer to `slice::from_raw_parts` even if the length is zero.
+ return &[];
+ }
+
+ // SAFETY: The call to `security_secid_to_secctx` guarantees that the pointer is valid for
+ // `self.len()` bytes. Furthermore, if the length is zero, then we have ensured that the
+ // pointer is not null.
+ unsafe { core::slice::from_raw_parts(ptr.cast(), self.len()) }
+ }
+}
+
+impl Drop for SecurityCtx {
+ fn drop(&mut self) {
+ // SAFETY: By the invariant of `Self`, this frees a context that came from a successful
+ // call to `security_secid_to_secctx` and has not yet been destroyed by
+ // `security_release_secctx`.
+ unsafe { bindings::security_release_secctx(&mut self.ctx) };
+ }
+}
diff --git a/rust/kernel/seq_file.rs b/rust/kernel/seq_file.rs
new file mode 100644
index 000000000000..04947c672979
--- /dev/null
+++ b/rust/kernel/seq_file.rs
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Seq file bindings.
+//!
+//! C header: [`include/linux/seq_file.h`](srctree/include/linux/seq_file.h)
+
+use crate::{bindings, c_str, types::NotThreadSafe, types::Opaque};
+
+/// A utility for generating the contents of a seq file.
+#[repr(transparent)]
+pub struct SeqFile {
+ inner: Opaque<bindings::seq_file>,
+ _not_send: NotThreadSafe,
+}
+
+impl SeqFile {
+ /// Creates a new [`SeqFile`] from a raw pointer.
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure that for the duration of 'a the following is satisfied:
+ /// * The pointer points at a valid `struct seq_file`.
+ /// * The `struct seq_file` is not accessed from any other thread.
+ pub unsafe fn from_raw<'a>(ptr: *mut bindings::seq_file) -> &'a SeqFile {
+ // SAFETY: The caller ensures that the reference is valid for 'a. There's no way to trigger
+ // a data race by using the `&SeqFile` since this is the only thread accessing the seq_file.
+ //
+ // CAST: The layout of `struct seq_file` and `SeqFile` is compatible.
+ unsafe { &*ptr.cast() }
+ }
+
+ /// Used by the [`seq_print`] macro.
+ pub fn call_printf(&self, args: core::fmt::Arguments<'_>) {
+ // SAFETY: Passing a void pointer to `Arguments` is valid for `%pA`.
+ unsafe {
+ bindings::seq_printf(
+ self.inner.get(),
+ c_str!("%pA").as_char_ptr(),
+ &args as *const _ as *const crate::ffi::c_void,
+ );
+ }
+ }
+}
+
+/// Write to a [`SeqFile`] with the ordinary Rust formatting syntax.
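+///
+/// A sketch of use from a seq-file `show` callback; `m: &SeqFile` is assumed to come from
+/// [`SeqFile::from_raw`], so this is not a runnable doctest.
+///
+/// ```rust,ignore
+/// seq_print!(m, "name: {}, refs: {}\n", "example", 42);
+/// ```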
+#[macro_export]
+macro_rules! seq_print {
+ ($m:expr, $($arg:tt)+) => (
+ $m.call_printf(format_args!($($arg)+))
+ );
+}
+pub use seq_print;
diff --git a/rust/kernel/sizes.rs b/rust/kernel/sizes.rs
new file mode 100644
index 000000000000..834c343e4170
--- /dev/null
+++ b/rust/kernel/sizes.rs
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Commonly used sizes.
+//!
+//! C headers: [`include/linux/sizes.h`](srctree/include/linux/sizes.h).
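+//!
+//! A quick sanity sketch of how the constants relate:
+//!
+//! ```
+//! use kernel::sizes::*;
+//!
+//! assert_eq!(SZ_4K, 4 * SZ_1K);
+//! assert_eq!(SZ_512K, 128 * SZ_4K);
+//! ```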
+
+/// 0x00000400
+pub const SZ_1K: usize = bindings::SZ_1K as usize;
+/// 0x00000800
+pub const SZ_2K: usize = bindings::SZ_2K as usize;
+/// 0x00001000
+pub const SZ_4K: usize = bindings::SZ_4K as usize;
+/// 0x00002000
+pub const SZ_8K: usize = bindings::SZ_8K as usize;
+/// 0x00004000
+pub const SZ_16K: usize = bindings::SZ_16K as usize;
+/// 0x00008000
+pub const SZ_32K: usize = bindings::SZ_32K as usize;
+/// 0x00010000
+pub const SZ_64K: usize = bindings::SZ_64K as usize;
+/// 0x00020000
+pub const SZ_128K: usize = bindings::SZ_128K as usize;
+/// 0x00040000
+pub const SZ_256K: usize = bindings::SZ_256K as usize;
+/// 0x00080000
+pub const SZ_512K: usize = bindings::SZ_512K as usize;
diff --git a/rust/kernel/std_vendor.rs b/rust/kernel/std_vendor.rs
index 388d6a5147a2..279bd353687a 100644
--- a/rust/kernel/std_vendor.rs
+++ b/rust/kernel/std_vendor.rs
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
+//! Rust standard library vendored code.
+//!
//! The contents of this file come from the Rust standard library, hosted in
//! the <https://github.com/rust-lang/rust> repository, licensed under
//! "Apache-2.0 OR MIT" and adapted for kernel use. For copyright details,
@@ -14,9 +16,9 @@
///
/// ```rust
/// let a = 2;
-/// # #[allow(clippy::dbg_macro)]
+/// # #[expect(clippy::disallowed_macros)]
/// let b = dbg!(a * 2) + 1;
-/// // ^-- prints: [src/main.rs:2] a * 2 = 4
+/// // ^-- prints: [src/main.rs:3:9] a * 2 = 4
/// assert_eq!(b, 5);
/// ```
///
@@ -52,7 +54,7 @@
/// With a method call:
///
/// ```rust
-/// # #[allow(clippy::dbg_macro)]
+/// # #[expect(clippy::disallowed_macros)]
/// fn foo(n: usize) {
/// if dbg!(n.checked_sub(4)).is_some() {
/// // ...
@@ -65,14 +67,13 @@
/// This prints to the kernel log:
///
/// ```text,ignore
-/// [src/main.rs:4] n.checked_sub(4) = None
+/// [src/main.rs:3:8] n.checked_sub(4) = None
/// ```
///
/// Naive factorial implementation:
///
/// ```rust
-/// # #[allow(clippy::dbg_macro)]
-/// # {
+/// # #![expect(clippy::disallowed_macros)]
/// fn factorial(n: u32) -> u32 {
/// if dbg!(n <= 1) {
/// dbg!(1)
@@ -82,21 +83,20 @@
/// }
///
/// dbg!(factorial(4));
-/// # }
/// ```
///
/// This prints to the kernel log:
///
/// ```text,ignore
-/// [src/main.rs:3] n <= 1 = false
-/// [src/main.rs:3] n <= 1 = false
-/// [src/main.rs:3] n <= 1 = false
-/// [src/main.rs:3] n <= 1 = true
-/// [src/main.rs:4] 1 = 1
-/// [src/main.rs:5] n * factorial(n - 1) = 2
-/// [src/main.rs:5] n * factorial(n - 1) = 6
-/// [src/main.rs:5] n * factorial(n - 1) = 24
-/// [src/main.rs:11] factorial(4) = 24
+/// [src/main.rs:3:8] n <= 1 = false
+/// [src/main.rs:3:8] n <= 1 = false
+/// [src/main.rs:3:8] n <= 1 = false
+/// [src/main.rs:3:8] n <= 1 = true
+/// [src/main.rs:4:9] 1 = 1
+/// [src/main.rs:5:9] n * factorial(n - 1) = 2
+/// [src/main.rs:5:9] n * factorial(n - 1) = 6
+/// [src/main.rs:5:9] n * factorial(n - 1) = 24
+/// [src/main.rs:11:1] factorial(4) = 24
/// ```
///
/// The `dbg!(..)` macro moves the input:
@@ -118,7 +118,7 @@
/// a tuple (and return it, too):
///
/// ```
-/// # #[allow(clippy::dbg_macro)]
+/// # #![expect(clippy::disallowed_macros)]
/// assert_eq!(dbg!(1usize, 2u32), (1, 2));
/// ```
///
@@ -127,16 +127,14 @@
/// invocations. You can use a 1-tuple directly if you need one:
///
/// ```
-/// # #[allow(clippy::dbg_macro)]
-/// # {
+/// # #![expect(clippy::disallowed_macros)]
/// assert_eq!(1, dbg!(1u32,)); // trailing comma ignored
/// assert_eq!((1,), dbg!((1u32,))); // 1-tuple
-/// # }
/// ```
///
/// [`std::dbg`]: https://doc.rust-lang.org/std/macro.dbg.html
/// [`eprintln`]: https://doc.rust-lang.org/std/macro.eprintln.html
-/// [`printk`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html
+/// [`printk`]: https://docs.kernel.org/core-api/printk-basics.html
/// [`pr_info`]: crate::pr_info!
/// [`pr_debug`]: crate::pr_debug!
#[macro_export]
@@ -146,15 +144,16 @@ macro_rules! dbg {
// `$val` expression could be a block (`{ .. }`), in which case the `pr_info!`
// will be malformed.
() => {
- $crate::pr_info!("[{}:{}]\n", ::core::file!(), ::core::line!())
+ $crate::pr_info!("[{}:{}:{}]\n", ::core::file!(), ::core::line!(), ::core::column!())
};
($val:expr $(,)?) => {
// Use of `match` here is intentional because it affects the lifetimes
// of temporaries - https://stackoverflow.com/a/48732525/1063961
match $val {
tmp => {
- $crate::pr_info!("[{}:{}] {} = {:#?}\n",
- ::core::file!(), ::core::line!(), ::core::stringify!($val), &tmp);
+ $crate::pr_info!("[{}:{}:{}] {} = {:#?}\n",
+ ::core::file!(), ::core::line!(), ::core::column!(),
+ ::core::stringify!($val), &tmp);
tmp
}
}
diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs
index 7d848b83add4..28e2201604d6 100644
--- a/rust/kernel/str.rs
+++ b/rust/kernel/str.rs
@@ -2,20 +2,111 @@
//! String representations.
-use alloc::alloc::AllocError;
-use alloc::vec::Vec;
+use crate::alloc::{flags::*, AllocError, KVec};
use core::fmt::{self, Write};
-use core::ops::{self, Deref, Index};
+use core::ops::{self, Deref, DerefMut, Index};
-use crate::{
- bindings,
- error::{code::*, Error},
-};
+use crate::error::{code::*, Error};
/// Byte string without UTF-8 validity guarantee.
-///
-/// `BStr` is simply an alias to `[u8]`, but has a more evident semantical meaning.
-pub type BStr = [u8];
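+///
+/// A `&BStr` can be created from a byte slice or with the `b_str!` macro; a short sketch:
+///
+/// ```
+/// # use kernel::{b_str, str::BStr};
+/// let s: &BStr = b_str!("hello");
+/// assert_eq!(s.len(), 5);
+/// assert!(!s.is_empty());
+/// assert_eq!(BStr::from_bytes(b"hello").len(), s.len());
+/// ```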
+#[repr(transparent)]
+pub struct BStr([u8]);
+
+impl BStr {
+ /// Returns the length of this string.
+ #[inline]
+ pub const fn len(&self) -> usize {
+ self.0.len()
+ }
+
+ /// Returns `true` if the string is empty.
+ #[inline]
+ pub const fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+
+ /// Creates a [`BStr`] from a `[u8]`.
+ #[inline]
+ pub const fn from_bytes(bytes: &[u8]) -> &Self {
+ // SAFETY: `BStr` is transparent to `[u8]`.
+ unsafe { &*(bytes as *const [u8] as *const BStr) }
+ }
+}
+
+impl fmt::Display for BStr {
+ /// Formats printable ASCII characters, escaping the rest.
+ ///
+ /// ```
+ /// # use kernel::{fmt, b_str, str::{BStr, CString}};
+ /// let ascii = b_str!("Hello, BStr!");
+ /// let s = CString::try_from_fmt(fmt!("{}", ascii))?;
+ /// assert_eq!(s.as_bytes(), "Hello, BStr!".as_bytes());
+ ///
+ /// let non_ascii = b_str!("🦀");
+ /// let s = CString::try_from_fmt(fmt!("{}", non_ascii))?;
+ /// assert_eq!(s.as_bytes(), "\\xf0\\x9f\\xa6\\x80".as_bytes());
+ /// # Ok::<(), kernel::error::Error>(())
+ /// ```
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ for &b in &self.0 {
+ match b {
+ // Common escape codes.
+ b'\t' => f.write_str("\\t")?,
+ b'\n' => f.write_str("\\n")?,
+ b'\r' => f.write_str("\\r")?,
+ // Printable characters.
+ 0x20..=0x7e => f.write_char(b as char)?,
+ _ => write!(f, "\\x{:02x}", b)?,
+ }
+ }
+ Ok(())
+ }
+}
+
+impl fmt::Debug for BStr {
+ /// Formats printable ASCII characters with a double quote on either end,
+ /// escaping the rest.
+ ///
+ /// ```
+ /// # use kernel::{fmt, b_str, str::{BStr, CString}};
+ /// // Embedded double quotes are escaped.
+ /// let ascii = b_str!("Hello, \"BStr\"!");
+ /// let s = CString::try_from_fmt(fmt!("{:?}", ascii))?;
+ /// assert_eq!(s.as_bytes(), "\"Hello, \\\"BStr\\\"!\"".as_bytes());
+ ///
+ /// let non_ascii = b_str!("😺");
+ /// let s = CString::try_from_fmt(fmt!("{:?}", non_ascii))?;
+ /// assert_eq!(s.as_bytes(), "\"\\xf0\\x9f\\x98\\xba\"".as_bytes());
+ /// # Ok::<(), kernel::error::Error>(())
+ /// ```
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.write_char('"')?;
+ for &b in &self.0 {
+ match b {
+ // Common escape codes.
+ b'\t' => f.write_str("\\t")?,
+ b'\n' => f.write_str("\\n")?,
+ b'\r' => f.write_str("\\r")?,
+ // String escape characters.
+ b'\"' => f.write_str("\\\"")?,
+ b'\\' => f.write_str("\\\\")?,
+ // Printable characters.
+ 0x20..=0x7e => f.write_char(b as char)?,
+ _ => write!(f, "\\x{:02x}", b)?,
+ }
+ }
+ f.write_char('"')
+ }
+}
+
+impl Deref for BStr {
+ type Target = [u8];
+
+ #[inline]
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
/// Creates a new [`BStr`] from a string literal.
///
@@ -33,7 +124,7 @@ pub type BStr = [u8];
macro_rules! b_str {
($str:literal) => {{
const S: &'static str = $str;
- const C: &'static $crate::str::BStr = S.as_bytes();
+ const C: &'static $crate::str::BStr = $crate::str::BStr::from_bytes(S.as_bytes());
C
}};
}
@@ -72,10 +163,10 @@ impl CStr {
/// Returns the length of this string with `NUL`.
#[inline]
pub const fn len_with_nul(&self) -> usize {
- // SAFETY: This is one of the invariant of `CStr`.
- // We add a `unreachable_unchecked` here to hint the optimizer that
- // the value returned from this function is non-zero.
if self.0.is_empty() {
+            // SAFETY: This is one of the invariants of `CStr`.
+ // We add a `unreachable_unchecked` here to hint the optimizer that
+ // the value returned from this function is non-zero.
unsafe { core::hint::unreachable_unchecked() };
}
self.0.len()
@@ -95,12 +186,12 @@ impl CStr {
/// last at least `'a`. When `CStr` is alive, the memory pointed by `ptr`
/// must not be mutated.
#[inline]
- pub unsafe fn from_char_ptr<'a>(ptr: *const core::ffi::c_char) -> &'a Self {
+ pub unsafe fn from_char_ptr<'a>(ptr: *const crate::ffi::c_char) -> &'a Self {
// SAFETY: The safety precondition guarantees `ptr` is a valid pointer
// to a `NUL`-terminated C string.
let len = unsafe { bindings::strlen(ptr) } + 1;
// SAFETY: Lifetime guaranteed by the safety precondition.
- let bytes = unsafe { core::slice::from_raw_parts(ptr as _, len as _) };
+ let bytes = unsafe { core::slice::from_raw_parts(ptr as _, len) };
// SAFETY: As `len` is returned by `strlen`, `bytes` does not contain interior `NUL`.
// As we have added 1 to `len`, the last byte is known to be `NUL`.
unsafe { Self::from_bytes_with_nul_unchecked(bytes) }
@@ -143,19 +234,32 @@ impl CStr {
unsafe { core::mem::transmute(bytes) }
}
+ /// Creates a mutable [`CStr`] from a `[u8]` without performing any
+ /// additional checks.
+ ///
+ /// # Safety
+ ///
+ /// `bytes` *must* end with a `NUL` byte, and should only have a single
+ /// `NUL` byte (or the string will be truncated).
+ #[inline]
+ pub unsafe fn from_bytes_with_nul_unchecked_mut(bytes: &mut [u8]) -> &mut CStr {
+ // SAFETY: Properties of `bytes` guaranteed by the safety precondition.
+ unsafe { &mut *(bytes as *mut [u8] as *mut CStr) }
+ }
+
/// Returns a C pointer to the string.
#[inline]
- pub const fn as_char_ptr(&self) -> *const core::ffi::c_char {
- self.0.as_ptr() as _
+ pub const fn as_char_ptr(&self) -> *const crate::ffi::c_char {
+ self.0.as_ptr()
}
- /// Convert the string to a byte slice without the trailing 0 byte.
+ /// Convert the string to a byte slice without the trailing `NUL` byte.
#[inline]
pub fn as_bytes(&self) -> &[u8] {
&self.0[..self.len()]
}
- /// Convert the string to a byte slice containing the trailing 0 byte.
+ /// Convert the string to a byte slice containing the trailing `NUL` byte.
#[inline]
pub const fn as_bytes_with_nul(&self) -> &[u8] {
&self.0
@@ -171,8 +275,9 @@ impl CStr {
///
/// ```
/// # use kernel::str::CStr;
- /// let cstr = CStr::from_bytes_with_nul(b"foo\0").unwrap();
+ /// let cstr = CStr::from_bytes_with_nul(b"foo\0")?;
/// assert_eq!(cstr.to_str(), Ok("foo"));
+ /// # Ok::<(), kernel::error::Error>(())
/// ```
#[inline]
pub fn to_str(&self) -> Result<&str, core::str::Utf8Error> {
@@ -191,13 +296,14 @@ impl CStr {
/// ```
/// # use kernel::c_str;
/// # use kernel::str::CStr;
+ /// let bar = c_str!("ツ");
/// // SAFETY: String literals are guaranteed to be valid UTF-8
/// // by the Rust compiler.
- /// let bar = c_str!("ツ");
/// assert_eq!(unsafe { bar.as_str_unchecked() }, "ツ");
/// ```
#[inline]
pub unsafe fn as_str_unchecked(&self) -> &str {
+        // SAFETY: The safety precondition of this function requires the string to be valid UTF-8.
unsafe { core::str::from_utf8_unchecked(self.as_bytes()) }
}
@@ -206,6 +312,70 @@ impl CStr {
pub fn to_cstring(&self) -> Result<CString, AllocError> {
CString::try_from(self)
}
+
+ /// Converts this [`CStr`] to its ASCII lower case equivalent in-place.
+ ///
+ /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
+ /// but non-ASCII letters are unchanged.
+ ///
+ /// To return a new lowercased value without modifying the existing one, use
+ /// [`to_ascii_lowercase()`].
+ ///
+ /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase
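+    ///
+    /// A short sketch (mutating through [`CString`]'s `DerefMut` into [`CStr`]):
+    ///
+    /// ```
+    /// # use kernel::{fmt, str::CString};
+    /// let mut s = CString::try_from_fmt(fmt!("Hello, World!"))?;
+    /// s.make_ascii_lowercase();
+    /// assert_eq!(s.as_bytes(), "hello, world!".as_bytes());
+    /// # Ok::<(), kernel::error::Error>(())
+    /// ```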
+ pub fn make_ascii_lowercase(&mut self) {
+ // INVARIANT: This doesn't introduce or remove NUL bytes in the C
+ // string.
+ self.0.make_ascii_lowercase();
+ }
+
+ /// Converts this [`CStr`] to its ASCII upper case equivalent in-place.
+ ///
+ /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
+ /// but non-ASCII letters are unchanged.
+ ///
+ /// To return a new uppercased value without modifying the existing one, use
+ /// [`to_ascii_uppercase()`].
+ ///
+ /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase
+ pub fn make_ascii_uppercase(&mut self) {
+ // INVARIANT: This doesn't introduce or remove NUL bytes in the C
+ // string.
+ self.0.make_ascii_uppercase();
+ }
+
+    /// Returns a copy of this [`CStr`] where each character is mapped to its
+ /// ASCII lower case equivalent.
+ ///
+ /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
+ /// but non-ASCII letters are unchanged.
+ ///
+ /// To lowercase the value in-place, use [`make_ascii_lowercase`].
+ ///
+    /// [`make_ascii_lowercase`]: #method.make_ascii_lowercase
+ pub fn to_ascii_lowercase(&self) -> Result<CString, AllocError> {
+ let mut s = self.to_cstring()?;
+
+ s.make_ascii_lowercase();
+
+ Ok(s)
+ }
+
+    /// Returns a copy of this [`CStr`] where each character is mapped to its
+ /// ASCII upper case equivalent.
+ ///
+ /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
+ /// but non-ASCII letters are unchanged.
+ ///
+ /// To uppercase the value in-place, use [`make_ascii_uppercase`].
+ ///
+    /// [`make_ascii_uppercase`]: #method.make_ascii_uppercase
+ pub fn to_ascii_uppercase(&self) -> Result<CString, AllocError> {
+ let mut s = self.to_cstring()?;
+
+ s.make_ascii_uppercase();
+
+ Ok(s)
+ }
}
impl fmt::Display for CStr {
@@ -217,12 +387,13 @@ impl fmt::Display for CStr {
/// # use kernel::str::CStr;
/// # use kernel::str::CString;
/// let penguin = c_str!("🐧");
- /// let s = CString::try_from_fmt(fmt!("{}", penguin)).unwrap();
+ /// let s = CString::try_from_fmt(fmt!("{}", penguin))?;
/// assert_eq!(s.as_bytes_with_nul(), "\\xf0\\x9f\\x90\\xa7\0".as_bytes());
///
/// let ascii = c_str!("so \"cool\"");
- /// let s = CString::try_from_fmt(fmt!("{}", ascii)).unwrap();
+ /// let s = CString::try_from_fmt(fmt!("{}", ascii))?;
/// assert_eq!(s.as_bytes_with_nul(), "so \"cool\"\0".as_bytes());
+ /// # Ok::<(), kernel::error::Error>(())
/// ```
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for &c in self.as_bytes() {
@@ -246,13 +417,14 @@ impl fmt::Debug for CStr {
/// # use kernel::str::CStr;
/// # use kernel::str::CString;
/// let penguin = c_str!("🐧");
- /// let s = CString::try_from_fmt(fmt!("{:?}", penguin)).unwrap();
+ /// let s = CString::try_from_fmt(fmt!("{:?}", penguin))?;
/// assert_eq!(s.as_bytes_with_nul(), "\"\\xf0\\x9f\\x90\\xa7\"\0".as_bytes());
///
/// // Embedded double quotes are escaped.
/// let ascii = c_str!("so \"cool\"");
- /// let s = CString::try_from_fmt(fmt!("{:?}", ascii)).unwrap();
+ /// let s = CString::try_from_fmt(fmt!("{:?}", ascii))?;
/// assert_eq!(s.as_bytes_with_nul(), "\"so \\\"cool\\\"\"\0".as_bytes());
+ /// # Ok::<(), kernel::error::Error>(())
/// ```
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str("\"")?;
@@ -271,7 +443,7 @@ impl fmt::Debug for CStr {
impl AsRef<BStr> for CStr {
#[inline]
fn as_ref(&self) -> &BStr {
- self.as_bytes()
+ BStr::from_bytes(self.as_bytes())
}
}
@@ -280,7 +452,7 @@ impl Deref for CStr {
#[inline]
fn deref(&self) -> &Self::Target {
- self.as_bytes()
+ self.as_ref()
}
}
@@ -327,7 +499,7 @@ where
#[inline]
fn index(&self, index: Idx) -> &Self::Output {
- &self.as_bytes()[index]
+ &self.as_ref()[index]
}
}
@@ -355,9 +527,46 @@ macro_rules! c_str {
}
#[cfg(test)]
+#[expect(clippy::items_after_test_module)]
mod tests {
use super::*;
+ struct String(CString);
+
+ impl String {
+ fn from_fmt(args: fmt::Arguments<'_>) -> Self {
+ String(CString::try_from_fmt(args).unwrap())
+ }
+ }
+
+ impl Deref for String {
+ type Target = str;
+
+ fn deref(&self) -> &str {
+ self.0.to_str().unwrap()
+ }
+ }
+
+ macro_rules! format {
+ ($($f:tt)*) => ({
+ &*String::from_fmt(kernel::fmt!($($f)*))
+ })
+ }
+
+ const ALL_ASCII_CHARS: &str =
+ "\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\x09\\x0a\\x0b\\x0c\\x0d\\x0e\\x0f\
+ \\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f \
+ !\"#$%&'()*+,-./0123456789:;<=>?@\
+ ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f\
+ \\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d\\x8e\\x8f\
+ \\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b\\x9c\\x9d\\x9e\\x9f\
+ \\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9\\xaa\\xab\\xac\\xad\\xae\\xaf\
+ \\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\
+ \\xc0\\xc1\\xc2\\xc3\\xc4\\xc5\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\
+ \\xd0\\xd1\\xd2\\xd3\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\
+ \\xe0\\xe1\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef\
+ \\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd\\xfe\\xff";
+
#[test]
fn test_cstr_to_str() {
let good_bytes = b"\xf0\x9f\xa6\x80\0";
@@ -378,9 +587,73 @@ mod tests {
fn test_cstr_as_str_unchecked() {
let good_bytes = b"\xf0\x9f\x90\xA7\0";
let checked_cstr = CStr::from_bytes_with_nul(good_bytes).unwrap();
+ // SAFETY: The contents come from a string literal which contains valid UTF-8.
let unchecked_str = unsafe { checked_cstr.as_str_unchecked() };
assert_eq!(unchecked_str, "🐧");
}
+
+ #[test]
+ fn test_cstr_display() {
+ let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0").unwrap();
+ assert_eq!(format!("{}", hello_world), "hello, world!");
+ let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0").unwrap();
+ assert_eq!(format!("{}", non_printables), "\\x01\\x09\\x0a");
+ let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0").unwrap();
+ assert_eq!(format!("{}", non_ascii), "d\\xe9j\\xe0 vu");
+ let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0").unwrap();
+ assert_eq!(format!("{}", good_bytes), "\\xf0\\x9f\\xa6\\x80");
+ }
+
+ #[test]
+ fn test_cstr_display_all_bytes() {
+ let mut bytes: [u8; 256] = [0; 256];
+ // fill `bytes` with [1..=255] + [0]
+ for i in u8::MIN..=u8::MAX {
+ bytes[i as usize] = i.wrapping_add(1);
+ }
+ let cstr = CStr::from_bytes_with_nul(&bytes).unwrap();
+ assert_eq!(format!("{}", cstr), ALL_ASCII_CHARS);
+ }
+
+ #[test]
+ fn test_cstr_debug() {
+ let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0").unwrap();
+ assert_eq!(format!("{:?}", hello_world), "\"hello, world!\"");
+ let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0").unwrap();
+ assert_eq!(format!("{:?}", non_printables), "\"\\x01\\x09\\x0a\"");
+ let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0").unwrap();
+ assert_eq!(format!("{:?}", non_ascii), "\"d\\xe9j\\xe0 vu\"");
+ let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0").unwrap();
+ assert_eq!(format!("{:?}", good_bytes), "\"\\xf0\\x9f\\xa6\\x80\"");
+ }
+
+ #[test]
+ fn test_bstr_display() {
+ let hello_world = BStr::from_bytes(b"hello, world!");
+ assert_eq!(format!("{}", hello_world), "hello, world!");
+ let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_");
+ assert_eq!(format!("{}", escapes), "_\\t_\\n_\\r_\\_'_\"_");
+ let others = BStr::from_bytes(b"\x01");
+ assert_eq!(format!("{}", others), "\\x01");
+ let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu");
+ assert_eq!(format!("{}", non_ascii), "d\\xe9j\\xe0 vu");
+ let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80");
+ assert_eq!(format!("{}", good_bytes), "\\xf0\\x9f\\xa6\\x80");
+ }
+
+ #[test]
+ fn test_bstr_debug() {
+ let hello_world = BStr::from_bytes(b"hello, world!");
+ assert_eq!(format!("{:?}", hello_world), "\"hello, world!\"");
+ let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_");
+ assert_eq!(format!("{:?}", escapes), "\"_\\t_\\n_\\r_\\\\_'_\\\"_\"");
+ let others = BStr::from_bytes(b"\x01");
+ assert_eq!(format!("{:?}", others), "\"\\x01\"");
+ let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu");
+ assert_eq!(format!("{:?}", non_ascii), "\"d\\xe9j\\xe0 vu\"");
+ let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80");
+ assert_eq!(format!("{:?}", good_bytes), "\"\\xf0\\x9f\\xa6\\x80\"");
+ }
}
/// Allows formatting of [`fmt::Arguments`] into a raw buffer.
@@ -449,7 +722,7 @@ impl RawFormatter {
self.pos as _
}
- /// Return the number of bytes written to the formatter.
+ /// Returns the number of bytes written to the formatter.
pub(crate) fn bytes_written(&self) -> usize {
self.pos - self.beg
}
@@ -533,19 +806,20 @@ impl fmt::Write for Formatter {
/// ```
/// use kernel::{str::CString, fmt};
///
-/// let s = CString::try_from_fmt(fmt!("{}{}{}", "abc", 10, 20)).unwrap();
+/// let s = CString::try_from_fmt(fmt!("{}{}{}", "abc", 10, 20))?;
/// assert_eq!(s.as_bytes_with_nul(), "abc1020\0".as_bytes());
///
/// let tmp = "testing";
-/// let s = CString::try_from_fmt(fmt!("{tmp}{}", 123)).unwrap();
+/// let s = CString::try_from_fmt(fmt!("{tmp}{}", 123))?;
/// assert_eq!(s.as_bytes_with_nul(), "testing123\0".as_bytes());
///
/// // This fails because it has an embedded `NUL` byte.
/// let s = CString::try_from_fmt(fmt!("a\0b{}", 123));
/// assert_eq!(s.is_ok(), false);
+/// # Ok::<(), kernel::error::Error>(())
/// ```
pub struct CString {
- buf: Vec<u8>,
+ buf: KVec<u8>,
}
impl CString {
@@ -558,7 +832,7 @@ impl CString {
let size = f.bytes_written();
// Allocate a vector with the required number of bytes, and write to it.
- let mut buf = Vec::try_with_capacity(size)?;
+ let mut buf = KVec::with_capacity(size, GFP_KERNEL)?;
// SAFETY: The buffer stored in `buf` is at least of size `size` and is valid for writes.
let mut f = unsafe { Formatter::from_buffer(buf.as_mut_ptr(), size) };
f.write_fmt(args)?;
@@ -572,7 +846,7 @@ impl CString {
// SAFETY: The buffer is valid for read because `f.bytes_written()` is bounded by `size`
         // (which is the minimum buffer size) and is non-zero (we wrote at least the `NUL` terminator)
// so `f.bytes_written() - 1` doesn't underflow.
- let ptr = unsafe { bindings::memchr(buf.as_ptr().cast(), 0, (f.bytes_written() - 1) as _) };
+ let ptr = unsafe { bindings::memchr(buf.as_ptr().cast(), 0, f.bytes_written() - 1) };
if !ptr.is_null() {
return Err(EINVAL);
}
@@ -593,14 +867,21 @@ impl Deref for CString {
}
}
+impl DerefMut for CString {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ // SAFETY: A `CString` is always NUL-terminated and contains no other
+ // NUL bytes.
+ unsafe { CStr::from_bytes_with_nul_unchecked_mut(self.buf.as_mut_slice()) }
+ }
+}
+
impl<'a> TryFrom<&'a CStr> for CString {
type Error = AllocError;
fn try_from(cstr: &'a CStr) -> Result<CString, AllocError> {
- let mut buf = Vec::new();
+ let mut buf = KVec::new();
- buf.try_extend_from_slice(cstr.as_bytes_with_nul())
- .map_err(|_| AllocError)?;
+ buf.extend_from_slice(cstr.as_bytes_with_nul(), GFP_KERNEL)?;
// INVARIANT: The `CStr` and `CString` types have the same invariants for
// the string data, and we copied it over without changes.
diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs
index d219ee518eff..16eab9138b2b 100644
--- a/rust/kernel/sync.rs
+++ b/rust/kernel/sync.rs
@@ -11,10 +11,14 @@ mod arc;
mod condvar;
pub mod lock;
mod locked_by;
+pub mod poll;
+pub mod rcu;
pub use arc::{Arc, ArcBorrow, UniqueArc};
-pub use condvar::CondVar;
-pub use lock::{mutex::Mutex, spinlock::SpinLock};
+pub use condvar::{new_condvar, CondVar, CondVarTimeoutResult};
+pub use lock::global::{global_lock, GlobalGuard, GlobalLock, GlobalLockBackend, GlobalLockedBy};
+pub use lock::mutex::{new_mutex, Mutex, MutexGuard};
+pub use lock::spinlock::{new_spinlock, SpinLock, SpinLockGuard};
pub use locked_by::LockedBy;
/// Represents a lockdep class. It's a wrapper around C's `lock_class_key`.
@@ -26,11 +30,6 @@ pub struct LockClassKey(Opaque<bindings::lock_class_key>);
unsafe impl Sync for LockClassKey {}
impl LockClassKey {
- /// Creates a new lock class key.
- pub const fn new() -> Self {
- Self(Opaque::uninit())
- }
-
pub(crate) fn as_ptr(&self) -> *mut bindings::lock_class_key {
self.0.get()
}
@@ -41,7 +40,10 @@ impl LockClassKey {
#[macro_export]
macro_rules! static_lock_class {
() => {{
- static CLASS: $crate::sync::LockClassKey = $crate::sync::LockClassKey::new();
+ static CLASS: $crate::sync::LockClassKey =
+ // SAFETY: lockdep expects uninitialized memory when it's handed a statically allocated
+ // lock_class_key
+ unsafe { ::core::mem::MaybeUninit::uninit().assume_init() };
&CLASS
}};
}
diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs
index 77cdbcf7bd2e..3cefda7a4372 100644
--- a/rust/kernel/sync/arc.rs
+++ b/rust/kernel/sync/arc.rs
@@ -12,25 +12,25 @@
//! 2. It does not support weak references, which allows it to be half the size.
//! 3. It saturates the reference count instead of aborting when it goes over a threshold.
//! 4. It does not provide a `get_mut` method, so the ref counted object is pinned.
+//! 5. The object in [`Arc`] is pinned implicitly.
//!
//! [`Arc`]: https://doc.rust-lang.org/std/sync/struct.Arc.html
use crate::{
+ alloc::{AllocError, Flags, KBox},
bindings,
- error::{self, Error},
init::{self, InPlaceInit, Init, PinInit},
try_init,
types::{ForeignOwnable, Opaque},
};
-use alloc::boxed::Box;
use core::{
- alloc::{AllocError, Layout},
+ alloc::Layout,
fmt,
- marker::{PhantomData, Unsize},
+ marker::PhantomData,
mem::{ManuallyDrop, MaybeUninit},
ops::{Deref, DerefMut},
pin::Pin,
- ptr::{NonNull, Pointee},
+ ptr::NonNull,
};
use macros::pin_data;
@@ -56,8 +56,8 @@ mod std_vendor;
/// b: u32,
/// }
///
-/// // Create a ref-counted instance of `Example`.
-/// let obj = Arc::try_new(Example { a: 10, b: 20 })?;
+/// // Create a refcounted instance of `Example`.
+/// let obj = Arc::new(Example { a: 10, b: 20 }, GFP_KERNEL)?;
///
/// // Get a new pointer to `obj` and increment the refcount.
/// let cloned = obj.clone();
@@ -96,7 +96,7 @@ mod std_vendor;
/// }
/// }
///
-/// let obj = Arc::try_new(Example { a: 10, b: 20 })?;
+/// let obj = Arc::new(Example { a: 10, b: 20 }, GFP_KERNEL)?;
/// obj.use_reference();
/// obj.take_over();
/// # Ok::<(), Error>(())
@@ -119,14 +119,24 @@ mod std_vendor;
/// impl MyTrait for Example {}
///
/// // `obj` has type `Arc<Example>`.
-/// let obj: Arc<Example> = Arc::try_new(Example)?;
+/// let obj: Arc<Example> = Arc::new(Example, GFP_KERNEL)?;
///
/// // `coerced` has type `Arc<dyn MyTrait>`.
/// let coerced: Arc<dyn MyTrait> = obj;
/// # Ok::<(), Error>(())
/// ```
+#[repr(transparent)]
+#[cfg_attr(CONFIG_RUSTC_HAS_COERCE_POINTEE, derive(core::marker::CoercePointee))]
pub struct Arc<T: ?Sized> {
ptr: NonNull<ArcInner<T>>,
+ // NB: this informs dropck that objects of type `ArcInner<T>` may be used in `<Arc<T> as
+ // Drop>::drop`. Note that dropck already assumes that objects of type `T` may be used in
+ // `<Arc<T> as Drop>::drop` and the distinction between `T` and `ArcInner<T>` is not presently
+ // meaningful with respect to dropck - but this may change in the future so this is left here
+ // out of an abundance of caution.
+ //
+ // See https://doc.rust-lang.org/nomicon/phantom-data.html#generic-parameters-and-drop-checking
+ // for more detail on the semantics of dropck in the presence of `PhantomData`.
_p: PhantomData<ArcInner<T>>,
}
@@ -137,15 +147,47 @@ struct ArcInner<T: ?Sized> {
data: T,
}
-// This is to allow [`Arc`] (and variants) to be used as the type of `self`.
-impl<T: ?Sized> core::ops::Receiver for Arc<T> {}
+impl<T: ?Sized> ArcInner<T> {
+ /// Converts a pointer to the contents of an [`Arc`] into a pointer to the [`ArcInner`].
+ ///
+ /// # Safety
+ ///
+ /// `ptr` must have been returned by a previous call to [`Arc::into_raw`], and the `Arc` must
+ /// not yet have been destroyed.
+ unsafe fn container_of(ptr: *const T) -> NonNull<ArcInner<T>> {
+ let refcount_layout = Layout::new::<bindings::refcount_t>();
+ // SAFETY: The caller guarantees that the pointer is valid.
+ let val_layout = Layout::for_value(unsafe { &*ptr });
+ // SAFETY: We're computing the layout of a real struct that existed when compiling this
+ // binary, so its layout is not so large that it can trigger arithmetic overflow.
+ let val_offset = unsafe { refcount_layout.extend(val_layout).unwrap_unchecked().1 };
+
+ // Pointer casts leave the metadata unchanged. This is okay because the metadata of `T` and
+ // `ArcInner<T>` is the same since `ArcInner` is a struct with `T` as its last field.
+ //
+ // This is documented at:
+ // <https://doc.rust-lang.org/std/ptr/trait.Pointee.html>.
+ let ptr = ptr as *const ArcInner<T>;
+
+ // SAFETY: The pointer is in-bounds of an allocation both before and after offsetting the
+ // pointer, since it originates from a previous call to `Arc::into_raw` on an `Arc` that is
+ // still valid.
+ let ptr = unsafe { ptr.byte_sub(val_offset) };
+
+ // SAFETY: The pointer can't be null since you can't have an `ArcInner<T>` value at the null
+ // address.
+ unsafe { NonNull::new_unchecked(ptr.cast_mut()) }
+ }
+}
// This is to allow coercion from `Arc<T>` to `Arc<U>` if `T` can be converted to the
// dynamically-sized type (DST) `U`.
-impl<T: ?Sized + Unsize<U>, U: ?Sized> core::ops::CoerceUnsized<Arc<U>> for Arc<T> {}
+#[cfg(not(CONFIG_RUSTC_HAS_COERCE_POINTEE))]
+impl<T: ?Sized + core::marker::Unsize<U>, U: ?Sized> core::ops::CoerceUnsized<Arc<U>> for Arc<T> {}
// This is to allow `Arc<U>` to be dispatched on when `Arc<T>` can be coerced into `Arc<U>`.
-impl<T: ?Sized + Unsize<U>, U: ?Sized> core::ops::DispatchFromDyn<Arc<U>> for Arc<T> {}
+#[cfg(not(CONFIG_RUSTC_HAS_COERCE_POINTEE))]
+impl<T: ?Sized + core::marker::Unsize<U>, U: ?Sized> core::ops::DispatchFromDyn<Arc<U>> for Arc<T> {}
// SAFETY: It is safe to send `Arc<T>` to another thread when the underlying `T` is `Sync` because
// it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally, it needs
@@ -162,7 +204,7 @@ unsafe impl<T: ?Sized + Sync + Send> Sync for Arc<T> {}
impl<T> Arc<T> {
/// Constructs a new reference counted instance of `T`.
- pub fn try_new(contents: T) -> Result<Self, AllocError> {
+ pub fn new(contents: T, flags: Flags) -> Result<Self, AllocError> {
// INVARIANT: The refcount is initialised to a non-zero value.
let value = ArcInner {
// SAFETY: There are no safety requirements for this FFI call.
@@ -170,33 +212,12 @@ impl<T> Arc<T> {
data: contents,
};
- let inner = Box::try_new(value)?;
+ let inner = KBox::new(value, flags)?;
+ let inner = KBox::leak(inner).into();
// SAFETY: We just created `inner` with a reference count of 1, which is owned by the new
// `Arc` object.
- Ok(unsafe { Self::from_inner(Box::leak(inner).into()) })
- }
-
- /// Use the given initializer to in-place initialize a `T`.
- ///
- /// If `T: !Unpin` it will not be able to move afterwards.
- #[inline]
- pub fn pin_init<E>(init: impl PinInit<T, E>) -> error::Result<Self>
- where
- Error: From<E>,
- {
- UniqueArc::pin_init(init).map(|u| u.into())
- }
-
- /// Use the given initializer to in-place initialize a `T`.
- ///
- /// This is equivalent to [`Arc<T>::pin_init`], since an [`Arc`] is always pinned.
- #[inline]
- pub fn init<E>(init: impl Init<T, E>) -> error::Result<Self>
- where
- Error: From<E>,
- {
- UniqueArc::init(init).map(|u| u.into())
+ Ok(unsafe { Self::from_inner(inner) })
}
}
@@ -232,29 +253,13 @@ impl<T: ?Sized> Arc<T> {
/// `ptr` must have been returned by a previous call to [`Arc::into_raw`]. Additionally, it
/// must not be called more than once for each previous call to [`Arc::into_raw`].
pub unsafe fn from_raw(ptr: *const T) -> Self {
- let refcount_layout = Layout::new::<bindings::refcount_t>();
- // SAFETY: The caller guarantees that the pointer is valid.
- let val_layout = Layout::for_value(unsafe { &*ptr });
- // SAFETY: We're computing the layout of a real struct that existed when compiling this
- // binary, so its layout is not so large that it can trigger arithmetic overflow.
- let val_offset = unsafe { refcount_layout.extend(val_layout).unwrap_unchecked().1 };
-
- let metadata: <T as Pointee>::Metadata = core::ptr::metadata(ptr);
- // SAFETY: The metadata of `T` and `ArcInner<T>` is the same because `ArcInner` is a struct
- // with `T` as its last field.
- //
- // This is documented at:
- // <https://doc.rust-lang.org/std/ptr/trait.Pointee.html>.
- let metadata: <ArcInner<T> as Pointee>::Metadata =
- unsafe { core::mem::transmute_copy(&metadata) };
- // SAFETY: The pointer is in-bounds of an allocation both before and after offsetting the
- // pointer, since it originates from a previous call to `Arc::into_raw` and is still valid.
- let ptr = unsafe { (ptr as *mut u8).sub(val_offset) as *mut () };
- let ptr = core::ptr::from_raw_parts_mut(ptr, metadata);
+ // SAFETY: The caller promises that this pointer originates from a call to `into_raw` on an
+ // `Arc` that is still valid.
+ let ptr = unsafe { ArcInner::container_of(ptr) };
// SAFETY: By the safety requirements we know that `ptr` came from `Arc::into_raw`, so the
// reference count held then will be owned by the new `Arc` object.
- unsafe { Self::from_inner(NonNull::new_unchecked(ptr)) }
+ unsafe { Self::from_inner(ptr) }
}
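The `container_of` refactor does not change the `into_raw`/`from_raw` contract, sketched below: one refcount travels with the raw pointer, and reconstructing it more than once is undefined behaviour. To be read inside a function returning `Result`:

```rust
use kernel::sync::Arc;

let a = Arc::new(42u32, GFP_KERNEL)?;
let p: *const u32 = Arc::into_raw(a); // one refcount now lives in `p`
// SAFETY: `p` came from `into_raw` above and is passed to `from_raw` exactly once.
let a = unsafe { Arc::from_raw(p) };
assert_eq!(*a, 42);
```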
/// Returns an [`ArcBorrow`] from the given [`Arc`].
@@ -273,30 +278,103 @@ impl<T: ?Sized> Arc<T> {
pub fn ptr_eq(this: &Self, other: &Self) -> bool {
core::ptr::eq(this.ptr.as_ptr(), other.ptr.as_ptr())
}
+
+ /// Converts this [`Arc`] into a [`UniqueArc`], or destroys it if it is not unique.
+ ///
+ /// When this destroys the `Arc`, it does so while properly avoiding races. This means that
+ /// this method will never call the destructor of the value.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use kernel::sync::{Arc, UniqueArc};
+ ///
+ /// let arc = Arc::new(42, GFP_KERNEL)?;
+ /// let unique_arc = arc.into_unique_or_drop();
+ ///
+ /// // The above conversion should succeed since refcount of `arc` is 1.
+ /// assert!(unique_arc.is_some());
+ ///
+ /// assert_eq!(*(unique_arc.unwrap()), 42);
+ ///
+ /// # Ok::<(), Error>(())
+ /// ```
+ ///
+ /// ```
+ /// use kernel::sync::{Arc, UniqueArc};
+ ///
+ /// let arc = Arc::new(42, GFP_KERNEL)?;
+ /// let another = arc.clone();
+ ///
+ /// let unique_arc = arc.into_unique_or_drop();
+ ///
+ /// // The above conversion should fail since refcount of `arc` is >1.
+ /// assert!(unique_arc.is_none());
+ ///
+ /// # Ok::<(), Error>(())
+ /// ```
+ pub fn into_unique_or_drop(self) -> Option<Pin<UniqueArc<T>>> {
+ // We will manually manage the refcount in this method, so we disable the destructor.
+ let me = ManuallyDrop::new(self);
+ // SAFETY: We own a refcount, so the pointer is still valid.
+ let refcount = unsafe { me.ptr.as_ref() }.refcount.get();
+
+ // If the refcount reaches a non-zero value, then we have destroyed this `Arc` and will
+ // return without further touching the `Arc`. If the refcount reaches zero, then there are
+ // no other arcs, and we can create a `UniqueArc`.
+ //
+ // SAFETY: We own a refcount, so the pointer is not dangling.
+ let is_zero = unsafe { bindings::refcount_dec_and_test(refcount) };
+ if is_zero {
+ // SAFETY: We have exclusive access to the arc, so we can perform unsynchronized
+ // accesses to the refcount.
+ unsafe { core::ptr::write(refcount, bindings::REFCOUNT_INIT(1)) };
+
+ // INVARIANT: We own the only refcount to this arc, so we may create a `UniqueArc`. We
+ // must pin the `UniqueArc` because the values was previously in an `Arc`, and they pin
+ // their values.
+ Some(Pin::from(UniqueArc {
+ inner: ManuallyDrop::into_inner(me),
+ }))
+ } else {
+ None
+ }
+ }
}
impl<T: 'static> ForeignOwnable for Arc<T> {
type Borrowed<'a> = ArcBorrow<'a, T>;
+ type BorrowedMut<'a> = Self::Borrowed<'a>;
- fn into_foreign(self) -> *const core::ffi::c_void {
- ManuallyDrop::new(self).ptr.as_ptr() as _
+ fn into_foreign(self) -> *mut crate::ffi::c_void {
+ ManuallyDrop::new(self).ptr.as_ptr().cast()
}
- unsafe fn borrow<'a>(ptr: *const core::ffi::c_void) -> ArcBorrow<'a, T> {
+ unsafe fn from_foreign(ptr: *mut crate::ffi::c_void) -> Self {
+ // SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous
+ // call to `Self::into_foreign`.
+ let inner = unsafe { NonNull::new_unchecked(ptr.cast::<ArcInner<T>>()) };
+
// SAFETY: By the safety requirement of this function, we know that `ptr` came from
- // a previous call to `Arc::into_foreign`.
- let inner = NonNull::new(ptr as *mut ArcInner<T>).unwrap();
+ // a previous call to `Arc::into_foreign`, which guarantees that `ptr` is valid and
+ // holds a reference count increment that is transferrable to us.
+ unsafe { Self::from_inner(inner) }
+ }
+
+ unsafe fn borrow<'a>(ptr: *mut crate::ffi::c_void) -> ArcBorrow<'a, T> {
+ // SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous
+ // call to `Self::into_foreign`.
+ let inner = unsafe { NonNull::new_unchecked(ptr.cast::<ArcInner<T>>()) };
// SAFETY: The safety requirements of `from_foreign` ensure that the object remains alive
// for the lifetime of the returned value.
unsafe { ArcBorrow::new(inner) }
}
- unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self {
- // SAFETY: By the safety requirement of this function, we know that `ptr` came from
- // a previous call to `Arc::into_foreign`, which guarantees that `ptr` is valid and
- // holds a reference count increment that is transferrable to us.
- unsafe { Self::from_inner(NonNull::new(ptr as _).unwrap()) }
+ unsafe fn borrow_mut<'a>(ptr: *mut crate::ffi::c_void) -> ArcBorrow<'a, T> {
+ // SAFETY: The safety requirements for `borrow_mut` are a superset of the safety
+ // requirements for `borrow`.
+ unsafe { Self::borrow(ptr) }
}
}
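A hedged sketch of how these hooks pair up when shuttling an `Arc` through C-owned storage (the C round trip itself is elided). To be read inside a function returning `Result`:

```rust
use kernel::{sync::Arc, types::ForeignOwnable};

let a = Arc::new(10u32, GFP_KERNEL)?;
let ptr = a.into_foreign(); // ownership of one refcount moves into the pointer
// ... store `ptr` in a C structure, retrieve it later in a callback ...
// SAFETY: `ptr` came from `into_foreign` and is reclaimed exactly once.
let a = unsafe { Arc::<u32>::from_foreign(ptr) };
assert_eq!(*a, 10);
```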
@@ -318,10 +396,14 @@ impl<T: ?Sized> AsRef<T> for Arc<T> {
impl<T: ?Sized> Clone for Arc<T> {
fn clone(&self) -> Self {
+ // SAFETY: By the type invariant, there is necessarily a reference to the object, so it is
+ // safe to dereference it.
+ let refcount = unsafe { self.ptr.as_ref() }.refcount.get();
+
// INVARIANT: C `refcount_inc` saturates the refcount, so it cannot overflow to zero.
// SAFETY: By the type invariant, there is necessarily a reference to the object, so it is
// safe to increment the refcount.
- unsafe { bindings::refcount_inc(self.ptr.as_ref().refcount.get()) };
+ unsafe { bindings::refcount_inc(refcount) };
// SAFETY: We just incremented the refcount. This increment is now owned by the new `Arc`.
unsafe { Self::from_inner(self.ptr) }
@@ -343,8 +425,8 @@ impl<T: ?Sized> Drop for Arc<T> {
if is_zero {
// The count reached zero, we must free the memory.
//
- // SAFETY: The pointer was initialised from the result of `Box::leak`.
- unsafe { drop(Box::from_raw(self.ptr.as_ptr())) };
+ // SAFETY: The pointer was initialised from the result of `KBox::leak`.
+ unsafe { drop(KBox::from_raw(self.ptr.as_ptr())) };
}
}
}
@@ -365,12 +447,12 @@ impl<T: ?Sized> From<Pin<UniqueArc<T>>> for Arc<T> {
/// A borrowed reference to an [`Arc`] instance.
///
/// For cases when one doesn't ever need to increment the refcount on the allocation, it is simpler
-/// to use just `&T`, which we can trivially get from an `Arc<T>` instance.
+/// to use just `&T`, which we can trivially get from an [`Arc<T>`] instance.
///
/// However, when one may need to increment the refcount, it is preferable to use an `ArcBorrow<T>`
/// over `&Arc<T>` because the latter results in a double-indirection: a pointer (shared reference)
-/// to a pointer (`Arc<T>`) to the object (`T`). An [`ArcBorrow`] eliminates this double
-/// indirection while still allowing one to increment the refcount and getting an `Arc<T>` when/if
+/// to a pointer ([`Arc<T>`]) to the object (`T`). An [`ArcBorrow`] eliminates this double
+/// indirection while still allowing one to increment the refcount and get an [`Arc<T>`] when/if
/// needed.
///
/// # Invariants
@@ -389,7 +471,7 @@ impl<T: ?Sized> From<Pin<UniqueArc<T>>> for Arc<T> {
/// e.into()
/// }
///
-/// let obj = Arc::try_new(Example)?;
+/// let obj = Arc::new(Example, GFP_KERNEL)?;
/// let cloned = do_something(obj.as_arc_borrow());
///
/// // Assert that both `obj` and `cloned` point to the same underlying object.
@@ -413,21 +495,21 @@ impl<T: ?Sized> From<Pin<UniqueArc<T>>> for Arc<T> {
/// }
/// }
///
-/// let obj = Arc::try_new(Example { a: 10, b: 20 })?;
+/// let obj = Arc::new(Example { a: 10, b: 20 }, GFP_KERNEL)?;
/// obj.as_arc_borrow().use_reference();
/// # Ok::<(), Error>(())
/// ```
+#[repr(transparent)]
+#[cfg_attr(CONFIG_RUSTC_HAS_COERCE_POINTEE, derive(core::marker::CoercePointee))]
pub struct ArcBorrow<'a, T: ?Sized + 'a> {
inner: NonNull<ArcInner<T>>,
_p: PhantomData<&'a ()>,
}
-// This is to allow [`ArcBorrow`] (and variants) to be used as the type of `self`.
-impl<T: ?Sized> core::ops::Receiver for ArcBorrow<'_, T> {}
-
// This is to allow `ArcBorrow<U>` to be dispatched on when `ArcBorrow<T>` can be coerced into
// `ArcBorrow<U>`.
-impl<T: ?Sized + Unsize<U>, U: ?Sized> core::ops::DispatchFromDyn<ArcBorrow<'_, U>>
+#[cfg(not(CONFIG_RUSTC_HAS_COERCE_POINTEE))]
+impl<T: ?Sized + core::marker::Unsize<U>, U: ?Sized> core::ops::DispatchFromDyn<ArcBorrow<'_, U>>
for ArcBorrow<'_, T>
{
}
@@ -455,6 +537,27 @@ impl<T: ?Sized> ArcBorrow<'_, T> {
_p: PhantomData,
}
}
+
+ /// Creates an [`ArcBorrow`] to an [`Arc`] that has previously been deconstructed with
+ /// [`Arc::into_raw`].
+ ///
+ /// # Safety
+ ///
+ /// * The provided pointer must originate from a call to [`Arc::into_raw`].
+ /// * For the duration of the lifetime annotated on this `ArcBorrow`, the reference count must
+ /// not hit zero.
+ /// * For the duration of the lifetime annotated on this `ArcBorrow`, there must not be a
+ /// [`UniqueArc`] reference to this value.
+ pub unsafe fn from_raw(ptr: *const T) -> Self {
+ // SAFETY: The caller promises that this pointer originates from a call to `into_raw` on an
+ // `Arc` that is still valid.
+ let ptr = unsafe { ArcInner::container_of(ptr) };
+
+ // SAFETY: The caller promises that the value remains valid since the reference count must
+ // not hit zero, and no mutable reference will be created since that would involve a
+ // `UniqueArc`.
+ unsafe { Self::new(ptr) }
+ }
}
impl<T: ?Sized> From<ArcBorrow<'_, T>> for Arc<T> {
@@ -501,7 +604,7 @@ impl<T: ?Sized> Deref for ArcBorrow<'_, T> {
/// }
///
/// fn test() -> Result<Arc<Example>> {
-/// let mut x = UniqueArc::try_new(Example { a: 10, b: 20 })?;
+/// let mut x = UniqueArc::new(Example { a: 10, b: 20 }, GFP_KERNEL)?;
/// x.a += 1;
/// x.b += 1;
/// Ok(x.into())
@@ -510,7 +613,7 @@ impl<T: ?Sized> Deref for ArcBorrow<'_, T> {
/// # test().unwrap();
/// ```
///
-/// In the following example we first allocate memory for a ref-counted `Example` but we don't
+/// In the following example we first allocate memory for a refcounted `Example` but we don't
/// initialise it on allocation. We do initialise it later with a call to [`UniqueArc::write`],
/// followed by a conversion to `Arc<Example>`. This is particularly useful when allocation happens
/// in one context (e.g., sleepable) and initialisation in another (e.g., atomic):
@@ -524,7 +627,7 @@ impl<T: ?Sized> Deref for ArcBorrow<'_, T> {
/// }
///
/// fn test() -> Result<Arc<Example>> {
-/// let x = UniqueArc::try_new_uninit()?;
+/// let x = UniqueArc::new_uninit(GFP_KERNEL)?;
/// Ok(x.write(Example { a: 10, b: 20 }).into())
/// }
///
@@ -544,7 +647,7 @@ impl<T: ?Sized> Deref for ArcBorrow<'_, T> {
/// }
///
/// fn test() -> Result<Arc<Example>> {
-/// let mut pinned = Pin::from(UniqueArc::try_new(Example { a: 10, b: 20 })?);
+/// let mut pinned = Pin::from(UniqueArc::new(Example { a: 10, b: 20 }, GFP_KERNEL)?);
/// // We can modify `pinned` because it is `Unpin`.
/// pinned.as_mut().a += 1;
/// Ok(pinned.into())
@@ -558,25 +661,28 @@ pub struct UniqueArc<T: ?Sized> {
impl<T> UniqueArc<T> {
/// Tries to allocate a new [`UniqueArc`] instance.
- pub fn try_new(value: T) -> Result<Self, AllocError> {
+ pub fn new(value: T, flags: Flags) -> Result<Self, AllocError> {
Ok(Self {
- // INVARIANT: The newly-created object has a ref-count of 1.
- inner: Arc::try_new(value)?,
+ // INVARIANT: The newly-created object has a refcount of 1.
+ inner: Arc::new(value, flags)?,
})
}
/// Tries to allocate a new [`UniqueArc`] instance whose contents are not initialised yet.
- pub fn try_new_uninit() -> Result<UniqueArc<MaybeUninit<T>>, AllocError> {
+ pub fn new_uninit(flags: Flags) -> Result<UniqueArc<MaybeUninit<T>>, AllocError> {
// INVARIANT: The refcount is initialised to a non-zero value.
- let inner = Box::try_init::<AllocError>(try_init!(ArcInner {
- // SAFETY: There are no safety requirements for this FFI call.
- refcount: Opaque::new(unsafe { bindings::REFCOUNT_INIT(1) }),
- data <- init::uninit::<T, AllocError>(),
- }? AllocError))?;
+ let inner = KBox::try_init::<AllocError>(
+ try_init!(ArcInner {
+ // SAFETY: There are no safety requirements for this FFI call.
+ refcount: Opaque::new(unsafe { bindings::REFCOUNT_INIT(1) }),
+ data <- init::uninit::<T, AllocError>(),
+ }? AllocError),
+ flags,
+ )?;
Ok(UniqueArc {
- // INVARIANT: The newly-created object has a ref-count of 1.
- // SAFETY: The pointer from the `Box` is valid.
- inner: unsafe { Arc::from_inner(Box::leak(inner).into()) },
+ // INVARIANT: The newly-created object has a refcount of 1.
+ // SAFETY: The pointer from the `KBox` is valid.
+ inner: unsafe { Arc::from_inner(KBox::leak(inner).into()) },
})
}
}
diff --git a/rust/kernel/sync/arc/std_vendor.rs b/rust/kernel/sync/arc/std_vendor.rs
index a66a0c2831b3..11b3f4ecca5f 100644
--- a/rust/kernel/sync/arc/std_vendor.rs
+++ b/rust/kernel/sync/arc/std_vendor.rs
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
+//! Rust standard library vendored code.
+//!
//! The contents of this file come from the Rust standard library, hosted in
//! the <https://github.com/rust-lang/rust> repository, licensed under
//! "Apache-2.0 OR MIT" and adapted for kernel use. For copyright details,
diff --git a/rust/kernel/sync/condvar.rs b/rust/kernel/sync/condvar.rs
index f65e19d5a37c..7df565038d7d 100644
--- a/rust/kernel/sync/condvar.rs
+++ b/rust/kernel/sync/condvar.rs
@@ -6,8 +6,17 @@
//! variable.
use super::{lock::Backend, lock::Guard, LockClassKey};
-use crate::{bindings, init::PinInit, pin_init, str::CStr, types::Opaque};
+use crate::{
+ ffi::{c_int, c_long},
+ init::PinInit,
+ pin_init,
+ str::CStr,
+ task::{MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE, TASK_NORMAL, TASK_UNINTERRUPTIBLE},
+ time::Jiffies,
+ types::Opaque,
+};
use core::marker::PhantomPinned;
+use core::ptr;
use macros::pin_data;
/// Creates a [`CondVar`] initialiser with the given name and a newly-created lock class.
@@ -17,6 +26,7 @@ macro_rules! new_condvar {
$crate::sync::CondVar::new($crate::optional_name!($($name)?), $crate::static_lock_class!())
};
}
+pub use new_condvar;
/// A conditional variable.
///
@@ -34,8 +44,7 @@ macro_rules! new_condvar {
/// The following is an example of using a condvar with a mutex:
///
/// ```
-/// use kernel::sync::{CondVar, Mutex};
-/// use kernel::{new_condvar, new_mutex};
+/// use kernel::sync::{new_condvar, new_mutex, CondVar, Mutex};
///
/// #[pin_data]
/// pub struct Example {
@@ -61,11 +70,11 @@ macro_rules! new_condvar {
/// }
///
/// /// Allocates a new boxed `Example`.
-/// fn new_example() -> Result<Pin<Box<Example>>> {
-/// Box::pin_init(pin_init!(Example {
+/// fn new_example() -> Result<Pin<KBox<Example>>> {
+/// KBox::pin_init(pin_init!(Example {
/// value <- new_mutex!(0),
/// value_changed <- new_condvar!(),
-/// }))
+/// }), GFP_KERNEL)
/// }
/// ```
///
@@ -73,16 +82,17 @@ macro_rules! new_condvar {
#[pin_data]
pub struct CondVar {
#[pin]
- pub(crate) wait_list: Opaque<bindings::wait_queue_head>,
+ pub(crate) wait_queue_head: Opaque<bindings::wait_queue_head>,
/// A condvar needs to be pinned because it contains a [`struct list_head`] that is
/// self-referential, so it cannot be safely moved once it is initialised.
+ ///
+ /// [`struct list_head`]: srctree/include/linux/types.h
#[pin]
_pin: PhantomPinned,
}
// SAFETY: `CondVar` only uses a `struct wait_queue_head`, which is safe to use on any thread.
-#[allow(clippy::non_send_fields_in_send_ty)]
unsafe impl Send for CondVar {}
// SAFETY: `CondVar` only uses a `struct wait_queue_head`, which is safe to use on multiple threads
@@ -96,28 +106,35 @@ impl CondVar {
_pin: PhantomPinned,
// SAFETY: `slot` is valid while the closure is called and both `name` and `key` have
// static lifetimes so they live indefinitely.
- wait_list <- Opaque::ffi_init(|slot| unsafe {
+ wait_queue_head <- Opaque::ffi_init(|slot| unsafe {
bindings::__init_waitqueue_head(slot, name.as_char_ptr(), key.as_ptr())
}),
})
}
- fn wait_internal<T: ?Sized, B: Backend>(&self, wait_state: u32, guard: &mut Guard<'_, T, B>) {
+ fn wait_internal<T: ?Sized, B: Backend>(
+ &self,
+ wait_state: c_int,
+ guard: &mut Guard<'_, T, B>,
+ timeout_in_jiffies: c_long,
+ ) -> c_long {
let wait = Opaque::<bindings::wait_queue_entry>::uninit();
// SAFETY: `wait` points to valid memory.
unsafe { bindings::init_wait(wait.get()) };
- // SAFETY: Both `wait` and `wait_list` point to valid memory.
+ // SAFETY: Both `wait` and `wait_queue_head` point to valid memory.
unsafe {
- bindings::prepare_to_wait_exclusive(self.wait_list.get(), wait.get(), wait_state as _)
+ bindings::prepare_to_wait_exclusive(self.wait_queue_head.get(), wait.get(), wait_state)
};
- // SAFETY: No arguments, switches to another thread.
- guard.do_unlocked(|| unsafe { bindings::schedule() });
+ // SAFETY: Switches to another thread. The timeout can be any number.
+ let ret = guard.do_unlocked(|| unsafe { bindings::schedule_timeout(timeout_in_jiffies) });
+
+ // SAFETY: Both `wait` and `wait_queue_head` point to valid memory.
+ unsafe { bindings::finish_wait(self.wait_queue_head.get(), wait.get()) };
- // SAFETY: Both `wait` and `wait_list` point to valid memory.
- unsafe { bindings::finish_wait(self.wait_list.get(), wait.get()) };
+ ret
}
/// Releases the lock and waits for a notification in uninterruptible mode.
@@ -127,7 +144,7 @@ impl CondVar {
/// [`CondVar::notify_one`] or [`CondVar::notify_all`]. Note that it may also wake up
/// spuriously.
pub fn wait<T: ?Sized, B: Backend>(&self, guard: &mut Guard<'_, T, B>) {
- self.wait_internal(bindings::TASK_UNINTERRUPTIBLE, guard);
+ self.wait_internal(TASK_UNINTERRUPTIBLE, guard, MAX_SCHEDULE_TIMEOUT);
}
/// Releases the lock and waits for a notification in interruptible mode.
@@ -138,29 +155,60 @@ impl CondVar {
/// Returns whether there is a signal pending.
#[must_use = "wait_interruptible returns if a signal is pending, so the caller must check the return value"]
pub fn wait_interruptible<T: ?Sized, B: Backend>(&self, guard: &mut Guard<'_, T, B>) -> bool {
- self.wait_internal(bindings::TASK_INTERRUPTIBLE, guard);
+ self.wait_internal(TASK_INTERRUPTIBLE, guard, MAX_SCHEDULE_TIMEOUT);
crate::current!().signal_pending()
}
- /// Calls the kernel function to notify the appropriate number of threads with the given flags.
- fn notify(&self, count: i32, flags: u32) {
- // SAFETY: `wait_list` points to valid memory.
+ /// Releases the lock and waits for a notification in interruptible mode.
+ ///
+ /// Atomically releases the given lock (whose ownership is proven by the guard) and puts the
+ /// thread to sleep. It wakes up when notified by [`CondVar::notify_one`] or
+ /// [`CondVar::notify_all`], or when a timeout occurs, or when the thread receives a signal.
+ #[must_use = "wait_interruptible_timeout returns whether a signal was received or the timeout was reached, so the caller must check the return value"]
+ pub fn wait_interruptible_timeout<T: ?Sized, B: Backend>(
+ &self,
+ guard: &mut Guard<'_, T, B>,
+ jiffies: Jiffies,
+ ) -> CondVarTimeoutResult {
+ let jiffies = jiffies.try_into().unwrap_or(MAX_SCHEDULE_TIMEOUT);
+ let res = self.wait_internal(TASK_INTERRUPTIBLE, guard, jiffies);
+
+ match (res as Jiffies, crate::current!().signal_pending()) {
+ (jiffies, true) => CondVarTimeoutResult::Signal { jiffies },
+ (0, false) => CondVarTimeoutResult::Timeout,
+ (jiffies, false) => CondVarTimeoutResult::Woken { jiffies },
+ }
+ }
+
+ /// Calls the kernel function to notify the appropriate number of threads.
+ fn notify(&self, count: c_int) {
+ // SAFETY: `wait_queue_head` points to valid memory.
unsafe {
bindings::__wake_up(
- self.wait_list.get(),
- bindings::TASK_NORMAL,
+ self.wait_queue_head.get(),
+ TASK_NORMAL,
count,
- flags as _,
+ ptr::null_mut(),
)
};
}
+ /// Calls the kernel function to notify one thread synchronously.
+ ///
+ /// This method behaves like `notify_one`, except that it hints to the scheduler that the
+ /// current thread is about to go to sleep, so it should schedule the target thread on the same
+ /// CPU.
+ pub fn notify_sync(&self) {
+ // SAFETY: `wait_queue_head` points to valid memory.
+ unsafe { bindings::__wake_up_sync(self.wait_queue_head.get(), TASK_NORMAL) };
+ }
+
/// Wakes a single waiter up, if any.
///
/// This is not 'sticky' in the sense that if no thread is waiting, the notification is lost
/// completely (as opposed to automatically waking up the next waiter).
pub fn notify_one(&self) {
- self.notify(1, 0);
+ self.notify(1);
}
/// Wakes all waiters up, if any.
@@ -168,6 +216,22 @@ impl CondVar {
/// This is not 'sticky' in the sense that if no thread is waiting, the notification is lost
/// completely (as opposed to automatically waking up the next waiter).
pub fn notify_all(&self) {
- self.notify(0, 0);
+ self.notify(0);
}
}
+
+/// The return type of [`CondVar::wait_interruptible_timeout`].
+pub enum CondVarTimeoutResult {
+ /// The timeout was reached.
+ Timeout,
+ /// Somebody woke us up.
+ Woken {
+ /// Remaining sleep duration.
+ jiffies: Jiffies,
+ },
+ /// A signal occurred.
+ Signal {
+ /// Remaining sleep duration.
+ jiffies: Jiffies,
+ },
+}
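A sketch of consuming the new API, assuming `e` is the `Example` from the condvar documentation above (`value: Mutex<u32>`, `value_changed: CondVar`), a `jiffies: Jiffies` deadline, the kernel prelude in scope, and a surrounding function returning `Result`:

```rust
use kernel::sync::CondVarTimeoutResult;

let mut guard = e.value.lock();
while *guard == 0 {
    match e.value_changed.wait_interruptible_timeout(&mut guard, jiffies) {
        // Woken up (possibly spuriously); the loop re-checks the predicate.
        CondVarTimeoutResult::Woken { .. } => {}
        // The deadline expired without the predicate becoming true.
        CondVarTimeoutResult::Timeout => break,
        // A signal arrived; bail out so it can be handled.
        CondVarTimeoutResult::Signal { .. } => return Err(EINTR),
    }
}
```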
diff --git a/rust/kernel/sync/lock.rs b/rust/kernel/sync/lock.rs
index f12a684bc957..eb80048e0110 100644
--- a/rust/kernel/sync/lock.rs
+++ b/rust/kernel/sync/lock.rs
@@ -6,13 +6,21 @@
//! spinlocks, raw spinlocks) to be provided with minimal effort.
use super::LockClassKey;
-use crate::{bindings, init::PinInit, pin_init, str::CStr, types::Opaque, types::ScopeGuard};
-use core::{cell::UnsafeCell, marker::PhantomData, marker::PhantomPinned};
+use crate::{
+ init::PinInit,
+ pin_init,
+ str::CStr,
+ types::{NotThreadSafe, Opaque, ScopeGuard},
+};
+use core::{cell::UnsafeCell, marker::PhantomPinned};
use macros::pin_data;
pub mod mutex;
pub mod spinlock;
+pub(super) mod global;
+pub use global::{GlobalGuard, GlobalLock, GlobalLockBackend, GlobalLockedBy};
+
/// The "backend" of a lock.
///
/// It is the actual implementation of the lock, without the need to repeat patterns used in all
@@ -21,14 +29,21 @@ pub mod spinlock;
/// # Safety
///
/// - Implementers must ensure that only one thread/CPU may access the protected data once the lock
-/// is owned, that is, between calls to `lock` and `unlock`.
-/// - Implementers must also ensure that `relock` uses the same locking method as the original
-/// lock operation.
+/// is owned, that is, between calls to [`lock`] and [`unlock`].
+/// - Implementers must also ensure that [`relock`] uses the same locking method as the original
+/// lock operation.
+///
+/// [`lock`]: Backend::lock
+/// [`unlock`]: Backend::unlock
+/// [`relock`]: Backend::relock
pub unsafe trait Backend {
/// The state required by the lock.
type State;
- /// The state required to be kept between lock and unlock.
+ /// The state required to be kept between [`lock`] and [`unlock`].
+ ///
+ /// [`lock`]: Backend::lock
+ /// [`unlock`]: Backend::unlock
type GuardState;
/// Initialises the lock.
@@ -39,7 +54,7 @@ pub unsafe trait Backend {
/// remain valid for read indefinitely.
unsafe fn init(
ptr: *mut Self::State,
- name: *const core::ffi::c_char,
+ name: *const crate::ffi::c_char,
key: *mut bindings::lock_class_key,
);
@@ -51,6 +66,13 @@ pub unsafe trait Backend {
#[must_use]
unsafe fn lock(ptr: *mut Self::State) -> Self::GuardState;
+ /// Tries to acquire the lock.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that [`Backend::init`] has been previously called.
+ unsafe fn try_lock(ptr: *mut Self::State) -> Option<Self::GuardState>;
+
/// Releases the lock, giving up its ownership.
///
/// # Safety
@@ -68,12 +90,20 @@ pub unsafe trait Backend {
// SAFETY: The safety requirements ensure that the lock is initialised.
*guard_state = unsafe { Self::lock(ptr) };
}
+
+ /// Asserts that the lock is held using lockdep.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that [`Backend::init`] has been previously called.
+ unsafe fn assert_is_held(ptr: *mut Self::State);
}
/// A mutual exclusion primitive.
///
/// Exposes one of the kernel locking primitives. Which one is exposed depends on the lock
/// [`Backend`] specified as the generic parameter `B`.
+#[repr(C)]
#[pin_data]
pub struct Lock<T: ?Sized, B: Backend> {
/// The kernel lock object.
@@ -112,6 +142,28 @@ impl<T, B: Backend> Lock<T, B> {
}
}
+impl<B: Backend> Lock<(), B> {
+ /// Constructs a [`Lock`] from a raw pointer.
+ ///
+ /// This can be useful for interacting with a lock which was initialised outside of Rust.
+ ///
+ /// # Safety
+ ///
+ /// The caller promises that `ptr` points to a valid initialised instance of [`State`] during
+ /// the whole lifetime of `'a`.
+ ///
+ /// [`State`]: Backend::State
+ pub unsafe fn from_raw<'a>(ptr: *mut B::State) -> &'a Self {
+ // SAFETY:
+ // - By the safety contract `ptr` must point to a valid initialised instance of `B::State`
+ // - Since the lock data type is `()` which is a ZST, `state` is the only non-ZST member of
+ // the struct
+ // - Combined with `#[repr(C)]`, this guarantees `Self` has an equivalent data layout to
+ // `B::State`.
+ unsafe { &*ptr.cast() }
+ }
+}
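A hypothetical sketch of the intended use: borrowing a `struct mutex` that C code owns and has already initialised. Here `c_mutex: *mut bindings::mutex` is an invented name standing in for a pointer obtained through bindings:

```rust
use kernel::sync::lock::{mutex::MutexBackend, Lock};

// SAFETY: assumption of this sketch: C initialised `c_mutex` and keeps it
// alive for the lifetime inferred here.
let lock: &Lock<(), MutexBackend> = unsafe { Lock::from_raw(c_mutex) };
let _guard = lock.lock(); // provides mutual exclusion only; the data is `()`
```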
+
impl<T: ?Sized, B: Backend> Lock<T, B> {
/// Acquires the lock and gives the caller access to the data protected by it.
pub fn lock(&self) -> Guard<'_, T, B> {
@@ -121,6 +173,15 @@ impl<T: ?Sized, B: Backend> Lock<T, B> {
// SAFETY: The lock was just acquired.
unsafe { Guard::new(self, state) }
}
+
+ /// Tries to acquire the lock.
+ ///
+ /// Returns a guard that can be used to access the data protected by the lock if successful.
+ pub fn try_lock(&self) -> Option<Guard<'_, T, B>> {
+ // SAFETY: The constructor of the type calls `init`, so the existence of the object proves
+ // that `init` was called.
+ unsafe { B::try_lock(self.state.get()).map(|state| Guard::new(self, state)) }
+ }
}
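A sketch of the non-blocking path this adds, assuming `lock: &Mutex<u32>` and the kernel prelude for `pr_info!`:

```rust
if let Some(guard) = lock.try_lock() {
    // Lock acquired without sleeping; the guard derefs to the data.
    pr_info!("value = {}\n", *guard);
} else {
    // Contended: back off instead of blocking.
}
```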
/// A lock guard.
@@ -132,22 +193,22 @@ impl<T: ?Sized, B: Backend> Lock<T, B> {
pub struct Guard<'a, T: ?Sized, B: Backend> {
pub(crate) lock: &'a Lock<T, B>,
pub(crate) state: B::GuardState,
- _not_send: PhantomData<*mut ()>,
+ _not_send: NotThreadSafe,
}
// SAFETY: `Guard` is sync when the data protected by the lock is also sync.
unsafe impl<T: Sync + ?Sized, B: Backend> Sync for Guard<'_, T, B> {}
impl<T: ?Sized, B: Backend> Guard<'_, T, B> {
- pub(crate) fn do_unlocked(&mut self, cb: impl FnOnce()) {
+ pub(crate) fn do_unlocked<U>(&mut self, cb: impl FnOnce() -> U) -> U {
// SAFETY: The caller owns the lock, so it is safe to unlock it.
unsafe { B::unlock(self.lock.state.get(), &self.state) };
- // SAFETY: The lock was just unlocked above and is being relocked now.
- let _relock =
- ScopeGuard::new(|| unsafe { B::relock(self.lock.state.get(), &mut self.state) });
+ let _relock = ScopeGuard::new(||
+ // SAFETY: The lock was just unlocked above and is being relocked now.
+ unsafe { B::relock(self.lock.state.get(), &mut self.state) });
- cb();
+ cb()
}
}
@@ -180,11 +241,14 @@ impl<'a, T: ?Sized, B: Backend> Guard<'a, T, B> {
/// # Safety
///
/// The caller must ensure that it owns the lock.
- pub(crate) unsafe fn new(lock: &'a Lock<T, B>, state: B::GuardState) -> Self {
+ pub unsafe fn new(lock: &'a Lock<T, B>, state: B::GuardState) -> Self {
+ // SAFETY: The caller can only hold the lock if `Backend::init` has already been called.
+ unsafe { B::assert_is_held(lock.state.get()) };
+
Self {
lock,
state,
- _not_send: PhantomData,
+ _not_send: NotThreadSafe,
}
}
}
diff --git a/rust/kernel/sync/lock/global.rs b/rust/kernel/sync/lock/global.rs
new file mode 100644
index 000000000000..480ee724e3cc
--- /dev/null
+++ b/rust/kernel/sync/lock/global.rs
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 Google LLC.
+
+//! Support for defining statics containing locks.
+
+use crate::{
+ str::CStr,
+ sync::lock::{Backend, Guard, Lock},
+ sync::{LockClassKey, LockedBy},
+ types::Opaque,
+};
+use core::{
+ cell::UnsafeCell,
+ marker::{PhantomData, PhantomPinned},
+};
+
+/// Trait implemented for marker types for global locks.
+///
+/// See [`global_lock!`] for examples.
+pub trait GlobalLockBackend {
+ /// The name for this global lock.
+ const NAME: &'static CStr;
+ /// Item type stored in this global lock.
+ type Item: 'static;
+ /// The backend used for this global lock.
+ type Backend: Backend + 'static;
+ /// The class for this global lock.
+ fn get_lock_class() -> &'static LockClassKey;
+}
+
+/// Type used for global locks.
+///
+/// See [`global_lock!`] for examples.
+pub struct GlobalLock<B: GlobalLockBackend> {
+ inner: Lock<B::Item, B::Backend>,
+}
+
+impl<B: GlobalLockBackend> GlobalLock<B> {
+ /// Creates a global lock.
+ ///
+ /// # Safety
+ ///
+ /// * Before any other method on this lock is called, [`Self::init`] must be called.
+ /// * The type `B` must not be used with any other lock.
+ pub const unsafe fn new(data: B::Item) -> Self {
+ Self {
+ inner: Lock {
+ state: Opaque::uninit(),
+ data: UnsafeCell::new(data),
+ _pin: PhantomPinned,
+ },
+ }
+ }
+
+ /// Initializes a global lock.
+ ///
+ /// # Safety
+ ///
+ /// Must not be called more than once on a given lock.
+ pub unsafe fn init(&'static self) {
+ // SAFETY: The pointer to `state` is valid for the duration of this call, and both `name`
+ // and `key` are valid indefinitely. The `state` is pinned since we have a `'static`
+ // reference to `self`.
+ //
+ // We have exclusive access to the `state` since the caller of `new` promised to call
+ // `init` before using any other methods. As `init` can only be called once, all other
+ // uses of this lock must happen after this call.
+ unsafe {
+ B::Backend::init(
+ self.inner.state.get(),
+ B::NAME.as_char_ptr(),
+ B::get_lock_class().as_ptr(),
+ )
+ }
+ }
+
+ /// Lock this global lock.
+ pub fn lock(&'static self) -> GlobalGuard<B> {
+ GlobalGuard {
+ inner: self.inner.lock(),
+ }
+ }
+
+ /// Try to lock this global lock.
+ pub fn try_lock(&'static self) -> Option<GlobalGuard<B>> {
+ Some(GlobalGuard {
+ inner: self.inner.try_lock()?,
+ })
+ }
+}
+
+/// A guard for a [`GlobalLock`].
+///
+/// See [`global_lock!`] for examples.
+pub struct GlobalGuard<B: GlobalLockBackend> {
+ inner: Guard<'static, B::Item, B::Backend>,
+}
+
+impl<B: GlobalLockBackend> core::ops::Deref for GlobalGuard<B> {
+ type Target = B::Item;
+
+ fn deref(&self) -> &Self::Target {
+ &self.inner
+ }
+}
+
+impl<B: GlobalLockBackend> core::ops::DerefMut for GlobalGuard<B> {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.inner
+ }
+}
+
+/// A version of [`LockedBy`] for a [`GlobalLock`].
+///
+/// See [`global_lock!`] for examples.
+pub struct GlobalLockedBy<T: ?Sized, B: GlobalLockBackend> {
+ _backend: PhantomData<B>,
+ value: UnsafeCell<T>,
+}
+
+// SAFETY: The same thread-safety rules as `LockedBy` apply to `GlobalLockedBy`.
+unsafe impl<T, B> Send for GlobalLockedBy<T, B>
+where
+ T: ?Sized,
+ B: GlobalLockBackend,
+ LockedBy<T, B::Item>: Send,
+{
+}
+
+// SAFETY: The same thread-safety rules as `LockedBy` apply to `GlobalLockedBy`.
+unsafe impl<T, B> Sync for GlobalLockedBy<T, B>
+where
+ T: ?Sized,
+ B: GlobalLockBackend,
+ LockedBy<T, B::Item>: Sync,
+{
+}
+
+impl<T, B: GlobalLockBackend> GlobalLockedBy<T, B> {
+ /// Create a new [`GlobalLockedBy`].
+ ///
+ /// The provided value will be protected by the global lock indicated by `B`.
+ pub fn new(val: T) -> Self {
+ Self {
+ value: UnsafeCell::new(val),
+ _backend: PhantomData,
+ }
+ }
+}
+
+impl<T: ?Sized, B: GlobalLockBackend> GlobalLockedBy<T, B> {
+ /// Access the value immutably.
+ ///
+ /// The caller must prove shared access to the lock.
+ pub fn as_ref<'a>(&'a self, _guard: &'a GlobalGuard<B>) -> &'a T {
+ // SAFETY: The lock is globally unique, so there can only be one guard.
+ unsafe { &*self.value.get() }
+ }
+
+ /// Access the value mutably.
+ ///
+ /// The caller must prove exclusive access to the lock.
+ pub fn as_mut<'a>(&'a self, _guard: &'a mut GlobalGuard<B>) -> &'a mut T {
+ // SAFETY: The lock is globally unique, so there can only be one guard.
+ unsafe { &mut *self.value.get() }
+ }
+
+ /// Access the value mutably directly.
+ ///
+ /// The caller has exclusive access to this `GlobalLockedBy`, so they do not need to hold the
+ /// lock.
+ pub fn get_mut(&mut self) -> &mut T {
+ self.value.get_mut()
+ }
+}
+
+/// Defines a global lock.
+///
+/// The global lock must be initialized before first use. Usually this is done by calling
+/// [`GlobalLock::init`] in the module initializer.
+///
+/// # Examples
+///
+/// A global counter:
+///
+/// ```
+/// # mod ex {
+/// # use kernel::prelude::*;
+/// kernel::sync::global_lock! {
+/// // SAFETY: Initialized in module initializer before first use.
+/// unsafe(uninit) static MY_COUNTER: Mutex<u32> = 0;
+/// }
+///
+/// fn increment_counter() -> u32 {
+/// let mut guard = MY_COUNTER.lock();
+/// *guard += 1;
+/// *guard
+/// }
+///
+/// impl kernel::Module for MyModule {
+/// fn init(_module: &'static ThisModule) -> Result<Self> {
+/// // SAFETY: Called exactly once.
+/// unsafe { MY_COUNTER.init() };
+///
+/// Ok(MyModule {})
+/// }
+/// }
+/// # struct MyModule {}
+/// # }
+/// ```
+///
+/// A global mutex used to protect all instances of a given struct:
+///
+/// ```
+/// # mod ex {
+/// # use kernel::prelude::*;
+/// use kernel::sync::{GlobalGuard, GlobalLockedBy};
+///
+/// kernel::sync::global_lock! {
+/// // SAFETY: Initialized in module initializer before first use.
+/// unsafe(uninit) static MY_MUTEX: Mutex<()> = ();
+/// }
+///
+/// /// All instances of this struct are protected by `MY_MUTEX`.
+/// struct MyStruct {
+/// my_counter: GlobalLockedBy<u32, MY_MUTEX>,
+/// }
+///
+/// impl MyStruct {
+/// /// Increment the counter in this instance.
+/// ///
+/// /// The caller must hold the `MY_MUTEX` mutex.
+/// fn increment(&self, guard: &mut GlobalGuard<MY_MUTEX>) -> u32 {
+/// let my_counter = self.my_counter.as_mut(guard);
+/// *my_counter += 1;
+/// *my_counter
+/// }
+/// }
+///
+/// impl kernel::Module for MyModule {
+/// fn init(_module: &'static ThisModule) -> Result<Self> {
+/// // SAFETY: Called exactly once.
+/// unsafe { MY_MUTEX.init() };
+///
+/// Ok(MyModule {})
+/// }
+/// }
+/// # struct MyModule {}
+/// # }
+/// ```
+#[macro_export]
+macro_rules! global_lock {
+ {
+ $(#[$meta:meta])* $pub:vis
+ unsafe(uninit) static $name:ident: $kind:ident<$valuety:ty> = $value:expr;
+ } => {
+ #[doc = ::core::concat!(
+ "Backend type used by [`",
+ ::core::stringify!($name),
+ "`](static@",
+ ::core::stringify!($name),
+ ")."
+ )]
+ #[allow(non_camel_case_types, unreachable_pub)]
+ $pub enum $name {}
+
+ impl $crate::sync::lock::GlobalLockBackend for $name {
+ const NAME: &'static $crate::str::CStr = $crate::c_str!(::core::stringify!($name));
+ type Item = $valuety;
+ type Backend = $crate::global_lock_inner!(backend $kind);
+
+ fn get_lock_class() -> &'static $crate::sync::LockClassKey {
+ $crate::static_lock_class!()
+ }
+ }
+
+ $(#[$meta])*
+ $pub static $name: $crate::sync::lock::GlobalLock<$name> = {
+ // Defined here to be outside the unsafe scope.
+ let init: $valuety = $value;
+
+ // SAFETY:
+ // * The user of this macro promises to initialize the lock before use.
+ // * We are only generating one static with this backend type.
+ unsafe { $crate::sync::lock::GlobalLock::new(init) }
+ };
+ };
+}
+pub use global_lock;
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! global_lock_inner {
+ (backend Mutex) => {
+ $crate::sync::lock::mutex::MutexBackend
+ };
+ (backend SpinLock) => {
+ $crate::sync::lock::spinlock::SpinLockBackend
+ };
+}
diff --git a/rust/kernel/sync/lock/mutex.rs b/rust/kernel/sync/lock/mutex.rs
index 8c524a3ec45a..70cadbc2e8e2 100644
--- a/rust/kernel/sync/lock/mutex.rs
+++ b/rust/kernel/sync/lock/mutex.rs
@@ -4,8 +4,6 @@
//!
//! This module allows Rust code to use the kernel's `struct mutex`.
-use crate::bindings;
-
/// Creates a [`Mutex`] initialiser with the given name and a newly-created lock class.
///
/// It uses the name if one is given, otherwise it generates one based on the file name and line
@@ -17,6 +15,7 @@ macro_rules! new_mutex {
$inner, $crate::optional_name!($($name)?), $crate::static_lock_class!())
};
}
+pub use new_mutex;
/// A mutual exclusion primitive.
///
@@ -35,7 +34,7 @@ macro_rules! new_mutex {
/// contains an inner struct (`Inner`) that is protected by a mutex.
///
/// ```
-/// use kernel::{init::InPlaceInit, init::PinInit, new_mutex, pin_init, sync::Mutex};
+/// use kernel::sync::{new_mutex, Mutex};
///
/// struct Inner {
/// a: u32,
@@ -59,7 +58,7 @@ macro_rules! new_mutex {
/// }
///
/// // Allocate a boxed `Example`.
-/// let e = Box::pin_init(Example::new())?;
+/// let e = KBox::pin_init(Example::new(), GFP_KERNEL)?;
/// assert_eq!(e.c, 10);
/// assert_eq!(e.d.lock().a, 20);
/// assert_eq!(e.d.lock().b, 30);
@@ -87,6 +86,14 @@ macro_rules! new_mutex {
/// [`struct mutex`]: srctree/include/linux/mutex.h
pub type Mutex<T> = super::Lock<T, MutexBackend>;
+/// A [`Guard`] acquired from locking a [`Mutex`].
+///
+/// This is simply a type alias for a [`Guard`] returned from locking a [`Mutex`]. It will unlock
+/// the [`Mutex`] upon being dropped.
+///
+/// [`Guard`]: super::Guard
+pub type MutexGuard<'a, T> = super::Guard<'a, T, MutexBackend>;
+
/// A kernel `struct mutex` lock backend.
pub struct MutexBackend;
@@ -97,7 +104,7 @@ unsafe impl super::Backend for MutexBackend {
unsafe fn init(
ptr: *mut Self::State,
- name: *const core::ffi::c_char,
+ name: *const crate::ffi::c_char,
key: *mut bindings::lock_class_key,
) {
// SAFETY: The safety requirements ensure that `ptr` is valid for writes, and `name` and
@@ -116,4 +123,20 @@ unsafe impl super::Backend for MutexBackend {
// caller is the owner of the mutex.
unsafe { bindings::mutex_unlock(ptr) };
}
+
+ unsafe fn try_lock(ptr: *mut Self::State) -> Option<Self::GuardState> {
+ // SAFETY: The `ptr` pointer is guaranteed to be valid and initialized before use.
+ let result = unsafe { bindings::mutex_trylock(ptr) };
+
+ if result != 0 {
+ Some(())
+ } else {
+ None
+ }
+ }
+
+ unsafe fn assert_is_held(ptr: *mut Self::State) {
+ // SAFETY: The `ptr` pointer is guaranteed to be valid and initialized before use.
+ unsafe { bindings::mutex_assert_is_held(ptr) }
+ }
}
diff --git a/rust/kernel/sync/lock/spinlock.rs b/rust/kernel/sync/lock/spinlock.rs
index 068535ce1b29..ab2f8d075311 100644
--- a/rust/kernel/sync/lock/spinlock.rs
+++ b/rust/kernel/sync/lock/spinlock.rs
@@ -4,8 +4,6 @@
//!
//! This module allows Rust code to use the kernel's `spinlock_t`.
-use crate::bindings;
-
/// Creates a [`SpinLock`] initialiser with the given name and a newly-created lock class.
///
/// It uses the name if one is given, otherwise it generates one based on the file name and line
@@ -17,6 +15,7 @@ macro_rules! new_spinlock {
$inner, $crate::optional_name!($($name)?), $crate::static_lock_class!())
};
}
+pub use new_spinlock;
/// A spinlock.
///
@@ -33,7 +32,7 @@ macro_rules! new_spinlock {
/// contains an inner struct (`Inner`) that is protected by a spinlock.
///
/// ```
-/// use kernel::{init::InPlaceInit, init::PinInit, new_spinlock, pin_init, sync::SpinLock};
+/// use kernel::sync::{new_spinlock, SpinLock};
///
/// struct Inner {
/// a: u32,
@@ -57,7 +56,7 @@ macro_rules! new_spinlock {
/// }
///
/// // Allocate a boxed `Example`.
-/// let e = Box::pin_init(Example::new())?;
+/// let e = KBox::pin_init(Example::new(), GFP_KERNEL)?;
/// assert_eq!(e.c, 10);
/// assert_eq!(e.d.lock().a, 20);
/// assert_eq!(e.d.lock().b, 30);
@@ -88,6 +87,14 @@ pub type SpinLock<T> = super::Lock<T, SpinLockBackend>;
/// A kernel `spinlock_t` lock backend.
pub struct SpinLockBackend;
+/// A [`Guard`] acquired from locking a [`SpinLock`].
+///
+/// This is simply a type alias for a [`Guard`] returned from locking a [`SpinLock`]. It will unlock
+/// the [`SpinLock`] upon being dropped.
+///
+/// [`Guard`]: super::Guard
+pub type SpinLockGuard<'a, T> = super::Guard<'a, T, SpinLockBackend>;
+
// SAFETY: The underlying kernel `spinlock_t` object ensures mutual exclusion. `relock` uses the
// default implementation that always calls the same locking method.
unsafe impl super::Backend for SpinLockBackend {
@@ -96,7 +103,7 @@ unsafe impl super::Backend for SpinLockBackend {
unsafe fn init(
ptr: *mut Self::State,
- name: *const core::ffi::c_char,
+ name: *const crate::ffi::c_char,
key: *mut bindings::lock_class_key,
) {
// SAFETY: The safety requirements ensure that `ptr` is valid for writes, and `name` and
@@ -112,7 +119,23 @@ unsafe impl super::Backend for SpinLockBackend {
unsafe fn unlock(ptr: *mut Self::State, _guard_state: &Self::GuardState) {
// SAFETY: The safety requirements of this function ensure that `ptr` is valid and that the
- // caller is the owner of the mutex.
+ // caller is the owner of the spinlock.
unsafe { bindings::spin_unlock(ptr) }
}
+
+ unsafe fn try_lock(ptr: *mut Self::State) -> Option<Self::GuardState> {
+ // SAFETY: The `ptr` pointer is guaranteed to be valid and initialized before use.
+ let result = unsafe { bindings::spin_trylock(ptr) };
+
+ if result != 0 {
+ Some(())
+ } else {
+ None
+ }
+ }
+
+ unsafe fn assert_is_held(ptr: *mut Self::State) {
+ // SAFETY: The `ptr` pointer is guaranteed to be valid and initialized before use.
+ unsafe { bindings::spin_assert_is_held(ptr) }
+ }
}
diff --git a/rust/kernel/sync/locked_by.rs b/rust/kernel/sync/locked_by.rs
index b17ee5cd98f3..61f100a45b35 100644
--- a/rust/kernel/sync/locked_by.rs
+++ b/rust/kernel/sync/locked_by.rs
@@ -9,14 +9,17 @@ use core::{cell::UnsafeCell, mem::size_of, ptr};
/// Allows access to some data to be serialised by a lock that does not wrap it.
///
/// In most cases, data protected by a lock is wrapped by the appropriate lock type, e.g.,
-/// [`super::Mutex`] or [`super::SpinLock`]. [`LockedBy`] is meant for cases when this is not
-/// possible. For example, if a container has a lock and some data in the contained elements needs
+/// [`Mutex`] or [`SpinLock`]. [`LockedBy`] is meant for cases when this is not possible.
+/// For example, if a container has a lock and some data in the contained elements needs
/// to be protected by the same lock.
///
/// [`LockedBy`] wraps the data in lieu of another locking primitive, and only allows access to it
/// when the caller shows evidence that the 'external' lock is locked. It panics if the evidence
/// refers to the wrong instance of the lock.
///
+/// [`Mutex`]: super::Mutex
+/// [`SpinLock`]: super::SpinLock
+///
/// # Examples
///
/// The following is an example for illustrative purposes: `InnerDirectory::bytes_used` is an
@@ -40,7 +43,7 @@ use core::{cell::UnsafeCell, mem::size_of, ptr};
/// struct InnerDirectory {
/// /// The sum of the bytes used by all files.
/// bytes_used: u64,
-/// _files: Vec<File>,
+/// _files: KVec<File>,
/// }
///
/// struct Directory {
@@ -52,7 +55,7 @@ use core::{cell::UnsafeCell, mem::size_of, ptr};
/// fn print_bytes_used(dir: &Directory, file: &File) {
/// let guard = dir.inner.lock();
/// let inner_file = file.inner.access(&guard);
-/// pr_info!("{} {}", guard.bytes_used, inner_file.bytes_used);
+/// pr_info!("{} {}\n", guard.bytes_used, inner_file.bytes_used);
/// }
///
/// /// Increments `bytes_used` for both the directory and file.
@@ -80,8 +83,12 @@ pub struct LockedBy<T: ?Sized, U: ?Sized> {
// SAFETY: `LockedBy` can be transferred across thread boundaries iff the data it protects can.
unsafe impl<T: ?Sized + Send, U: ?Sized> Send for LockedBy<T, U> {}
-// SAFETY: `LockedBy` serialises the interior mutability it provides, so it is `Sync` as long as the
-// data it protects is `Send`.
+// SAFETY: If `T` is not `Sync`, then parallel shared access to this `LockedBy` allows you to use
+// `access_mut` to hand out `&mut T` on one thread at a time. The requirement that `T: Send` is
+// sufficient to allow that.
+//
+// If `T` is `Sync`, then the `access` method also becomes available, which allows you to obtain
+// several `&T` from several threads at once. However, this is okay as `T` is `Sync`.
unsafe impl<T: ?Sized + Send, U: ?Sized> Sync for LockedBy<T, U> {}
impl<T, U> LockedBy<T, U> {
@@ -115,7 +122,10 @@ impl<T: ?Sized, U> LockedBy<T, U> {
///
/// Panics if `owner` is different from the data protected by the lock used in
/// [`new`](LockedBy::new).
- pub fn access<'a>(&'a self, owner: &'a U) -> &'a T {
+ pub fn access<'a>(&'a self, owner: &'a U) -> &'a T
+ where
+ T: Sync,
+ {
build_assert!(
size_of::<U>() > 0,
"`U` cannot be a ZST because `owner` wouldn't be unique"
@@ -124,7 +134,10 @@ impl<T: ?Sized, U> LockedBy<T, U> {
panic!("mismatched owners");
}
- // SAFETY: `owner` is evidence that the owner is locked.
+ // SAFETY: `owner` is evidence that there are only shared references to the owner for the
+ // duration of 'a, so it's not possible to use `Self::access_mut` to obtain a mutable
+ // reference to the inner value that aliases with this shared reference. The type is `Sync`
+ // so there are no other requirements.
unsafe { &*self.data.get() }
}
diff --git a/rust/kernel/sync/poll.rs b/rust/kernel/sync/poll.rs
new file mode 100644
index 000000000000..d5f17153b424
--- /dev/null
+++ b/rust/kernel/sync/poll.rs
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 Google LLC.
+
+//! Utilities for working with `struct poll_table`.
+
+use crate::{
+ bindings,
+ fs::File,
+ prelude::*,
+ sync::{CondVar, LockClassKey},
+ types::Opaque,
+};
+use core::ops::Deref;
+
+/// Creates a [`PollCondVar`] initialiser with the given name and a newly-created lock class.
+#[macro_export]
+macro_rules! new_poll_condvar {
+ ($($name:literal)?) => {
+ $crate::sync::poll::PollCondVar::new(
+ $crate::optional_name!($($name)?), $crate::static_lock_class!()
+ )
+ };
+}
+
+/// Wraps the kernel's `struct poll_table`.
+///
+/// # Invariants
+///
+/// This struct contains a valid `struct poll_table`.
+///
+/// For a `struct poll_table` to be valid, its `_qproc` function must follow the safety
+/// requirements of `_qproc` functions:
+///
+/// * The `_qproc` function is given permission to enqueue a waiter to the provided `poll_table`
+/// during the call. Once the waiter is removed and an rcu grace period has passed, it must no
+/// longer access the `wait_queue_head`.
+#[repr(transparent)]
+pub struct PollTable(Opaque<bindings::poll_table>);
+
+impl PollTable {
+ /// Creates a reference to a [`PollTable`] from a valid pointer.
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure that for the duration of 'a, the pointer will point at a valid poll
+ /// table (as defined in the type invariants).
+ ///
+ /// The caller must also ensure that the `poll_table` is only accessed via the returned
+ /// reference for the duration of 'a.
+ pub unsafe fn from_ptr<'a>(ptr: *mut bindings::poll_table) -> &'a mut PollTable {
+ // SAFETY: The safety requirements guarantee the validity of the dereference, while the
+ // `PollTable` type being transparent makes the cast ok.
+ unsafe { &mut *ptr.cast() }
+ }
+
+ fn get_qproc(&self) -> bindings::poll_queue_proc {
+ let ptr = self.0.get();
+ // SAFETY: The `ptr` is valid because it originates from a reference, and the `_qproc`
+ // field is not modified concurrently with this call since we have an immutable reference.
+ unsafe { (*ptr)._qproc }
+ }
+
+ /// Register this [`PollTable`] with the provided [`PollCondVar`], so that it can be notified
+ /// using the condition variable.
+ pub fn register_wait(&mut self, file: &File, cv: &PollCondVar) {
+ if let Some(qproc) = self.get_qproc() {
+ // SAFETY: The pointers to `file` and `self` need to be valid for the duration of this
+ // call to `qproc`, which they are because they are references.
+ //
+ // The `cv.wait_queue_head` pointer must be valid until an rcu grace period after the
+ // waiter is removed. The `PollCondVar` is pinned, so before `cv.wait_queue_head` can
+ // be destroyed, the destructor must run. That destructor first removes all waiters,
+ // and then waits for an rcu grace period. Therefore, `cv.wait_queue_head` is valid for
+ // long enough.
+ unsafe { qproc(file.as_ptr() as _, cv.wait_queue_head.get(), self.0.get()) };
+ }
+ }
+}
+
+/// A wrapper around [`CondVar`] that makes it usable with [`PollTable`].
+///
+/// [`CondVar`]: crate::sync::CondVar
+#[pin_data(PinnedDrop)]
+pub struct PollCondVar {
+ #[pin]
+ inner: CondVar,
+}
+
+impl PollCondVar {
+ /// Constructs a new condvar initialiser.
+ pub fn new(name: &'static CStr, key: &'static LockClassKey) -> impl PinInit<Self> {
+ pin_init!(Self {
+ inner <- CondVar::new(name, key),
+ })
+ }
+}
+
+// Make the `CondVar` methods callable on `PollCondVar`.
+impl Deref for PollCondVar {
+ type Target = CondVar;
+
+ fn deref(&self) -> &CondVar {
+ &self.inner
+ }
+}
+
+#[pinned_drop]
+impl PinnedDrop for PollCondVar {
+ fn drop(self: Pin<&mut Self>) {
+ // Clear anything registered using `register_wait`.
+ //
+ // SAFETY: The pointer points at a valid `wait_queue_head`.
+ unsafe { bindings::__wake_up_pollfree(self.inner.wait_queue_head.get()) };
+
+ // Wait for epoll items to be properly removed.
+ //
+ // SAFETY: Just an FFI call.
+ unsafe { bindings::synchronize_rcu() };
+ }
+}
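The two halves above are meant to be used together from a driver's `poll` callback: register the waiter first, then check the readiness condition, so that a wakeup arriving between the two steps is not lost. A minimal sketch under stated assumptions: `MyDevice`, its `ready` flag, the local `POLLIN` constant, and the (elided, pin-init based) construction of the device are all hypothetical stand-ins, not part of this patch.

```rust
use kernel::fs::File;
use kernel::sync::poll::{PollCondVar, PollTable};
use kernel::sync::SpinLock;

/// Illustrative readiness bit; real code would use the kernel's POLL*/EPOLL* masks.
const POLLIN: u32 = 0x0001;

/// Hypothetical device state: `ready` is the condition, `cv` its wakeup channel.
struct MyDevice {
    ready: SpinLock<bool>,
    cv: PollCondVar,
}

impl MyDevice {
    /// Sketch of a `poll` callback: register the waiter, then test the condition.
    fn poll(&self, file: &File, table: &mut PollTable) -> u32 {
        // Registering before checking closes the lost-wakeup window: any
        // producer that sets `ready` after this point finds our waiter enqueued.
        table.register_wait(file, &self.cv);
        if *self.ready.lock() {
            POLLIN
        } else {
            0
        }
    }

    /// Sketch of the producer side: update the condition, then notify.
    fn make_ready(&self) {
        *self.ready.lock() = true;
        // `PollCondVar` derefs to `CondVar`, so the usual notify methods apply.
        self.cv.notify_all();
    }
}
```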
diff --git a/rust/kernel/sync/rcu.rs b/rust/kernel/sync/rcu.rs
new file mode 100644
index 000000000000..b51d9150ffe2
--- /dev/null
+++ b/rust/kernel/sync/rcu.rs
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! RCU support.
+//!
+//! C header: [`include/linux/rcupdate.h`](srctree/include/linux/rcupdate.h)
+
+use crate::{bindings, types::NotThreadSafe};
+
+/// Evidence that the RCU read side lock is held on the current thread/CPU.
+///
+/// The type is explicitly not `Send` because this property is per-thread/CPU.
+///
+/// # Invariants
+///
+/// The RCU read side lock is actually held while instances of this guard exist.
+pub struct Guard(NotThreadSafe);
+
+impl Guard {
+ /// Acquires the RCU read side lock and returns a guard.
+ pub fn new() -> Self {
+ // SAFETY: An FFI call with no additional requirements.
+ unsafe { bindings::rcu_read_lock() };
+ // INVARIANT: The RCU read side lock was just acquired above.
+ Self(NotThreadSafe)
+ }
+
+ /// Explicitly releases the RCU read side lock.
+ pub fn unlock(self) {}
+}
+
+impl Default for Guard {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl Drop for Guard {
+ fn drop(&mut self) {
+ // SAFETY: By the type invariants, the RCU read side is locked, so it is ok to unlock it.
+ unsafe { bindings::rcu_read_unlock() };
+ }
+}
+
+/// Acquires the RCU read side lock.
+pub fn read_lock() -> Guard {
+ Guard::new()
+}
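A short usage sketch: the guard delimits the read-side critical section, and either dropping it or calling `unlock` leaves that section. The protected read itself is elided, since it depends on the data structure being guarded.

```rust
use kernel::sync::rcu;

fn read_side() {
    let guard = rcu::read_lock();
    // While `guard` is alive, RCU-protected pointers read here cannot be
    // reclaimed by a concurrent updater.
    // ... dereference RCU-protected data here ...
    guard.unlock(); // Equivalent to letting `guard` go out of scope.
}
```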
diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs
index 9451932d5d86..38da555a2bdb 100644
--- a/rust/kernel/task.rs
+++ b/rust/kernel/task.rs
@@ -4,8 +4,28 @@
//!
//! C header: [`include/linux/sched.h`](srctree/include/linux/sched.h).
-use crate::{bindings, types::Opaque};
-use core::{marker::PhantomData, ops::Deref, ptr};
+use crate::{
+ bindings,
+ ffi::{c_int, c_long, c_uint},
+ pid_namespace::PidNamespace,
+ types::{ARef, NotThreadSafe, Opaque},
+};
+use core::{
+ cmp::{Eq, PartialEq},
+ ops::Deref,
+ ptr,
+};
+
+/// A sentinel value used for infinite timeouts.
+pub const MAX_SCHEDULE_TIMEOUT: c_long = c_long::MAX;
+
+/// Bitmask for tasks that are sleeping in an interruptible state.
+pub const TASK_INTERRUPTIBLE: c_int = bindings::TASK_INTERRUPTIBLE as c_int;
+/// Bitmask for tasks that are sleeping in an uninterruptible state.
+pub const TASK_UNINTERRUPTIBLE: c_int = bindings::TASK_UNINTERRUPTIBLE as c_int;
+/// Convenience constant for waking up tasks regardless of whether they are in interruptible or
+/// uninterruptible sleep.
+pub const TASK_NORMAL: c_uint = bindings::TASK_NORMAL as c_uint;
/// Returns the currently running task.
#[macro_export]
@@ -17,13 +37,23 @@ macro_rules! current {
};
}
+/// Returns the currently running task's pid namespace.
+#[macro_export]
+macro_rules! current_pid_ns {
+ () => {
+ // SAFETY: Deref + addr-of below create a temporary `PidNamespaceRef` that cannot outlive
+ // the caller.
+ unsafe { &*$crate::task::Task::current_pid_ns() }
+ };
+}
+
/// Wraps the kernel's `struct task_struct`.
///
/// # Invariants
///
/// All instances are valid tasks created by the C portion of the kernel.
///
-/// Instances of this type are always ref-counted, that is, a call to `get_task_struct` ensures
+/// Instances of this type are always refcounted, that is, a call to `get_task_struct` ensures
/// that the allocation remains valid at least until the matching call to `put_task_struct`.
///
/// # Examples
@@ -78,7 +108,22 @@ unsafe impl Sync for Task {}
/// The type of process identifiers (PIDs).
type Pid = bindings::pid_t;
+/// The type of user identifiers (UIDs).
+#[derive(Copy, Clone)]
+pub struct Kuid {
+ kuid: bindings::kuid_t,
+}
+
impl Task {
+ /// Returns a raw pointer to the current task.
+ ///
+ /// It is up to the user to use the pointer correctly.
+ #[inline]
+ pub fn current_raw() -> *mut bindings::task_struct {
+ // SAFETY: Getting the current pointer is always safe.
+ unsafe { bindings::get_current() }
+ }
+
/// Returns a task reference for the currently executing task/thread.
///
/// The recommended way to get the current task/thread is to use the
@@ -90,7 +135,7 @@ impl Task {
pub unsafe fn current() -> impl Deref<Target = Task> {
struct TaskRef<'a> {
task: &'a Task,
- _not_send: PhantomData<*mut ()>,
+ _not_send: NotThreadSafe,
}
impl Deref for TaskRef<'_> {
@@ -101,23 +146,118 @@ impl Task {
}
}
- // SAFETY: Just an FFI call with no additional safety requirements.
- let ptr = unsafe { bindings::get_current() };
-
+ let current = Task::current_raw();
TaskRef {
// SAFETY: If the current thread is still running, the current task is valid. Given
// that `TaskRef` is not `Send`, we know it cannot be transferred to another thread
// (where it could potentially outlive the caller).
- task: unsafe { &*ptr.cast() },
- _not_send: PhantomData,
+ task: unsafe { &*current.cast() },
+ _not_send: NotThreadSafe,
+ }
+ }
+
+ /// Returns a PidNamespace reference for the currently executing task's/thread's pid namespace.
+ ///
+ /// This function can be misused to create an unbounded lifetime, e.g. by storing the returned
+ /// `PidNamespace` in a global variable, which would be a bug. The recommended way to get the
+ /// current task's/thread's pid namespace is therefore to use the [`current_pid_ns`] macro,
+ /// because it is safe.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that the returned object doesn't outlive the current task/thread.
+ pub unsafe fn current_pid_ns() -> impl Deref<Target = PidNamespace> {
+ struct PidNamespaceRef<'a> {
+ task: &'a PidNamespace,
+ _not_send: NotThreadSafe,
+ }
+
+ impl Deref for PidNamespaceRef<'_> {
+ type Target = PidNamespace;
+
+ fn deref(&self) -> &Self::Target {
+ self.task
+ }
}
+
+ // The lifetime of `PidNamespace` is bound to `Task` and `struct pid`.
+ //
+ // The `PidNamespace` of a `Task` doesn't ever change once the `Task` is alive. A
+ // `unshare(CLONE_NEWPID)` or `setns(fd_pidns/pidfd, CLONE_NEWPID)` will not have an effect
+ // on the calling `Task`'s pid namespace. It will only affect the pid namespace of children
+ // created by the calling `Task`. This invariant guarantees that after having acquired a
+ // reference to a `Task`'s pid namespace it will remain unchanged.
+ //
+ // When a task has exited and been reaped, `release_task()` will be called. This will set
+ // the `PidNamespace` of the task to `NULL`. So retrieving the `PidNamespace` of a task
+ // that is dead will return `NULL`. Note that neither holding the RCU lock nor holding a
+ // reference count on the `Task` will prevent `release_task()` from being called.
+ //
+ // In order to retrieve the `PidNamespace` of a `Task` the `task_active_pid_ns()` function
+ // can be used. There are two cases to consider:
+ //
+ // (1) retrieving the `PidNamespace` of the `current` task
+ // (2) retrieving the `PidNamespace` of a non-`current` task
+ //
+ // From system call context retrieving the `PidNamespace` for case (1) is always safe and
+ // requires neither RCU locking nor a reference count to be held. Retrieving the
+ // `PidNamespace` after `release_task()` for current will return `NULL` but no codepath
+ // like that is exposed to Rust.
+ //
+ // Retrieving the `PidNamespace` from system call context for (2) requires RCU protection.
+ // Accessing `PidNamespace` outside of RCU protection requires a reference count that
+ // must've been acquired while holding the RCU lock. Note that accessing a non-`current`
+ // task means `NULL` can be returned as the non-`current` task could have already passed
+ // through `release_task()`.
+ //
+ // To retrieve (1) the `current_pid_ns!()` macro should be used, which ensures that the
+ // returned `PidNamespace` cannot outlive the calling scope. The associated
+ // `current_pid_ns()` function should not be called directly, as it could be abused to
+ // create an unbounded lifetime for `PidNamespace`. The `current_pid_ns!()` macro allows
+ // Rust to handle the common case of accessing `current`'s `PidNamespace` without RCU
+ // protection and without having to acquire a reference count.
+ //
+ // For (2) the `task_get_pid_ns()` method must be used. This will always acquire a
+ // reference on `PidNamespace` and will return an `Option` to force the caller to
+ // explicitly handle the case where `PidNamespace` is `None`, something that tends to be
+ // forgotten when doing the equivalent operation in `C`. Missing RCU primitives make it
+ // difficult to perform operations that are otherwise safe without holding a reference
+ // count as long as RCU protection is guaranteed; this is not important currently, but we
+ // do want it in the future.
+ //
+ // Note for (2) the required RCU protection around calling `task_active_pid_ns()`
+ // synchronizes against putting the last reference of the associated `struct pid` of
+ // `task->thread_pid`. The `struct pid` stored in that field is used to retrieve the
+ // `PidNamespace` of the caller. When `release_task()` is called `task->thread_pid` will be
+ // `NULL`ed and `put_pid()` on said `struct pid` will be delayed in `free_pid()` via
+ // `call_rcu()` allowing everyone with an RCU protected access to the `struct pid` acquired
+ // from `task->thread_pid` to finish.
+ //
+ // SAFETY: The current task's pid namespace is valid as long as the current task is running.
+ let pidns = unsafe { bindings::task_active_pid_ns(Task::current_raw()) };
+ PidNamespaceRef {
+ // SAFETY: If the current thread is still running, the current task and its associated
+ // pid namespace are valid. `PidNamespaceRef` is not `Send`, so we know it cannot be
+ // transferred to another thread (where it could potentially outlive the current
+ // `Task`). The caller needs to ensure that the PidNamespaceRef doesn't outlive the
+ // current task/thread.
+ task: unsafe { PidNamespace::from_ptr(pidns) },
+ _not_send: NotThreadSafe,
+ }
+ }
+
+ /// Returns a raw pointer to the task.
+ #[inline]
+ pub fn as_ptr(&self) -> *mut bindings::task_struct {
+ self.0.get()
}
/// Returns the group leader of the given task.
pub fn group_leader(&self) -> &Task {
- // SAFETY: By the type invariant, we know that `self.0` is a valid task. Valid tasks always
- // have a valid group_leader.
- let ptr = unsafe { *ptr::addr_of!((*self.0.get()).group_leader) };
+ // SAFETY: The group leader of a task never changes after initialization, so reading this
+ // field is not a data race.
+ let ptr = unsafe { *ptr::addr_of!((*self.as_ptr()).group_leader) };
// SAFETY: The lifetime of the returned task reference is tied to the lifetime of `self`,
// and given that a task has a reference to its group leader, we know it must be valid for
@@ -127,31 +267,70 @@ impl Task {
/// Returns the PID of the given task.
pub fn pid(&self) -> Pid {
- // SAFETY: By the type invariant, we know that `self.0` is a valid task. Valid tasks always
- // have a valid pid.
- unsafe { *ptr::addr_of!((*self.0.get()).pid) }
+ // SAFETY: The pid of a task never changes after initialization, so reading this field is
+ // not a data race.
+ unsafe { *ptr::addr_of!((*self.as_ptr()).pid) }
+ }
+
+ /// Returns the UID of the given task.
+ pub fn uid(&self) -> Kuid {
+ // SAFETY: It's always safe to call `task_uid` on a valid task.
+ Kuid::from_raw(unsafe { bindings::task_uid(self.as_ptr()) })
+ }
+
+ /// Returns the effective UID of the given task.
+ pub fn euid(&self) -> Kuid {
+ // SAFETY: It's always safe to call `task_euid` on a valid task.
+ Kuid::from_raw(unsafe { bindings::task_euid(self.as_ptr()) })
}
/// Determines whether the given task has pending signals.
pub fn signal_pending(&self) -> bool {
+ // SAFETY: It's always safe to call `signal_pending` on a valid task.
+ unsafe { bindings::signal_pending(self.as_ptr()) != 0 }
+ }
+
+ /// Returns the task's pid namespace with an elevated reference count.
+ pub fn get_pid_ns(&self) -> Option<ARef<PidNamespace>> {
// SAFETY: By the type invariant, we know that `self.0` is valid.
- unsafe { bindings::signal_pending(self.0.get()) != 0 }
+ let ptr = unsafe { bindings::task_get_pid_ns(self.as_ptr()) };
+ if ptr.is_null() {
+ None
+ } else {
+ // SAFETY: `ptr` is non-null and valid, and we own a reference count on it, acquired
+ // above via `task_get_pid_ns()`.
+ // CAST: `PidNamespace` is a `repr(transparent)` wrapper around `bindings::pid_namespace`.
+ Some(unsafe { ARef::from_raw(ptr::NonNull::new_unchecked(ptr.cast::<PidNamespace>())) })
+ }
+ }
+
+ /// Returns the given task's thread group id (tgid) in the provided pid namespace.
+ #[doc(alias = "task_tgid_nr_ns")]
+ pub fn tgid_nr_ns(&self, pidns: Option<&PidNamespace>) -> Pid {
+ let pidns = match pidns {
+ Some(pidns) => pidns.as_ptr(),
+ None => core::ptr::null_mut(),
+ };
+ // SAFETY: By the type invariant, we know that `self.0` is valid. We either received a
+ // valid `PidNamespace` whose pointer we pass on, or we received `None` and thus pass a
+ // null pointer; the underlying C function is safe to call with a NULL pointer.
+ unsafe { bindings::task_tgid_nr_ns(self.as_ptr(), pidns) }
}
/// Wakes up the task.
pub fn wake_up(&self) {
- // SAFETY: By the type invariant, we know that `self.0.get()` is non-null and valid.
- // And `wake_up_process` is safe to be called for any valid task, even if the task is
+ // SAFETY: It's always safe to call `wake_up_process` on a valid task, even if the task is
// running.
- unsafe { bindings::wake_up_process(self.0.get()) };
+ unsafe { bindings::wake_up_process(self.as_ptr()) };
}
}
-// SAFETY: The type invariants guarantee that `Task` is always ref-counted.
+// SAFETY: The type invariants guarantee that `Task` is always refcounted.
unsafe impl crate::types::AlwaysRefCounted for Task {
fn inc_ref(&self) {
// SAFETY: The existence of a shared reference means that the refcount is nonzero.
- unsafe { bindings::get_task_struct(self.0.get()) };
+ unsafe { bindings::get_task_struct(self.as_ptr()) };
}
unsafe fn dec_ref(obj: ptr::NonNull<Self>) {
@@ -159,3 +338,43 @@ unsafe impl crate::types::AlwaysRefCounted for Task {
unsafe { bindings::put_task_struct(obj.cast().as_ptr()) }
}
}
+
+impl Kuid {
+ /// Get the current euid.
+ #[inline]
+ pub fn current_euid() -> Kuid {
+ // SAFETY: Just an FFI call.
+ Self::from_raw(unsafe { bindings::current_euid() })
+ }
+
+ /// Create a `Kuid` given the raw C type.
+ #[inline]
+ pub fn from_raw(kuid: bindings::kuid_t) -> Self {
+ Self { kuid }
+ }
+
+ /// Turn this kuid into the raw C type.
+ #[inline]
+ pub fn into_raw(self) -> bindings::kuid_t {
+ self.kuid
+ }
+
+ /// Converts this kernel UID into a userspace UID.
+ ///
+ /// Uses the namespace of the current task.
+ #[inline]
+ pub fn into_uid_in_current_ns(self) -> bindings::uid_t {
+ // SAFETY: Just an FFI call.
+ unsafe { bindings::from_kuid(bindings::current_user_ns(), self.kuid) }
+ }
+}
+
+impl PartialEq for Kuid {
+ #[inline]
+ fn eq(&self, other: &Kuid) -> bool {
+ // SAFETY: Just an FFI call.
+ unsafe { bindings::uid_eq(self.kuid, other.kuid) }
+ }
+}
+
+impl Eq for Kuid {}
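A sketch tying the accessors above together: fetch the current task via the `current!` macro, print its immutable identifiers, and resolve its tgid, preferring the task's own pid namespace when it is still available. Only APIs added or kept by this patch are used.

```rust
use kernel::prelude::*;

fn log_current_ids() {
    let task = kernel::current!();
    pr_info!("pid={}\n", task.pid());

    // `uid()` returns a `Kuid`; translate it for display in the current
    // user namespace.
    pr_info!("uid={}\n", task.uid().into_uid_in_current_ns());

    // A dead task may have lost its pid namespace already, in which case
    // `get_pid_ns()` returns `None` and we fall back to passing NULL.
    let tgid = match task.get_pid_ns() {
        Some(ns) => task.tgid_nr_ns(Some(&ns)),
        None => task.tgid_nr_ns(None),
    };
    pr_info!("tgid={}\n", tgid);
}
```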
diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs
new file mode 100644
index 000000000000..379c0f5772e5
--- /dev/null
+++ b/rust/kernel/time.rs
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Time related primitives.
+//!
+//! This module contains the kernel APIs related to time and timers that
+//! have been ported or wrapped for usage by Rust code in the kernel.
+//!
+//! C header: [`include/linux/jiffies.h`](srctree/include/linux/jiffies.h).
+//! C header: [`include/linux/ktime.h`](srctree/include/linux/ktime.h).
+
+/// The number of nanoseconds per millisecond.
+pub const NSEC_PER_MSEC: i64 = bindings::NSEC_PER_MSEC as i64;
+
+/// The time unit of the Linux kernel. One jiffy equals (1/HZ) of a second.
+pub type Jiffies = crate::ffi::c_ulong;
+
+/// The millisecond time unit.
+pub type Msecs = crate::ffi::c_uint;
+
+/// Converts milliseconds to jiffies.
+#[inline]
+pub fn msecs_to_jiffies(msecs: Msecs) -> Jiffies {
+ // SAFETY: The `__msecs_to_jiffies` function is always safe to call no
+ // matter what the argument is.
+ unsafe { bindings::__msecs_to_jiffies(msecs) }
+}
+
+/// A Rust wrapper around a `ktime_t`.
+#[repr(transparent)]
+#[derive(Copy, Clone)]
+pub struct Ktime {
+ inner: bindings::ktime_t,
+}
+
+impl Ktime {
+ /// Create a `Ktime` from a raw `ktime_t`.
+ #[inline]
+ pub fn from_raw(inner: bindings::ktime_t) -> Self {
+ Self { inner }
+ }
+
+ /// Get the current time using `CLOCK_MONOTONIC`.
+ #[inline]
+ pub fn ktime_get() -> Self {
+ // SAFETY: It is always safe to call `ktime_get` outside of NMI context.
+ Self::from_raw(unsafe { bindings::ktime_get() })
+ }
+
+ /// Divide the number of nanoseconds by a compile-time constant.
+ #[inline]
+ fn divns_constant<const DIV: i64>(self) -> i64 {
+ self.to_ns() / DIV
+ }
+
+ /// Returns the number of nanoseconds.
+ #[inline]
+ pub fn to_ns(self) -> i64 {
+ self.inner
+ }
+
+ /// Returns the number of milliseconds.
+ #[inline]
+ pub fn to_ms(self) -> i64 {
+ self.divns_constant::<NSEC_PER_MSEC>()
+ }
+}
+
+/// Returns the number of milliseconds between two ktimes.
+#[inline]
+pub fn ktime_ms_delta(later: Ktime, earlier: Ktime) -> i64 {
+ (later - earlier).to_ms()
+}
+
+impl core::ops::Sub for Ktime {
+ type Output = Ktime;
+
+ #[inline]
+ fn sub(self, other: Ktime) -> Ktime {
+ Self {
+ inner: self.inner - other.inner,
+ }
+ }
+}
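A sketch of the intended use: take two monotonic timestamps around some work and report the delta; the work itself is a placeholder.

```rust
use kernel::prelude::*;
use kernel::time::{ktime_ms_delta, msecs_to_jiffies, Ktime};

fn timed_work() {
    let start = Ktime::ktime_get();
    // ... the work being timed would go here ...
    let end = Ktime::ktime_get();
    pr_info!("work took {} ms\n", ktime_ms_delta(end, start));

    // For APIs that take timeouts in jiffies, convert from milliseconds.
    let _timeout = msecs_to_jiffies(100);
}
```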
diff --git a/rust/kernel/tracepoint.rs b/rust/kernel/tracepoint.rs
new file mode 100644
index 000000000000..c6e80aa99e8e
--- /dev/null
+++ b/rust/kernel/tracepoint.rs
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 Google LLC.
+
+//! Logic for tracepoints.
+
+/// Declare the Rust entry point for a tracepoint.
+///
+/// This macro generates an unsafe function that calls into C, and its safety requirements will be
+/// whatever the relevant C code requires. To document these safety requirements, you may add
+/// doc-comments when invoking the macro.
+#[macro_export]
+macro_rules! declare_trace {
+ ($($(#[$attr:meta])* $pub:vis unsafe fn $name:ident($($argname:ident : $argtyp:ty),* $(,)?);)*) => {$(
+ $( #[$attr] )*
+ #[inline(always)]
+ $pub unsafe fn $name($($argname : $argtyp),*) {
+ #[cfg(CONFIG_TRACEPOINTS)]
+ {
+ // SAFETY: It's always okay to query the static key for a tracepoint.
+ let should_trace = unsafe {
+ $crate::macros::paste! {
+ $crate::jump_label::static_branch_unlikely!(
+ $crate::bindings::[< __tracepoint_ $name >],
+ $crate::bindings::tracepoint,
+ key
+ )
+ }
+ };
+
+ if should_trace {
+ $crate::macros::paste! {
+ // SAFETY: The caller guarantees that it is okay to call this tracepoint.
+ unsafe { $crate::bindings::[< rust_do_trace_ $name >]($($argname),*) };
+ }
+ }
+ }
+
+ #[cfg(not(CONFIG_TRACEPOINTS))]
+ {
+ // If tracepoints are disabled, insert a trivial use of each argument
+ // to avoid unused argument warnings.
+ $( let _unused = $argname; )*
+ }
+ }
+ )*}
+}
+
+pub use declare_trace;
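An invocation sketch, assuming a C-side tracepoint named `my_subsys_event` (and its generated `rust_do_trace_my_subsys_event` helper) exists; both names are hypothetical here.

```rust
use kernel::tracepoint::declare_trace;

declare_trace! {
    /// # Safety
    ///
    /// Must only be called in contexts permitted by the (hypothetical)
    /// C tracepoint `my_subsys_event`.
    unsafe fn my_subsys_event(value: u32);
}

fn report(value: u32) {
    // SAFETY: This call site satisfies the (hypothetical) requirements
    // documented on `my_subsys_event` above.
    unsafe { my_subsys_event(value) };
}
```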
diff --git a/rust/kernel/transmute.rs b/rust/kernel/transmute.rs
new file mode 100644
index 000000000000..1c7d43771a37
--- /dev/null
+++ b/rust/kernel/transmute.rs
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Traits for transmuting types.
+
+/// Types for which any bit pattern is valid.
+///
+/// Not all types are valid for all values. For example, a `bool` must be either zero or one, so
+/// reading arbitrary bytes into something that contains a `bool` is not okay.
+///
+/// It's okay for the type to have padding, as initializing those bytes has no effect.
+///
+/// # Safety
+///
+/// All bit-patterns must be valid for this type. This type must not have interior mutability.
+pub unsafe trait FromBytes {}
+
+macro_rules! impl_frombytes {
+ ($($({$($generics:tt)*})? $t:ty, )*) => {
+ // SAFETY: Safety comments written in the macro invocation.
+ $(unsafe impl$($($generics)*)? FromBytes for $t {})*
+ };
+}
+
+impl_frombytes! {
+ // SAFETY: All bit patterns are acceptable values of the types below.
+ u8, u16, u32, u64, usize,
+ i8, i16, i32, i64, isize,
+
+ // SAFETY: If all bit patterns are acceptable for individual values in an array, then all bit
+ // patterns are also acceptable for arrays of that type.
+ {<T: FromBytes>} [T],
+ {<T: FromBytes, const N: usize>} [T; N],
+}
+
+/// Types that can be viewed as an immutable slice of initialized bytes.
+///
+/// If a struct implements this trait, then it is okay to copy it byte-for-byte to userspace. This
+/// means that it should not have any padding, as padding bytes are uninitialized. Reading
+/// uninitialized memory is not only undefined behavior; it may even leak sensitive
+/// information from the stack to userspace.
+///
+/// The struct should also not hold kernel pointers, as kernel pointer addresses are also considered
+/// sensitive. However, leaking kernel pointers is not considered undefined behavior by Rust, so
+/// this is a correctness requirement, but not a safety requirement.
+///
+/// # Safety
+///
+/// Values of this type may not contain any uninitialized bytes. This type must not have interior
+/// mutability.
+pub unsafe trait AsBytes {}
+
+macro_rules! impl_asbytes {
+ ($($({$($generics:tt)*})? $t:ty, )*) => {
+ // SAFETY: Safety comments written in the macro invocation.
+ $(unsafe impl$($($generics)*)? AsBytes for $t {})*
+ };
+}
+
+impl_asbytes! {
+ // SAFETY: Instances of the following types have no uninitialized portions.
+ u8, u16, u32, u64, usize,
+ i8, i16, i32, i64, isize,
+ bool,
+ char,
+ str,
+
+ // SAFETY: If individual values in an array have no uninitialized portions, then the array
+ // itself does not have any uninitialized portions either.
+ {<T: AsBytes>} [T],
+ {<T: AsBytes, const N: usize>} [T; N],
+}
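Driver types opt in by writing the unsafe impls themselves, carrying the proof obligations in safety comments. A sketch for a hypothetical fixed-layout record:

```rust
use kernel::transmute::{AsBytes, FromBytes};

/// A hypothetical fixed-layout record exchanged with userspace.
#[repr(C)]
struct Record {
    id: u32,
    flags: u32,
}

// SAFETY: `Record` consists only of integers, for which every bit pattern
// is valid, and it has no interior mutability.
unsafe impl FromBytes for Record {}

// SAFETY: Two `u32` fields in a `#[repr(C)]` struct leave no padding, so
// every byte of a `Record` is initialized.
unsafe impl AsBytes for Record {}
```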
diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs
index fdb778e65d79..2bbaab83b9d6 100644
--- a/rust/kernel/types.rs
+++ b/rust/kernel/types.rs
@@ -3,11 +3,10 @@
//! Kernel types.
use crate::init::{self, PinInit};
-use alloc::boxed::Box;
use core::{
cell::UnsafeCell,
marker::{PhantomData, PhantomPinned},
- mem::MaybeUninit,
+ mem::{ManuallyDrop, MaybeUninit},
ops::{Deref, DerefMut},
ptr::NonNull,
};
@@ -20,66 +19,111 @@ use core::{
/// This trait is meant to be used in cases when Rust objects are stored in C objects and
/// eventually "freed" back to Rust.
pub trait ForeignOwnable: Sized {
- /// Type of values borrowed between calls to [`ForeignOwnable::into_foreign`] and
- /// [`ForeignOwnable::from_foreign`].
+ /// Type used to immutably borrow a value that is currently foreign-owned.
type Borrowed<'a>;
+ /// Type used to mutably borrow a value that is currently foreign-owned.
+ type BorrowedMut<'a>;
+
/// Converts a Rust-owned object to a foreign-owned one.
///
- /// The foreign representation is a pointer to void.
- fn into_foreign(self) -> *const core::ffi::c_void;
+ /// The foreign representation is a pointer to void. There are no guarantees for this pointer.
+ /// For example, it might be invalid, dangling or pointing to uninitialized memory. Using it in
+ /// any way except for [`from_foreign`], [`try_from_foreign`], [`borrow`], or [`borrow_mut`] can
+ /// result in undefined behavior.
+ ///
+ /// [`from_foreign`]: Self::from_foreign
+ /// [`try_from_foreign`]: Self::try_from_foreign
+ /// [`borrow`]: Self::borrow
+ /// [`borrow_mut`]: Self::borrow_mut
+ fn into_foreign(self) -> *mut crate::ffi::c_void;
- /// Borrows a foreign-owned object.
+ /// Converts a foreign-owned object back to a Rust-owned one.
///
/// # Safety
///
- /// `ptr` must have been returned by a previous call to [`ForeignOwnable::into_foreign`] for
- /// which a previous matching [`ForeignOwnable::from_foreign`] hasn't been called yet.
- unsafe fn borrow<'a>(ptr: *const core::ffi::c_void) -> Self::Borrowed<'a>;
+ /// The provided pointer must have been returned by a previous call to [`into_foreign`], and it
+ /// must not be passed to `from_foreign` more than once.
+ ///
+ /// [`into_foreign`]: Self::into_foreign
+ unsafe fn from_foreign(ptr: *mut crate::ffi::c_void) -> Self;
- /// Converts a foreign-owned object back to a Rust-owned one.
+ /// Tries to convert a foreign-owned object back to a Rust-owned one.
+ ///
+ /// A convenience wrapper over [`ForeignOwnable::from_foreign`] that returns [`None`] if `ptr`
+ /// is null.
///
/// # Safety
///
- /// `ptr` must have been returned by a previous call to [`ForeignOwnable::into_foreign`] for
- /// which a previous matching [`ForeignOwnable::from_foreign`] hasn't been called yet.
- /// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow`] for
- /// this object must have been dropped.
- unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self;
-}
-
-impl<T: 'static> ForeignOwnable for Box<T> {
- type Borrowed<'a> = &'a T;
-
- fn into_foreign(self) -> *const core::ffi::c_void {
- Box::into_raw(self) as _
+ /// `ptr` must either be null or satisfy the safety requirements for [`from_foreign`].
+ ///
+ /// [`from_foreign`]: Self::from_foreign
+ unsafe fn try_from_foreign(ptr: *mut crate::ffi::c_void) -> Option<Self> {
+ if ptr.is_null() {
+ None
+ } else {
+ // SAFETY: Since `ptr` is not null here, it satisfies the safety requirements of
+ // `from_foreign` by the safety requirements of this function.
+ unsafe { Some(Self::from_foreign(ptr)) }
+ }
}
- unsafe fn borrow<'a>(ptr: *const core::ffi::c_void) -> &'a T {
- // SAFETY: The safety requirements for this function ensure that the object is still alive,
- // so it is safe to dereference the raw pointer.
- // The safety requirements of `from_foreign` also ensure that the object remains alive for
- // the lifetime of the returned value.
- unsafe { &*ptr.cast() }
- }
+ /// Borrows a foreign-owned object immutably.
+ ///
+ /// This method provides a way to access a foreign-owned value from Rust immutably. It provides
+ /// you with exactly the same abilities as an `&Self` when the value is Rust-owned.
+ ///
+ /// # Safety
+ ///
+ /// The provided pointer must have been returned by a previous call to [`into_foreign`], and if
+ /// the pointer is ever passed to [`from_foreign`], then that call must happen after the end of
+ /// the lifetime `'a`.
+ ///
+ /// [`into_foreign`]: Self::into_foreign
+ /// [`from_foreign`]: Self::from_foreign
+ unsafe fn borrow<'a>(ptr: *mut crate::ffi::c_void) -> Self::Borrowed<'a>;
- unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self {
- // SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous
- // call to `Self::into_foreign`.
- unsafe { Box::from_raw(ptr as _) }
- }
+ /// Borrows a foreign-owned object mutably.
+ ///
+ /// This method provides a way to access a foreign-owned value from Rust mutably. It provides
+ /// you with exactly the same abilities as an `&mut Self` when the value is Rust-owned, except
+ /// that the address of the object must not be changed.
+ ///
+ /// Note that for types like [`Arc`], an `&mut Arc<T>` only gives you immutable access to the
+ /// inner value, so this method also only provides immutable access in that case.
+ ///
+ /// In the case of `Box<T>`, this method gives you the ability to modify the inner `T`, but it
+ /// does not let you change the box itself. That is, you cannot change which allocation the box
+ /// points at.
+ ///
+ /// # Safety
+ ///
+ /// The provided pointer must have been returned by a previous call to [`into_foreign`], and if
+ /// the pointer is ever passed to [`from_foreign`], then that call must happen after the end of
+ /// the lifetime `'a`.
+ ///
+ /// The lifetime `'a` must not overlap with the lifetime of any other call to [`borrow`] or
+ /// `borrow_mut` on the same object.
+ ///
+ /// [`into_foreign`]: Self::into_foreign
+ /// [`from_foreign`]: Self::from_foreign
+ /// [`borrow`]: Self::borrow
+ /// [`Arc`]: crate::sync::Arc
+ unsafe fn borrow_mut<'a>(ptr: *mut crate::ffi::c_void) -> Self::BorrowedMut<'a>;
}
impl ForeignOwnable for () {
type Borrowed<'a> = ();
+ type BorrowedMut<'a> = ();
- fn into_foreign(self) -> *const core::ffi::c_void {
+ fn into_foreign(self) -> *mut crate::ffi::c_void {
core::ptr::NonNull::dangling().as_ptr()
}
- unsafe fn borrow<'a>(_: *const core::ffi::c_void) -> Self::Borrowed<'a> {}
+ unsafe fn from_foreign(_: *mut crate::ffi::c_void) -> Self {}
- unsafe fn from_foreign(_: *const core::ffi::c_void) -> Self {}
+ unsafe fn borrow<'a>(_: *mut crate::ffi::c_void) -> Self::Borrowed<'a> {}
+ unsafe fn borrow_mut<'a>(_: *mut crate::ffi::c_void) -> Self::BorrowedMut<'a> {}
}
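To illustrate the intended round trip, a sketch of handing a `KBox` to C and reclaiming it later, assuming the `KBox<T>` implementation of the trait added elsewhere in this series; the `register`/`inspect`/`unregister` split is hypothetical.

```rust
use kernel::prelude::*;
use kernel::types::ForeignOwnable;

fn register(data: KBox<u32>) -> *mut kernel::ffi::c_void {
    // Ownership moves to the C side; from now on only the methods listed
    // in the `into_foreign` documentation may use this pointer.
    data.into_foreign()
}

/// # Safety
///
/// `ptr` must come from `register`, and the borrow must end before any
/// call to `unregister` on the same pointer.
unsafe fn inspect(ptr: *mut kernel::ffi::c_void) {
    // SAFETY: Guaranteed by this function's safety requirements.
    let value = unsafe { <KBox<u32> as ForeignOwnable>::borrow(ptr) };
    pr_info!("value: {}\n", *value);
}

/// # Safety
///
/// `ptr` must come from `register` and is consumed by this call.
unsafe fn unregister(ptr: *mut kernel::ffi::c_void) {
    // SAFETY: Guaranteed by this function's safety requirements; the
    // pointer is not used again afterwards.
    drop(unsafe { KBox::<u32>::from_foreign(ptr) });
}
```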
/// Runs a cleanup function/closure when dropped.
@@ -90,6 +134,7 @@ impl ForeignOwnable for () {
///
/// In the example below, we have multiple exit paths and we want to log regardless of which one is
/// taken:
+///
/// ```
/// # use kernel::types::ScopeGuard;
/// fn example1(arg: bool) {
@@ -108,6 +153,7 @@ impl ForeignOwnable for () {
///
/// In the example below, we want to log the same message on all early exits but a different one on
/// the main exit path:
+///
/// ```
/// # use kernel::types::ScopeGuard;
/// fn example2(arg: bool) {
@@ -129,17 +175,18 @@ impl ForeignOwnable for () {
///
/// In the example below, we need a mutable object (the vector) to be accessible within the log
/// function, so we wrap it in the [`ScopeGuard`]:
+///
/// ```
/// # use kernel::types::ScopeGuard;
/// fn example3(arg: bool) -> Result {
/// let mut vec =
-/// ScopeGuard::new_with_data(Vec::new(), |v| pr_info!("vec had {} elements\n", v.len()));
+/// ScopeGuard::new_with_data(KVec::new(), |v| pr_info!("vec had {} elements\n", v.len()));
///
-/// vec.try_push(10u8)?;
+/// vec.push(10u8, GFP_KERNEL)?;
/// if arg {
/// return Ok(());
/// }
-/// vec.try_push(20u8)?;
+/// vec.push(20u8, GFP_KERNEL)?;
/// Ok(())
/// }
///
@@ -173,7 +220,7 @@ impl<T, F: FnOnce(T)> ScopeGuard<T, F> {
impl ScopeGuard<(), fn(())> {
/// Creates a new guarded object with the given cleanup function.
pub fn new(cleanup: impl FnOnce()) -> ScopeGuard<(), impl FnOnce(())> {
- ScopeGuard::new_with_data((), move |_| cleanup())
+ ScopeGuard::new_with_data((), move |()| cleanup())
}
}
@@ -204,7 +251,58 @@ impl<T, F: FnOnce(T)> Drop for ScopeGuard<T, F> {
/// Stores an opaque value.
///
-/// This is meant to be used with FFI objects that are never interpreted by Rust code.
+/// `Opaque<T>` is meant to be used with FFI objects that are never interpreted by Rust code.
+///
+/// It is used to wrap structs from the C side, like for example `Opaque<bindings::mutex>`.
+/// It gets rid of all the usual assumptions that Rust has for a value:
+///
+/// * The value is allowed to be uninitialized (for example, it may have an invalid bit pattern:
+/// `3` for a [`bool`]).
+/// * The value is allowed to be mutated, when a `&Opaque<T>` exists on the Rust side.
+/// * No uniqueness for mutable references: it is fine to have multiple `&mut Opaque<T>` point to
+/// the same value.
+/// * The value is not allowed to be shared with other threads (i.e. it is `!Sync`).
+///
+/// This has to be used for all values that the C side has access to, because it can't be ensured
+/// that the C side is adhering to the usual constraints that Rust needs.
+///
+/// Using `Opaque<T>` makes it possible to keep using references on the Rust side even for values
+/// shared with C.
+///
+/// # Examples
+///
+/// ```
+/// # #![expect(unreachable_pub, clippy::disallowed_names)]
+/// use kernel::types::Opaque;
+/// # // Emulate a C struct binding which is from C, maybe uninitialized or not, only the C side
+/// # // knows.
+/// # mod bindings {
+/// # pub struct Foo {
+/// # pub val: u8,
+/// # }
+/// # }
+///
+/// // `foo.val` is assumed to be handled on the C side, so we use `Opaque` to wrap it.
+/// pub struct Foo {
+/// foo: Opaque<bindings::Foo>,
+/// }
+///
+/// impl Foo {
+/// pub fn get_val(&self) -> u8 {
+/// let ptr = Opaque::get(&self.foo);
+///
+/// // SAFETY: `Self` is valid from C side.
+/// unsafe { (*ptr).val }
+/// }
+/// }
+///
+/// // Create an instance of `Foo` with the `Opaque` wrapper.
+/// let foo = Foo {
+/// foo: Opaque::new(bindings::Foo { val: 0xdb }),
+/// };
+///
+/// assert_eq!(foo.get_val(), 0xdb);
+/// ```
#[repr(transparent)]
pub struct Opaque<T> {
value: UnsafeCell<MaybeUninit<T>>,
@@ -228,6 +326,17 @@ impl<T> Opaque<T> {
}
}
+ /// Create an opaque pin-initializer from the given pin-initializer.
+ pub fn pin_init(slot: impl PinInit<T>) -> impl PinInit<Self> {
+ Self::ffi_init(|ptr: *mut T| {
+ // SAFETY:
+ // - `ptr` is a valid pointer to uninitialized memory,
+ // - `slot` is not accessed on error; the call is infallible,
+ // - `slot` is pinned in memory.
+ let _ = unsafe { init::PinInit::<T>::__pinned_init(slot, ptr) };
+ })
+ }
+
/// Creates a pin-initializer from the given initializer closure.
///
/// The returned initializer calls the given closure with the pointer to the inner `T` of this
@@ -247,8 +356,24 @@ impl<T> Opaque<T> {
}
}
+ /// Creates a fallible pin-initializer from the given initializer closure.
+ ///
+ /// The returned initializer calls the given closure with the pointer to the inner `T` of this
+ /// `Opaque`. Since this memory is uninitialized, the closure is not allowed to read from it.
+ ///
+ /// This function is safe, because the `T` inside of an `Opaque` is allowed to be
+ /// uninitialized. Additionally, access to the inner `T` requires `unsafe`, so the caller needs
+ /// to verify at that point that the inner value is valid.
+ pub fn try_ffi_init<E>(
+ init_func: impl FnOnce(*mut T) -> Result<(), E>,
+ ) -> impl PinInit<Self, E> {
+ // SAFETY: We contain a `MaybeUninit`, so it is OK for the `init_func` to not fully
+ // initialize the `T`.
+ unsafe { init::pin_init_from_closure::<_, E>(move |slot| init_func(Self::raw_get(slot))) }
+ }
+
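As a sketch of how the fallible variant composes with a C initializer that can fail: `init_foo` and the `Foo` stand-in are hypothetical bindings, and `to_result` translates the C error code.

```rust
use kernel::error::{to_result, Error};
use kernel::init::PinInit;
use kernel::types::Opaque;

/// Stand-in for a C struct (hypothetical).
#[repr(C)]
struct Foo {
    _x: i32,
}

/// Stand-in for a C binding `int init_foo(struct foo *)` (hypothetical).
unsafe fn init_foo(_ptr: *mut Foo) -> i32 {
    0
}

fn foo_init() -> impl PinInit<Opaque<Foo>, Error> {
    Opaque::try_ffi_init(|slot: *mut Foo| {
        // SAFETY: `slot` points at uninitialized memory that the
        // (hypothetical) `init_foo` initializes on success.
        to_result(unsafe { init_foo(slot) })
    })
}
```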
/// Returns a raw pointer to the opaque data.
- pub fn get(&self) -> *mut T {
+ pub const fn get(&self) -> *mut T {
UnsafeCell::get(&self.value).cast::<T>()
}
@@ -344,6 +469,37 @@ impl<T: AlwaysRefCounted> ARef<T> {
_p: PhantomData,
}
}
+
+ /// Consumes the `ARef`, returning a raw pointer.
+ ///
+ /// This function does not change the refcount. After calling this function, the caller is
+ /// responsible for the refcount previously managed by the `ARef`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use core::ptr::NonNull;
+ /// use kernel::types::{ARef, AlwaysRefCounted};
+ ///
+ /// struct Empty {}
+ ///
+ /// # // SAFETY: TODO.
+ /// unsafe impl AlwaysRefCounted for Empty {
+ /// fn inc_ref(&self) {}
+ /// unsafe fn dec_ref(_obj: NonNull<Self>) {}
+ /// }
+ ///
+ /// let mut data = Empty {};
+ /// let ptr = NonNull::<Empty>::new(&mut data).unwrap();
+ /// # // SAFETY: TODO.
+ /// let data_ref: ARef<Empty> = unsafe { ARef::from_raw(ptr) };
+ /// let raw_ptr: NonNull<Empty> = ARef::into_raw(data_ref);
+ ///
+ /// assert_eq!(ptr, raw_ptr);
+ /// ```
+ pub fn into_raw(me: Self) -> NonNull<T> {
+ ManuallyDrop::new(me).ptr
+ }
}
impl<T: AlwaysRefCounted> Clone for ARef<T> {
@@ -380,6 +536,15 @@ impl<T: AlwaysRefCounted> Drop for ARef<T> {
}
/// A sum type that always holds either a value of type `L` or `R`.
+///
+/// # Examples
+///
+/// ```
+/// use kernel::types::Either;
+///
+/// let left_value: Either<i32, &str> = Either::Left(7);
+/// let right_value: Either<i32, &str> = Either::Right("right value");
+/// ```
pub enum Either<L, R> {
/// Constructs an instance of [`Either`] containing a value of type `L`.
Left(L),
@@ -387,3 +552,24 @@ pub enum Either<L, R> {
/// Constructs an instance of [`Either`] containing a value of type `R`.
Right(R),
}
+
+/// Zero-sized type to mark types not [`Send`].
+///
+/// Add this type as a field to your struct if your type should not be sent to a different task.
+/// Since [`Send`] is an auto trait, adding a single field that is `!Send` will ensure that the
+/// whole type is `!Send`.
+///
+/// If a type is `!Send` it is impossible to give control over an instance of the type to another
+/// task. This is useful to include in types that store or reference task-local information. A file
+/// descriptor is an example of such task-local information.
+///
+/// This type also makes the type `!Sync`, which prevents immutable access to the value from
+/// several threads in parallel.
+pub type NotThreadSafe = PhantomData<*mut ()>;
+
+/// Used to construct instances of type [`NotThreadSafe`] similar to how `PhantomData` is
+/// constructed.
+///
+/// [`NotThreadSafe`]: type@NotThreadSafe
+#[allow(non_upper_case_globals)]
+pub const NotThreadSafe: NotThreadSafe = PhantomData;
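A sketch of the intended usage, mirroring `TaskRef` in the task patch above: embed the marker to pin a value to the task that created it. The wrapper type here is hypothetical.

```rust
use kernel::types::NotThreadSafe;

/// Hypothetical wrapper around task-local state, e.g. a file descriptor
/// number that is only meaningful within the current task.
struct TaskLocalFd {
    fd: i32,
    // Zero runtime cost, but makes the struct `!Send` and `!Sync`, so it
    // cannot escape the current task.
    _not_send: NotThreadSafe,
}

impl TaskLocalFd {
    fn new(fd: i32) -> Self {
        Self {
            fd,
            _not_send: NotThreadSafe,
        }
    }

    fn get(&self) -> i32 {
        self.fd
    }
}
```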
diff --git a/rust/kernel/uaccess.rs b/rust/kernel/uaccess.rs
new file mode 100644
index 000000000000..719b0a48ff55
--- /dev/null
+++ b/rust/kernel/uaccess.rs
@@ -0,0 +1,372 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Slices to user space memory regions.
+//!
+//! C header: [`include/linux/uaccess.h`](srctree/include/linux/uaccess.h)
+
+use crate::{
+ alloc::{Allocator, Flags},
+ bindings,
+ error::Result,
+ ffi::c_void,
+ prelude::*,
+ transmute::{AsBytes, FromBytes},
+};
+use core::mem::{size_of, MaybeUninit};
+
+/// The type used for userspace addresses.
+pub type UserPtr = usize;
+
+/// A pointer to an area in userspace memory, which can be either read-only or read-write.
+///
+/// All methods on this struct are safe: attempting to read or write on bad addresses (either out of
+/// the bound of the slice or unmapped addresses) will return [`EFAULT`]. Concurrent access,
+/// *including data races to/from userspace memory*, is permitted, because fundamentally another
+/// userspace thread/process could always be modifying memory at the same time (in the same way that
+/// userspace Rust's [`std::io`] permits data races with the contents of files on disk). In the
+/// presence of a race, the exact byte values read/written are unspecified but the operation is
+/// well-defined. Kernelspace code should validate its copy of data after completing a read, and not
+/// expect that multiple reads of the same address will return the same value.
+///
+/// These APIs are designed to make it difficult to accidentally write TOCTOU (time-of-check to
+/// time-of-use) bugs. Every time a memory location is read, the reader's position is advanced by
+/// the read length and the next read will start from there. This helps prevent accidentally reading
+/// the same location twice and causing a TOCTOU bug.
+///
+/// Creating a [`UserSliceReader`] and/or [`UserSliceWriter`] consumes the `UserSlice`, helping
+/// ensure that there aren't multiple readers or writers to the same location.
+///
+/// If double-fetching a memory location is necessary for some reason, then that is done by creating
+/// multiple readers to the same memory location, e.g. using [`clone_reader`].
+///
+/// # Examples
+///
+/// Takes a region of userspace memory from the current process, and modifies it by adding one to
+/// every byte in the region.
+///
+/// ```no_run
+/// use kernel::ffi::c_void;
+/// use kernel::error::Result;
+/// use kernel::uaccess::{UserPtr, UserSlice};
+///
+/// fn bytes_add_one(uptr: UserPtr, len: usize) -> Result<()> {
+/// let (read, mut write) = UserSlice::new(uptr, len).reader_writer();
+///
+/// let mut buf = KVec::new();
+/// read.read_all(&mut buf, GFP_KERNEL)?;
+///
+/// for b in &mut buf {
+/// *b = b.wrapping_add(1);
+/// }
+///
+/// write.write_slice(&buf)?;
+/// Ok(())
+/// }
+/// ```
+///
+/// Example illustrating a TOCTOU (time-of-check to time-of-use) bug.
+///
+/// ```no_run
+/// use kernel::ffi::c_void;
+/// use kernel::error::{code::EINVAL, Result};
+/// use kernel::uaccess::{UserPtr, UserSlice};
+///
+/// /// Returns whether the data in this region is valid.
+/// fn is_valid(uptr: UserPtr, len: usize) -> Result<bool> {
+/// let read = UserSlice::new(uptr, len).reader();
+///
+/// let mut buf = KVec::new();
+/// read.read_all(&mut buf, GFP_KERNEL)?;
+///
+/// todo!()
+/// }
+///
+/// /// Returns the bytes behind this user pointer if they are valid.
+/// fn get_bytes_if_valid(uptr: UserPtr, len: usize) -> Result<KVec<u8>> {
+/// if !is_valid(uptr, len)? {
+/// return Err(EINVAL);
+/// }
+///
+/// let read = UserSlice::new(uptr, len).reader();
+///
+/// let mut buf = KVec::new();
+/// read.read_all(&mut buf, GFP_KERNEL)?;
+///
+/// // THIS IS A BUG! The bytes could have changed since we checked them.
+/// //
+/// // To avoid this kind of bug, don't call `UserSlice::new` multiple
+/// // times with the same address.
+/// Ok(buf)
+/// }
+/// ```
+///
+/// [`std::io`]: https://doc.rust-lang.org/std/io/index.html
+/// [`clone_reader`]: UserSliceReader::clone_reader
+pub struct UserSlice {
+ ptr: UserPtr,
+ length: usize,
+}
+
+impl UserSlice {
+ /// Constructs a user slice from a raw pointer and a length in bytes.
+ ///
+ /// Constructing a [`UserSlice`] performs no checks on the provided address and length; it can
+ /// safely be constructed inside a kernel thread with no current userspace process. Reads and
+ /// writes wrap the kernel APIs `copy_from_user` and `copy_to_user`, which check the memory map
+ /// of the current process and enforce that the address range is within the user range (no
+ /// additional calls to `access_ok` are needed). Validity of the pointer is checked when you
+ /// attempt to read or write, not in the call to `UserSlice::new`.
+ ///
+ /// Callers must be careful to avoid time-of-check-time-of-use (TOCTOU) issues. The simplest way
+ /// is to create a single instance of [`UserSlice`] per user memory block, as that ensures each
+ /// byte is read at most once.
+ pub fn new(ptr: UserPtr, length: usize) -> Self {
+ UserSlice { ptr, length }
+ }
+
+ /// Reads the entirety of the user slice, appending it to the end of the provided buffer.
+ ///
+ /// Fails with [`EFAULT`] if the read happens on a bad address.
+ pub fn read_all<A: Allocator>(self, buf: &mut Vec<u8, A>, flags: Flags) -> Result {
+ self.reader().read_all(buf, flags)
+ }
+
+ /// Constructs a [`UserSliceReader`].
+ pub fn reader(self) -> UserSliceReader {
+ UserSliceReader {
+ ptr: self.ptr,
+ length: self.length,
+ }
+ }
+
+ /// Constructs a [`UserSliceWriter`].
+ pub fn writer(self) -> UserSliceWriter {
+ UserSliceWriter {
+ ptr: self.ptr,
+ length: self.length,
+ }
+ }
+
+ /// Constructs both a [`UserSliceReader`] and a [`UserSliceWriter`].
+ ///
+ /// Usually when this is used, you will first read the data, and then overwrite it afterwards.
+ pub fn reader_writer(self) -> (UserSliceReader, UserSliceWriter) {
+ (
+ UserSliceReader {
+ ptr: self.ptr,
+ length: self.length,
+ },
+ UserSliceWriter {
+ ptr: self.ptr,
+ length: self.length,
+ },
+ )
+ }
+}
+
+/// A reader for [`UserSlice`].
+///
+/// Used to incrementally read from the user slice.
+pub struct UserSliceReader {
+ ptr: UserPtr,
+ length: usize,
+}
+
+impl UserSliceReader {
+ /// Skip the provided number of bytes.
+ ///
+ /// Fails with [`EFAULT`] if `num_skip` is greater than the number of bytes left in the buffer.
+ pub fn skip(&mut self, num_skip: usize) -> Result {
+ // Update `self.length` first since that's the fallible part of this operation.
+ self.length = self.length.checked_sub(num_skip).ok_or(EFAULT)?;
+ self.ptr = self.ptr.wrapping_add(num_skip);
+ Ok(())
+ }
+
+ /// Create a reader that can access the same range of data.
+ ///
+ /// Reading from the clone does not advance the current reader.
+ ///
+ /// The caller should take care to not introduce TOCTOU issues, as described in the
+ /// documentation for [`UserSlice`].
+ pub fn clone_reader(&self) -> UserSliceReader {
+ UserSliceReader {
+ ptr: self.ptr,
+ length: self.length,
+ }
+ }
+
+ /// Returns the number of bytes left to be read from this reader.
+ ///
+ /// Note that even reading less than this number of bytes may fail.
+ pub fn len(&self) -> usize {
+ self.length
+ }
+
+ /// Returns `true` if no data is available in the I/O buffer.
+ pub fn is_empty(&self) -> bool {
+ self.length == 0
+ }
+
+ /// Reads raw data from the user slice into a kernel buffer.
+ ///
+ /// For a version that uses `&mut [u8]`, please see [`UserSliceReader::read_slice`].
+ ///
+ /// Fails with [`EFAULT`] if the read happens on a bad address, or if the read goes out of
+ /// bounds of this [`UserSliceReader`]. This call may modify `out` even if it returns an error.
+ ///
+ /// # Guarantees
+ ///
+ /// After a successful call to this method, all bytes in `out` are initialized.
+ pub fn read_raw(&mut self, out: &mut [MaybeUninit<u8>]) -> Result {
+ let len = out.len();
+ let out_ptr = out.as_mut_ptr().cast::<c_void>();
+ if len > self.length {
+ return Err(EFAULT);
+ }
+ // SAFETY: `out_ptr` points into a mutable slice of length `len`, so we may write
+ // that many bytes to it.
+ let res = unsafe { bindings::copy_from_user(out_ptr, self.ptr as *const c_void, len) };
+ if res != 0 {
+ return Err(EFAULT);
+ }
+ self.ptr = self.ptr.wrapping_add(len);
+ self.length -= len;
+ Ok(())
+ }
+
+ /// Reads raw data from the user slice into a kernel buffer.
+ ///
+ /// Fails with [`EFAULT`] if the read happens on a bad address, or if the read goes out of
+ /// bounds of this [`UserSliceReader`]. This call may modify `out` even if it returns an error.
+ pub fn read_slice(&mut self, out: &mut [u8]) -> Result {
+ // SAFETY: The types are compatible and `read_raw` doesn't write uninitialized bytes to
+ // `out`.
+ let out = unsafe { &mut *(out as *mut [u8] as *mut [MaybeUninit<u8>]) };
+ self.read_raw(out)
+ }
+
+ /// Reads a value of the specified type.
+ ///
+ /// Fails with [`EFAULT`] if the read happens on a bad address, or if the read goes out of
+ /// bounds of this [`UserSliceReader`].
+ pub fn read<T: FromBytes>(&mut self) -> Result<T> {
+ let len = size_of::<T>();
+ if len > self.length {
+ return Err(EFAULT);
+ }
+ let mut out: MaybeUninit<T> = MaybeUninit::uninit();
+ // SAFETY: The local variable `out` is valid for writing `size_of::<T>()` bytes.
+ //
+ // By using the _copy_from_user variant, we skip the check_object_size check that verifies
+ // the kernel pointer. This mirrors the logic on the C side that skips the check when the
+ // length is a compile-time constant.
+ let res = unsafe {
+ bindings::_copy_from_user(
+ out.as_mut_ptr().cast::<c_void>(),
+ self.ptr as *const c_void,
+ len,
+ )
+ };
+ if res != 0 {
+ return Err(EFAULT);
+ }
+ self.ptr = self.ptr.wrapping_add(len);
+ self.length -= len;
+ // SAFETY: The read above has initialized all bytes in `out`, and since `T` implements
+ // `FromBytes`, any bit-pattern is a valid value for this type.
+ Ok(unsafe { out.assume_init() })
+ }
+
+ /// Reads the entirety of the user slice, appending it to the end of the provided buffer.
+ ///
+ /// Fails with [`EFAULT`] if the read happens on a bad address.
+ pub fn read_all<A: Allocator>(mut self, buf: &mut Vec<u8, A>, flags: Flags) -> Result {
+ let len = self.length;
+ buf.reserve(len, flags)?;
+
+ // The call to `reserve` was successful, so the spare capacity is at least `len` bytes
+ // long.
+ self.read_raw(&mut buf.spare_capacity_mut()[..len])?;
+
+ // SAFETY: Since the call to `read_raw` was successful, the next `len` bytes of the
+ // vector have been initialized.
+ unsafe { buf.set_len(buf.len() + len) };
+ Ok(())
+ }
+}
+
+/// A writer for [`UserSlice`].
+///
+/// Used to incrementally write into the user slice.
+pub struct UserSliceWriter {
+ ptr: UserPtr,
+ length: usize,
+}
+
+impl UserSliceWriter {
+ /// Returns the amount of space remaining in this buffer.
+ ///
+ /// Note that even writing less than this number of bytes may fail.
+ pub fn len(&self) -> usize {
+ self.length
+ }
+
+ /// Returns `true` if no more data can be written to this buffer.
+ pub fn is_empty(&self) -> bool {
+ self.length == 0
+ }
+
+ /// Writes raw data to this user pointer from a kernel buffer.
+ ///
+ /// Fails with [`EFAULT`] if the write happens on a bad address, or if the write goes out of
+ /// bounds of this [`UserSliceWriter`]. This call may modify the associated userspace slice even
+ /// if it returns an error.
+ pub fn write_slice(&mut self, data: &[u8]) -> Result {
+ let len = data.len();
+ let data_ptr = data.as_ptr().cast::<c_void>();
+ if len > self.length {
+ return Err(EFAULT);
+ }
+ // SAFETY: `data_ptr` points into an immutable slice of length `len`, so we may read
+ // that many bytes from it.
+ let res = unsafe { bindings::copy_to_user(self.ptr as *mut c_void, data_ptr, len) };
+ if res != 0 {
+ return Err(EFAULT);
+ }
+ self.ptr = self.ptr.wrapping_add(len);
+ self.length -= len;
+ Ok(())
+ }
+
+ /// Writes the provided Rust value to this userspace pointer.
+ ///
+ /// Fails with [`EFAULT`] if the write happens on a bad address, or if the write goes out of
+ /// bounds of this [`UserSliceWriter`]. This call may modify the associated userspace slice even
+ /// if it returns an error.
+ pub fn write<T: AsBytes>(&mut self, value: &T) -> Result {
+ let len = size_of::<T>();
+ if len > self.length {
+ return Err(EFAULT);
+ }
+ // SAFETY: The reference points to a value of type `T`, so it is valid for reading
+ // `size_of::<T>()` bytes.
+ //
+ // By using the _copy_to_user variant, we skip the check_object_size check that verifies the
+ // kernel pointer. This mirrors the logic on the C side that skips the check when the length
+ // is a compile-time constant.
+ let res = unsafe {
+ bindings::_copy_to_user(
+ self.ptr as *mut c_void,
+ (value as *const T).cast::<c_void>(),
+ len,
+ )
+ };
+ if res != 0 {
+ return Err(EFAULT);
+ }
+ self.ptr = self.ptr.wrapping_add(len);
+ self.length -= len;
+ Ok(())
+ }
+}
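A sketch combining the typed accessors with the `FromBytes`/`AsBytes` traits from the transmute patch: a hypothetical ioctl-style handler reads a request, then writes a reply back over the same region.

```rust
use kernel::prelude::*;
use kernel::transmute::{AsBytes, FromBytes};
use kernel::uaccess::{UserPtr, UserSlice};

#[repr(C)]
struct Request {
    id: u32,
    arg: u32,
}

// SAFETY: Only integer fields, so every bit pattern is valid, and there is
// no interior mutability.
unsafe impl FromBytes for Request {}
// SAFETY: Two `u32` fields leave no padding, so all bytes are initialized.
unsafe impl AsBytes for Request {}

/// Hypothetical handler: reads a `Request` and answers with `arg + 1`.
fn handle(uptr: UserPtr, len: usize) -> Result {
    let (mut reader, mut writer) = UserSlice::new(uptr, len).reader_writer();
    let mut req: Request = reader.read()?;
    req.arg = req.arg.wrapping_add(1);
    writer.write(&req)?;
    Ok(())
}
```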
diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs
index 498397877376..b7be224cdf4b 100644
--- a/rust/kernel/workqueue.rs
+++ b/rust/kernel/workqueue.rs
@@ -12,19 +12,19 @@
//!
//! # The raw API
//!
-//! The raw API consists of the `RawWorkItem` trait, where the work item needs to provide an
+//! The raw API consists of the [`RawWorkItem`] trait, where the work item needs to provide an
//! arbitrary function that knows how to enqueue the work item. It should usually not be used
//! directly, but if you want to, you can use it without using the pieces from the safe API.
//!
//! # The safe API
//!
-//! The safe API is used via the `Work` struct and `WorkItem` traits. Furthermore, it also includes
-//! a trait called `WorkItemPointer`, which is usually not used directly by the user.
+//! The safe API is used via the [`Work`] struct and [`WorkItem`] traits. Furthermore, it also
+//! includes a trait called [`WorkItemPointer`], which is usually not used directly by the user.
//!
-//! * The `Work` struct is the Rust wrapper for the C `work_struct` type.
-//! * The `WorkItem` trait is implemented for structs that can be enqueued to a workqueue.
-//! * The `WorkItemPointer` trait is implemented for the pointer type that points at a something
-//! that implements `WorkItem`.
+//! * The [`Work`] struct is the Rust wrapper for the C `work_struct` type.
+//! * The [`WorkItem`] trait is implemented for structs that can be enqueued to a workqueue.
+//! * The [`WorkItemPointer`] trait is implemented for the pointer type that points at something
+//! that implements [`WorkItem`].
//!
//! ## Example
//!
@@ -33,10 +33,8 @@
//! we do not need to specify ids for the fields.
//!
//! ```
-//! use kernel::prelude::*;
//! use kernel::sync::Arc;
-//! use kernel::workqueue::{self, Work, WorkItem};
-//! use kernel::{impl_has_work, new_work};
+//! use kernel::workqueue::{self, impl_has_work, new_work, Work, WorkItem};
//!
//! #[pin_data]
//! struct MyStruct {
@@ -54,7 +52,7 @@
//! Arc::pin_init(pin_init!(MyStruct {
//! value,
//! work <- new_work!("MyStruct::work"),
-//! }))
+//! }), GFP_KERNEL)
//! }
//! }
//!
@@ -62,7 +60,7 @@
//! type Pointer = Arc<MyStruct>;
//!
//! fn run(this: Arc<MyStruct>) {
-//! pr_info!("The value is: {}", this.value);
+//! pr_info!("The value is: {}\n", this.value);
//! }
//! }
//!
@@ -71,15 +69,14 @@
//! fn print_later(val: Arc<MyStruct>) {
//! let _ = workqueue::system().enqueue(val);
//! }
+//! # print_later(MyStruct::new(42).unwrap());
//! ```
//!
//! The following example shows how multiple `work_struct` fields can be used:
//!
//! ```
-//! use kernel::prelude::*;
//! use kernel::sync::Arc;
-//! use kernel::workqueue::{self, Work, WorkItem};
-//! use kernel::{impl_has_work, new_work};
+//! use kernel::workqueue::{self, impl_has_work, new_work, Work, WorkItem};
//!
//! #[pin_data]
//! struct MyStruct {
@@ -103,7 +100,7 @@
//! value_2,
//! work_1 <- new_work!("MyStruct::work_1"),
//! work_2 <- new_work!("MyStruct::work_2"),
-//! }))
+//! }), GFP_KERNEL)
//! }
//! }
//!
@@ -111,7 +108,7 @@
//! type Pointer = Arc<MyStruct>;
//!
//! fn run(this: Arc<MyStruct>) {
-//! pr_info!("The value is: {}", this.value_1);
+//! pr_info!("The value is: {}\n", this.value_1);
//! }
//! }
//!
@@ -119,7 +116,7 @@
//! type Pointer = Arc<MyStruct>;
//!
//! fn run(this: Arc<MyStruct>) {
-//! pr_info!("The second value is: {}", this.value_2);
+//! pr_info!("The second value is: {}\n", this.value_2);
//! }
//! }
//!
@@ -130,15 +127,15 @@
//! fn print_2_later(val: Arc<MyStruct>) {
//! let _ = workqueue::system().enqueue::<Arc<MyStruct>, 2>(val);
//! }
+//! # print_1_later(MyStruct::new(24, 25).unwrap());
+//! # print_2_later(MyStruct::new(41, 42).unwrap());
//! ```
//!
//! C header: [`include/linux/workqueue.h`](srctree/include/linux/workqueue.h)
-use crate::{bindings, prelude::*, sync::Arc, sync::LockClassKey, types::Opaque};
-use alloc::alloc::AllocError;
-use alloc::boxed::Box;
+use crate::alloc::{AllocError, Flags};
+use crate::{prelude::*, sync::Arc, sync::LockClassKey, types::Opaque};
use core::marker::PhantomData;
-use core::pin::Pin;
/// Creates a [`Work`] initialiser with the given name and a newly-created lock class.
#[macro_export]
@@ -147,6 +144,7 @@ macro_rules! new_work {
$crate::workqueue::Work::new($crate::optional_name!($($name)?), $crate::static_lock_class!())
};
}
+pub use new_work;
/// A kernel work queue.
///
@@ -168,7 +166,7 @@ impl Queue {
/// # Safety
///
/// The caller must ensure that the provided raw pointer is not dangling, that it points at a
- /// valid workqueue, and that it remains valid until the end of 'a.
+ /// valid workqueue, and that it remains valid until the end of `'a`.
pub unsafe fn from_raw<'a>(ptr: *const bindings::workqueue_struct) -> &'a Queue {
// SAFETY: The `Queue` type is `#[repr(transparent)]`, so the pointer cast is valid. The
// caller promises that the pointer is not dangling.
@@ -199,7 +197,11 @@ impl Queue {
// stay valid until we call the function pointer in the `work_struct`, so the access is ok.
unsafe {
w.__enqueue(move |work_ptr| {
- bindings::queue_work_on(bindings::WORK_CPU_UNBOUND as _, queue_ptr, work_ptr)
+ bindings::queue_work_on(
+ bindings::wq_misc_consts_WORK_CPU_UNBOUND as _,
+ queue_ptr,
+ work_ptr,
+ )
})
}
}
@@ -207,18 +209,24 @@ impl Queue {
/// Tries to spawn the given function or closure as a work item.
///
/// This method can fail because it allocates memory to store the work item.
- pub fn try_spawn<T: 'static + Send + FnOnce()>(&self, func: T) -> Result<(), AllocError> {
+ pub fn try_spawn<T: 'static + Send + FnOnce()>(
+ &self,
+ flags: Flags,
+ func: T,
+ ) -> Result<(), AllocError> {
let init = pin_init!(ClosureWork {
work <- new_work!("Queue::try_spawn"),
func: Some(func),
});
- self.enqueue(Box::pin_init(init).map_err(|_| AllocError)?);
+ self.enqueue(KBox::pin_init(init, flags).map_err(|_| AllocError)?);
Ok(())
}
}
-/// A helper type used in `try_spawn`.
+/// A helper type used in [`try_spawn`].
+///
+/// [`try_spawn`]: Queue::try_spawn
#[pin_data]
struct ClosureWork<T> {
#[pin]
@@ -234,9 +242,9 @@ impl<T> ClosureWork<T> {
}
impl<T: FnOnce()> WorkItem for ClosureWork<T> {
- type Pointer = Pin<Box<Self>>;
+ type Pointer = Pin<KBox<Self>>;
- fn run(mut this: Pin<Box<Self>>) {
+ fn run(mut this: Pin<KBox<Self>>) {
if let Some(func) = this.as_mut().project().take() {
(func)()
}
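A usage sketch of the reworked `try_spawn`: the closure is boxed with the caller-supplied allocation flags and runs once on the system workqueue.

```rust
use kernel::prelude::*;
use kernel::workqueue;

fn defer_hello() -> Result {
    // `GFP_KERNEL` is now passed explicitly; allocation failure surfaces
    // as an error instead of being hidden inside the call.
    workqueue::system().try_spawn(GFP_KERNEL, || {
        pr_info!("hello from the workqueue\n");
    })?;
    Ok(())
}
```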
@@ -253,14 +261,16 @@ impl<T: FnOnce()> WorkItem for ClosureWork<T> {
/// actual value of the id is not important as long as you use different ids for different fields
/// of the same struct. (Fields of different structs need not use different ids.)
///
-/// Note that the id is used only to select the right method to call during compilation. It wont be
+/// Note that the id is used only to select the right method to call during compilation. It won't be
/// part of the final executable.
///
/// # Safety
///
-/// Implementers must ensure that any pointers passed to a `queue_work_on` closure by `__enqueue`
+/// Implementers must ensure that any pointers passed to a `queue_work_on` closure by [`__enqueue`]
/// remain valid for the duration specified in the guarantees section of the documentation for
-/// `__enqueue`.
+/// [`__enqueue`].
+///
+/// [`__enqueue`]: RawWorkItem::__enqueue
pub unsafe trait RawWorkItem<const ID: u64> {
/// The return type of [`Queue::enqueue`].
type EnqueueOutput;
@@ -290,10 +300,11 @@ pub unsafe trait RawWorkItem<const ID: u64> {
/// Defines the method that should be called directly when a work item is executed.
///
-/// This trait is implemented by `Pin<Box<T>>` and `Arc<T>`, and is mainly intended to be
+/// This trait is implemented by `Pin<KBox<T>>` and [`Arc<T>`], and is mainly intended to be
/// implemented for smart pointer types. For your own structs, you would implement [`WorkItem`]
-/// instead. The `run` method on this trait will usually just perform the appropriate
-/// `container_of` translation and then call into the `run` method from the [`WorkItem`] trait.
+/// instead. The [`run`] method on this trait will usually just perform the appropriate
+/// `container_of` translation and then call into the [`run`][WorkItem::run] method from the
+/// [`WorkItem`] trait.
///
/// This trait is used when the `work_struct` field is defined using the [`Work`] helper.
///
@@ -309,8 +320,10 @@ pub unsafe trait WorkItemPointer<const ID: u64>: RawWorkItem<ID> {
///
/// # Safety
///
- /// The provided `work_struct` pointer must originate from a previous call to `__enqueue` where
- /// the `queue_work_on` closure returned true, and the pointer must still be valid.
+ /// The provided `work_struct` pointer must originate from a previous call to [`__enqueue`]
+ /// where the `queue_work_on` closure returned true, and the pointer must still be valid.
+ ///
+ /// [`__enqueue`]: RawWorkItem::__enqueue
unsafe extern "C" fn run(ptr: *mut bindings::work_struct);
}
@@ -319,7 +332,7 @@ pub unsafe trait WorkItemPointer<const ID: u64>: RawWorkItem<ID> {
/// This trait is used when the `work_struct` field is defined using the [`Work`] helper.
pub trait WorkItem<const ID: u64 = 0> {
/// The pointer type that this struct is wrapped in. This will typically be `Arc<Self>` or
- /// `Pin<Box<Self>>`.
+ /// `Pin<KBox<Self>>`.
type Pointer: WorkItemPointer<ID>;
/// The method that should be called when this work item is executed.
@@ -328,14 +341,18 @@ pub trait WorkItem<const ID: u64 = 0> {
/// Links for a work item.
///
-/// This struct contains a function pointer to the `run` function from the [`WorkItemPointer`]
+/// This struct contains a function pointer to the [`run`] function from the [`WorkItemPointer`]
/// trait, and defines the linked list pointers necessary to enqueue a work item in a workqueue.
///
/// Wraps the kernel's C `struct work_struct`.
///
/// This is a helper type used to associate a `work_struct` with the [`WorkItem`] that uses it.
+///
+/// [`run`]: WorkItemPointer::run
+#[pin_data]
#[repr(transparent)]
pub struct Work<T: ?Sized, const ID: u64 = 0> {
+ #[pin]
work: Opaque<bindings::work_struct>,
_inner: PhantomData<T>,
}
@@ -352,26 +369,26 @@ unsafe impl<T: ?Sized, const ID: u64> Sync for Work<T, ID> {}
impl<T: ?Sized, const ID: u64> Work<T, ID> {
/// Creates a new instance of [`Work`].
#[inline]
- #[allow(clippy::new_ret_no_self)]
pub fn new(name: &'static CStr, key: &'static LockClassKey) -> impl PinInit<Self>
where
T: WorkItem<ID>,
{
- // SAFETY: The `WorkItemPointer` implementation promises that `run` can be used as the work
- // item function.
- unsafe {
- kernel::init::pin_init_from_closure(move |slot| {
- let slot = Self::raw_get(slot);
- bindings::init_work_with_key(
- slot,
- Some(T::Pointer::run),
- false,
- name.as_char_ptr(),
- key.as_ptr(),
- );
- Ok(())
- })
- }
+ pin_init!(Self {
+ work <- Opaque::ffi_init(|slot| {
+ // SAFETY: The `WorkItemPointer` implementation promises that `run` can be used as
+ // the work item function.
+ unsafe {
+ bindings::init_work_with_key(
+ slot,
+ Some(T::Pointer::run),
+ false,
+ name.as_char_ptr(),
+ key.as_ptr(),
+ )
+ }
+ }),
+ _inner: PhantomData,
+ })
}
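
The rewrite above replaces a hand-rolled `pin_init_from_closure` with the `pin_init!`/`Opaque::ffi_init` combination. The general shape of that pattern, sketched with a hypothetical C type `bindings::foo` and initialiser `bindings::init_foo` purely for illustration:

#[pin_data]
struct FooWrapper {
    #[pin]
    raw: Opaque<bindings::foo>, // hypothetical C struct
}

impl FooWrapper {
    fn new() -> impl PinInit<Self> {
        pin_init!(Self {
            raw <- Opaque::ffi_init(|slot: *mut bindings::foo| {
                // SAFETY: `slot` points at writable memory; the (hypothetical)
                // C initialiser fills it in place and never moves it.
                unsafe { bindings::init_foo(slot) }
            }),
        })
    }
}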
/// Get a pointer to the inner `work_struct`.
@@ -396,9 +413,7 @@ impl<T: ?Sized, const ID: u64> Work<T, ID> {
/// like this:
///
/// ```no_run
-/// use kernel::impl_has_work;
-/// use kernel::prelude::*;
-/// use kernel::workqueue::Work;
+/// use kernel::workqueue::{impl_has_work, Work};
///
/// struct MyWorkItem {
/// work_field: Work<MyWorkItem, 1>,
@@ -409,28 +424,25 @@ impl<T: ?Sized, const ID: u64> Work<T, ID> {
/// }
/// ```
///
-/// Note that since the `Work` type is annotated with an id, you can have several `work_struct`
+/// Note that since the [`Work`] type is annotated with an id, you can have several `work_struct`
/// fields by using a different id for each one.
///
/// # Safety
///
-/// The [`OFFSET`] constant must be the offset of a field in Self of type [`Work<T, ID>`]. The methods on
-/// this trait must have exactly the behavior that the definitions given below have.
+/// The [`OFFSET`] constant must be the offset of a field in `Self` of type [`Work<T, ID>`]. The
+/// methods on this trait must have exactly the behavior that the definitions given below have.
///
-/// [`Work<T, ID>`]: Work
/// [`impl_has_work!`]: crate::impl_has_work
/// [`OFFSET`]: HasWork::OFFSET
pub unsafe trait HasWork<T, const ID: u64 = 0> {
/// The offset of the [`Work<T, ID>`] field.
- ///
- /// [`Work<T, ID>`]: Work
const OFFSET: usize;
/// Returns the offset of the [`Work<T, ID>`] field.
///
- /// This method exists because the [`OFFSET`] constant cannot be accessed if the type is not Sized.
+ /// This method exists because the [`OFFSET`] constant cannot be accessed if the type is not
+ /// [`Sized`].
///
- /// [`Work<T, ID>`]: Work
/// [`OFFSET`]: HasWork::OFFSET
#[inline]
fn get_work_offset(&self) -> usize {
@@ -442,8 +454,6 @@ pub unsafe trait HasWork<T, const ID: u64 = 0> {
/// # Safety
///
/// The provided pointer must point at a valid struct of type `Self`.
- ///
- /// [`Work<T, ID>`]: Work
#[inline]
unsafe fn raw_get_work(ptr: *mut Self) -> *mut Work<T, ID> {
// SAFETY: The caller promises that the pointer is valid.
@@ -455,8 +465,6 @@ pub unsafe trait HasWork<T, const ID: u64 = 0> {
/// # Safety
///
/// The pointer must point at a [`Work<T, ID>`] field in a struct of type `Self`.
- ///
- /// [`Work<T, ID>`]: Work
#[inline]
unsafe fn work_container_of(ptr: *mut Work<T, ID>) -> *mut Self
where
@@ -473,30 +481,29 @@ pub unsafe trait HasWork<T, const ID: u64 = 0> {
/// # Examples
///
/// ```
-/// use kernel::impl_has_work;
/// use kernel::sync::Arc;
-/// use kernel::workqueue::{self, Work};
+/// use kernel::workqueue::{self, impl_has_work, Work};
///
-/// struct MyStruct {
-/// work_field: Work<MyStruct, 17>,
+/// struct MyStruct<'a, T, const N: usize> {
+/// work_field: Work<MyStruct<'a, T, N>, 17>,
+/// f: fn(&'a [T; N]),
/// }
///
/// impl_has_work! {
-/// impl HasWork<MyStruct, 17> for MyStruct { self.work_field }
+/// impl{'a, T, const N: usize} HasWork<MyStruct<'a, T, N>, 17>
+/// for MyStruct<'a, T, N> { self.work_field }
/// }
/// ```
-///
-/// [`HasWork<T, ID>`]: HasWork
#[macro_export]
macro_rules! impl_has_work {
- ($(impl$(<$($implarg:ident),*>)?
+ ($(impl$({$($generics:tt)*})?
HasWork<$work_type:ty $(, $id:tt)?>
- for $self:ident $(<$($selfarg:ident),*>)?
+ for $self:ty
{ self.$field:ident }
)*) => {$(
// SAFETY: The implementation of `raw_get_work` only compiles if the field has the right
// type.
- unsafe impl$(<$($implarg),*>)? $crate::workqueue::HasWork<$work_type $(, $id)?> for $self $(<$($selfarg),*>)? {
+ unsafe impl$(<$($generics)+>)? $crate::workqueue::HasWork<$work_type $(, $id)?> for $self {
const OFFSET: usize = ::core::mem::offset_of!(Self, $field) as usize;
#[inline]
@@ -509,18 +516,28 @@ macro_rules! impl_has_work {
}
)*};
}
+pub use impl_has_work;
impl_has_work! {
- impl<T> HasWork<Self> for ClosureWork<T> { self.work }
+ impl{T} HasWork<Self> for ClosureWork<T> { self.work }
}
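
Taken together, the pieces above compose into the following end-to-end pattern, sketched against the post-patch API (`MyStruct` is illustrative):

use kernel::prelude::*;
use kernel::sync::Arc;
use kernel::workqueue::{self, impl_has_work, new_work, Work, WorkItem};

#[pin_data]
struct MyStruct {
    value: i32,
    #[pin]
    work: Work<MyStruct>,
}

impl_has_work! {
    impl HasWork<Self> for MyStruct { self.work }
}

impl MyStruct {
    fn new(value: i32) -> Result<Arc<Self>> {
        Arc::pin_init(
            pin_init!(MyStruct {
                value,
                work <- new_work!("MyStruct::work"),
            }),
            GFP_KERNEL,
        )
    }
}

impl WorkItem for MyStruct {
    type Pointer = Arc<MyStruct>;

    fn run(this: Arc<MyStruct>) {
        pr_info!("The value is: {}\n", this.value);
    }
}

// Enqueue on the system workqueue; `run` executes later on a worker thread.
fn print_later(val: Arc<MyStruct>) {
    let _ = workqueue::system().enqueue(val);
}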
+// SAFETY: The `__enqueue` implementation in RawWorkItem uses a `work_struct` initialized with the
+// `run` method of this trait as the function pointer because:
+// - `__enqueue` gets the `work_struct` from the `Work` field, using `T::raw_get_work`.
+// - The only safe way to create a `Work` object is through `Work::new`.
+// - `Work::new` makes sure that `T::Pointer::run` is passed to `init_work_with_key`.
+// - Finally `Work` and `RawWorkItem` guarantee that the correct `Work` field
+// will be used because of the ID const generic bound. This makes sure that `T::raw_get_work`
+// uses the correct offset for the `Work` field, and `Work::new` picks the correct
+// implementation of `WorkItemPointer` for `Arc<T>`.
unsafe impl<T, const ID: u64> WorkItemPointer<ID> for Arc<T>
where
T: WorkItem<ID, Pointer = Self>,
T: HasWork<T, ID>,
{
unsafe extern "C" fn run(ptr: *mut bindings::work_struct) {
- // SAFETY: The `__enqueue` method always uses a `work_struct` stored in a `Work<T, ID>`.
+ // The `__enqueue` method always uses a `work_struct` stored in a `Work<T, ID>`.
let ptr = ptr as *mut Work<T, ID>;
// SAFETY: This computes the pointer that `__enqueue` got from `Arc::into_raw`.
let ptr = unsafe { T::work_container_of(ptr) };
@@ -531,6 +548,13 @@ where
}
}
+// SAFETY: The `work_struct` raw pointer is guaranteed to be valid for the duration of the call to
+// the closure because we get it from an `Arc`, which means that the ref count will be at least 1,
+// and we don't drop the `Arc` ourselves. If `queue_work_on` returns true, it is further guaranteed
+// to be valid until a call to the function pointer in `work_struct` because we leak the memory it
+// points to, and only reclaim it if the closure returns false, or in `WorkItemPointer::run`, which
+// is what the function pointer in the `work_struct` must be pointing to, according to the safety
+// requirements of `WorkItemPointer`.
unsafe impl<T, const ID: u64> RawWorkItem<ID> for Arc<T>
where
T: WorkItem<ID, Pointer = Self>,
@@ -559,18 +583,19 @@ where
}
}
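
The remaining hunks switch the boxed-pointer implementations from `Box` to `KBox`. A sketch of when that pointer type is the right choice: a one-shot item whose sole owner hands the allocation to the queue and receives it back in `run` (`OneShot` is illustrative):

#[pin_data]
struct OneShot {
    #[pin]
    work: Work<OneShot>,
}

impl_has_work! {
    impl HasWork<Self> for OneShot { self.work }
}

impl WorkItem for OneShot {
    type Pointer = Pin<KBox<OneShot>>;

    fn run(_this: Pin<KBox<OneShot>>) {
        // Ownership of the box returns here; dropping `_this` frees it.
        pr_info!("one-shot work item ran\n");
    }
}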
-unsafe impl<T, const ID: u64> WorkItemPointer<ID> for Pin<Box<T>>
+// SAFETY: As for `Arc<T>` above: the only safe way to create a `Work` is `Work::new`, which
+// registers `T::Pointer::run` as the work function, and the `ID` const generic ensures that
+// `__enqueue` uses the matching `Work` field for this pointer type.
+unsafe impl<T, const ID: u64> WorkItemPointer<ID> for Pin<KBox<T>>
where
T: WorkItem<ID, Pointer = Self>,
T: HasWork<T, ID>,
{
unsafe extern "C" fn run(ptr: *mut bindings::work_struct) {
- // SAFETY: The `__enqueue` method always uses a `work_struct` stored in a `Work<T, ID>`.
+ // The `__enqueue` method always uses a `work_struct` stored in a `Work<T, ID>`.
let ptr = ptr as *mut Work<T, ID>;
        // SAFETY: This computes the pointer that `__enqueue` got from `KBox::into_raw`.
        let ptr = unsafe { T::work_container_of(ptr) };
        // SAFETY: This pointer comes from `KBox::into_raw` and we've been given back ownership.
- let boxed = unsafe { Box::from_raw(ptr) };
+ let boxed = unsafe { KBox::from_raw(ptr) };
// SAFETY: The box was already pinned when it was enqueued.
let pinned = unsafe { Pin::new_unchecked(boxed) };
@@ -578,7 +603,8 @@ where
}
}
-unsafe impl<T, const ID: u64> RawWorkItem<ID> for Pin<Box<T>>
+// SAFETY: The `work_struct` pointer stays valid for the closure call because the box is leaked
+// via `KBox::into_raw` first; ownership of the memory is only taken back in
+// `WorkItemPointer::run`, which is what the function pointer in the `work_struct` points to.
+unsafe impl<T, const ID: u64> RawWorkItem<ID> for Pin<KBox<T>>
where
T: WorkItem<ID, Pointer = Self>,
T: HasWork<T, ID>,
@@ -592,9 +618,9 @@ where
        // SAFETY: We're not going to move `self` or any of its fields, so it's okay to temporarily
// remove the `Pin` wrapper.
let boxed = unsafe { Pin::into_inner_unchecked(self) };
- let ptr = Box::into_raw(boxed);
+ let ptr = KBox::into_raw(boxed);
- // SAFETY: Pointers into a `Box` point at a valid value.
+ // SAFETY: Pointers into a `KBox` point at a valid value.
let work_ptr = unsafe { T::raw_get_work(ptr) };
// SAFETY: `raw_get_work` returns a pointer to a valid value.
let work_ptr = unsafe { Work::raw_get(work_ptr) };