From 87990025b87283f1b8c50d4d75379ca6d86d2211 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 26 Sep 2025 15:05:52 +0200 Subject: gpu: nova-core: gsp: remove useless conversion Because nova-core depends on CONFIG_64BIT and a raw DmaAddress is always a u64, we can remove the now actually useless conversion. Signed-off-by: Danilo Krummrich Reviewed-by: John Hubbard [acourbot@nvidia.com: reword commit as suggested by John.] Signed-off-by: Alexandre Courbot Message-ID: <20250926130623.61316-1-dakr@kernel.org> --- drivers/gpu/nova-core/firmware/gsp.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/nova-core/firmware/gsp.rs b/drivers/gpu/nova-core/firmware/gsp.rs index 9b70095434c6..ca785860e1c8 100644 --- a/drivers/gpu/nova-core/firmware/gsp.rs +++ b/drivers/gpu/nova-core/firmware/gsp.rs @@ -202,8 +202,7 @@ impl GspFirmware { let mut level0_data = kvec![0u8; GSP_PAGE_SIZE]?; // Fill level 1 page entry. - #[allow(clippy::useless_conversion)] - let level1_entry = u64::from(level1.iter().next().unwrap().dma_address()); + let level1_entry = level1.iter().next().unwrap().dma_address(); let dst = &mut level0_data[..size_of_val(&level1_entry)]; dst.copy_from_slice(&level1_entry.to_le_bytes()); -- cgit From f7a33a67c50c92589b046e69b9075b7d28d31f87 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 26 Sep 2025 15:05:53 +0200 Subject: gpu: nova-core: gsp: do not unwrap() SGEntry Don't use unwrap() to extract an Option, instead handle the error condition gracefully. 
Fixes: a841614e607c ("gpu: nova-core: firmware: process and prepare the GSP firmware") Signed-off-by: Danilo Krummrich Reviewed-by: John Hubbard Signed-off-by: Alexandre Courbot Message-ID: <20250926130623.61316-2-dakr@kernel.org> --- drivers/gpu/nova-core/firmware/gsp.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/nova-core/firmware/gsp.rs b/drivers/gpu/nova-core/firmware/gsp.rs index ca785860e1c8..6b0761460a57 100644 --- a/drivers/gpu/nova-core/firmware/gsp.rs +++ b/drivers/gpu/nova-core/firmware/gsp.rs @@ -202,9 +202,10 @@ impl GspFirmware { let mut level0_data = kvec![0u8; GSP_PAGE_SIZE]?; // Fill level 1 page entry. - let level1_entry = level1.iter().next().unwrap().dma_address(); - let dst = &mut level0_data[..size_of_val(&level1_entry)]; - dst.copy_from_slice(&level1_entry.to_le_bytes()); + let level1_entry = level1.iter().next().ok_or(EINVAL)?; + let level1_entry_addr = level1_entry.dma_address(); + let dst = &mut level0_data[..size_of_val(&level1_entry_addr)]; + dst.copy_from_slice(&level1_entry_addr.to_le_bytes()); // Turn the level0 page table into a [`DmaObject`]. DmaObject::from_data(dev, &level0_data)? -- cgit From 1d5cffebd930d61588c32198f85fbe541ab97b8f Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Wed, 8 Oct 2025 20:47:32 -0400 Subject: gpu: nova-core: vbios: Rework BiosImage to be simpler Currently, the BiosImage type in vbios code is implemented as a type-wrapping enum with the sole purpose of implementing a type that is common to all specific image types. To make this work, macros were used to overcome limitations of using enums. Ugly match statements were also required to route methods from the enum type to the specific image type. Simplify the code by just creating the common BiosImage type in the iterator, and then converting it to specific image type after. This works well since all the methods common to different BiosImage are called only during the iteration and not later. 
Should we need to call these common methods later, we can use AsRef and traits, but for now not doing so gives us a nice ~50 negative line delta versus the existing code and is a lot simpler. Also remove the now obsolete BiosImage enum type. Cc: Benno Lossin Signed-off-by: Joel Fernandes Signed-off-by: Alexandre Courbot Message-ID: <20251009004732.2287050-1-joelagnelf@nvidia.com> --- drivers/gpu/nova-core/vbios.rs | 226 +++++++++++++++++------------------------ 1 file changed, 94 insertions(+), 132 deletions(-) diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index 71fbe71b84db..ad070a0420ca 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -22,6 +22,34 @@ const BIOS_READ_AHEAD_SIZE: usize = 1024; /// indicates the last image. Bit 0-6 are reserved, bit 7 is last image bit. const LAST_IMAGE_BIT_MASK: u8 = 0x80; +/// BIOS Image Type from PCI Data Structure code_type field. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u8)] +enum BiosImageType { + /// PC-AT compatible BIOS image (x86 legacy) + PciAt = 0x00, + /// EFI (Extensible Firmware Interface) BIOS image + Efi = 0x03, + /// NBSI (Notebook System Information) BIOS image + Nbsi = 0x70, + /// FwSec (Firmware Security) BIOS image + FwSec = 0xE0, +} + +impl TryFrom for BiosImageType { + type Error = Error; + + fn try_from(code: u8) -> Result { + match code { + 0x00 => Ok(Self::PciAt), + 0x03 => Ok(Self::Efi), + 0x70 => Ok(Self::Nbsi), + 0xE0 => Ok(Self::FwSec), + _ => Err(EINVAL), + } + } +} + // PMU lookup table entry types. Used to locate PMU table entries // in the Fwsec image, corresponding to falcon ucodes. #[expect(dead_code)] @@ -197,32 +225,37 @@ impl Vbios { // Parse all VBIOS images in the ROM for image_result in VbiosIterator::new(dev, bar0)? 
{ - let full_image = image_result?; + let image = image_result?; dev_dbg!( dev, - "Found BIOS image: size: {:#x}, type: {}, last: {}\n", - full_image.image_size_bytes(), - full_image.image_type_str(), - full_image.is_last() + "Found BIOS image: size: {:#x}, type: {:?}, last: {}\n", + image.image_size_bytes(), + image.image_type(), + image.is_last() ); - // Get references to images we will need after the loop, in order to - // setup the falcon data offset. - match full_image { - BiosImage::PciAt(image) => { - pci_at_image = Some(image); + // Convert to a specific image type + match BiosImageType::try_from(image.pcir.code_type) { + Ok(BiosImageType::PciAt) => { + pci_at_image = Some(PciAtBiosImage::try_from(image)?); } - BiosImage::FwSec(image) => { + Ok(BiosImageType::FwSec) => { + let fwsec = FwSecBiosBuilder { + base: image, + falcon_data_offset: None, + pmu_lookup_table: None, + falcon_ucode_offset: None, + }; if first_fwsec_image.is_none() { - first_fwsec_image = Some(image); + first_fwsec_image = Some(fwsec); } else { - second_fwsec_image = Some(image); + second_fwsec_image = Some(fwsec); } } - // For now we don't need to handle these - BiosImage::Efi(_image) => {} - BiosImage::Nbsi(_image) => {} + _ => { + // Ignore other image types or unknown types + } } } @@ -594,108 +627,29 @@ impl NpdeStruct { } } -// Use a macro to implement BiosImage enum and methods. This avoids having to -// repeat each enum type when implementing functions like base() in BiosImage. -macro_rules! bios_image { - ( - $($variant:ident: $class:ident),* $(,)? 
- ) => { - // BiosImage enum with variants for each image type - enum BiosImage { - $($variant($class)),* - } - - impl BiosImage { - /// Get a reference to the common BIOS image data regardless of type - fn base(&self) -> &BiosImageBase { - match self { - $(Self::$variant(img) => &img.base),* - } - } - - /// Returns a string representing the type of BIOS image - fn image_type_str(&self) -> &'static str { - match self { - $(Self::$variant(_) => stringify!($variant)),* - } - } - } - } -} - -impl BiosImage { - /// Check if this is the last image. - fn is_last(&self) -> bool { - let base = self.base(); - - // For NBSI images (type == 0x70), return true as they're - // considered the last image - if matches!(self, Self::Nbsi(_)) { - return true; - } - - // For other image types, check the NPDE first if available - if let Some(ref npde) = base.npde { - return npde.is_last(); - } - - // Otherwise, fall back to checking the PCIR last_image flag - base.pcir.is_last() - } - - /// Get the image size in bytes. - fn image_size_bytes(&self) -> usize { - let base = self.base(); - - // Prefer NPDE image size if available - if let Some(ref npde) = base.npde { - return npde.image_size_bytes(); - } - - // Otherwise, fall back to the PCIR image size - base.pcir.image_size_bytes() - } - - /// Create a [`BiosImageBase`] from a byte slice and convert it to a [`BiosImage`] which - /// triggers the constructor of the specific BiosImage enum variant. - fn new(dev: &device::Device, data: &[u8]) -> Result { - let base = BiosImageBase::new(dev, data)?; - let image = base.into_image().inspect_err(|e| { - dev_err!(dev, "Failed to create BiosImage: {:?}\n", e); - })?; - - Ok(image) - } -} - -bios_image! 
{ - PciAt: PciAtBiosImage, // PCI-AT compatible BIOS image - Efi: EfiBiosImage, // EFI (Extensible Firmware Interface) - Nbsi: NbsiBiosImage, // NBSI (Nvidia Bios System Interface) - FwSec: FwSecBiosBuilder, // FWSEC (Firmware Security) -} - /// The PciAt BIOS image is typically the first BIOS image type found in the BIOS image chain. /// /// It contains the BIT header and the BIT tokens. struct PciAtBiosImage { - base: BiosImageBase, + base: BiosImage, bit_header: BitHeader, bit_offset: usize, } +#[expect(dead_code)] struct EfiBiosImage { - base: BiosImageBase, + base: BiosImage, // EFI-specific fields can be added here in the future. } +#[expect(dead_code)] struct NbsiBiosImage { - base: BiosImageBase, + base: BiosImage, // NBSI-specific fields can be added here in the future. } struct FwSecBiosBuilder { - base: BiosImageBase, + base: BiosImage, /// These are temporary fields that are used during the construction of the /// [`FwSecBiosBuilder`]. /// @@ -714,37 +668,16 @@ struct FwSecBiosBuilder { /// /// The PMU table contains voltage/frequency tables as well as a pointer to the Falcon Ucode. pub(crate) struct FwSecBiosImage { - base: BiosImageBase, + base: BiosImage, /// The offset of the Falcon ucode. falcon_ucode_offset: usize, } -// Convert from BiosImageBase to BiosImage -impl TryFrom for BiosImage { - type Error = Error; - - fn try_from(base: BiosImageBase) -> Result { - match base.pcir.code_type { - 0x00 => Ok(BiosImage::PciAt(base.try_into()?)), - 0x03 => Ok(BiosImage::Efi(EfiBiosImage { base })), - 0x70 => Ok(BiosImage::Nbsi(NbsiBiosImage { base })), - 0xE0 => Ok(BiosImage::FwSec(FwSecBiosBuilder { - base, - falcon_data_offset: None, - pmu_lookup_table: None, - falcon_ucode_offset: None, - })), - _ => Err(EINVAL), - } - } -} - /// BIOS Image structure containing various headers and reference fields to all BIOS images. /// -/// Each BiosImage type has a BiosImageBase type along with other image-specific fields. 
Note that -/// Rust favors composition of types over inheritance. +/// A BiosImage struct is embedded into all image types and implements common operations. #[expect(dead_code)] -struct BiosImageBase { +struct BiosImage { /// Used for logging. dev: ARef, /// PCI ROM Expansion Header @@ -757,12 +690,41 @@ struct BiosImageBase { data: KVec, } -impl BiosImageBase { - fn into_image(self) -> Result { - BiosImage::try_from(self) +impl BiosImage { + /// Get the image size in bytes. + fn image_size_bytes(&self) -> usize { + // Prefer NPDE image size if available + if let Some(ref npde) = self.npde { + npde.image_size_bytes() + } else { + // Otherwise, fall back to the PCIR image size + self.pcir.image_size_bytes() + } + } + + /// Get the BIOS image type. + fn image_type(&self) -> Result { + BiosImageType::try_from(self.pcir.code_type) + } + + /// Check if this is the last image. + fn is_last(&self) -> bool { + // For NBSI images (type == 0x70), return true as they're + // considered the last image + if self.pcir.code_type == BiosImageType::Nbsi as u8 { + return true; + } + + // For other image types, check the NPDE first if available + if let Some(ref npde) = self.npde { + return npde.is_last(); + } + + // Otherwise, fall back to checking the PCIR last_image flag + self.pcir.is_last() } - /// Creates a new BiosImageBase from raw byte data. + /// Creates a new BiosImage from raw byte data. fn new(dev: &device::Device, data: &[u8]) -> Result { // Ensure we have enough data for the ROM header. 
if data.len() < 26 { @@ -802,7 +764,7 @@ impl BiosImageBase { let mut data_copy = KVec::new(); data_copy.extend_from_slice(data, GFP_KERNEL)?; - Ok(BiosImageBase { + Ok(BiosImage { dev: dev.into(), rom_header, pcir, @@ -865,10 +827,10 @@ impl PciAtBiosImage { } } -impl TryFrom for PciAtBiosImage { +impl TryFrom for PciAtBiosImage { type Error = Error; - fn try_from(base: BiosImageBase) -> Result { + fn try_from(base: BiosImage) -> Result { let data_slice = &base.data; let (bit_header, bit_offset) = PciAtBiosImage::find_bit_header(data_slice)?; -- cgit From 8e4865faf7a97de2a0fd797556a62b31528b42bc Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 6 Oct 2025 12:05:55 +0000 Subject: drm/gpuvm: add deferred vm_bo cleanup When using GPUVM in immediate mode, it is necessary to call drm_gpuvm_unlink() from the fence signalling critical path. However, unlink may call drm_gpuvm_bo_put(), which causes some challenges: 1. drm_gpuvm_bo_put() often requires you to take resv locks, which you can't do from the fence signalling critical path. 2. drm_gpuvm_bo_put() calls drm_gem_object_put(), which is often going to be unsafe to call from the fence signalling critical path. To solve these issues, add a deferred version of drm_gpuvm_unlink() that adds the vm_bo to a deferred cleanup list, and then clean it up later. The new methods take the GEMs GPUVA lock internally rather than letting the caller do it because it also needs to perform an operation after releasing the mutex again. This is to prevent freeing the GEM while holding the mutex (more info as comments in the patch). This means that the new methods can only be used with DRM_GPUVM_IMMEDIATE_MODE. Reviewed-by: Boris Brezillon Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20251006-vmbo-defer-v4-1-30cbd2c05adb@google.com [aliceryhl: fix formatting of vm_bo = llist_entry(...) 
line] Signed-off-by: Alice Ryhl --- drivers/gpu/drm/drm_gpuvm.c | 190 ++++++++++++++++++++++++++++++++++++++++++++ include/drm/drm_gpuvm.h | 16 ++++ 2 files changed, 206 insertions(+) diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index af63f4d00315..936e6c1a60c1 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -876,6 +876,31 @@ __drm_gpuvm_bo_list_add(struct drm_gpuvm *gpuvm, spinlock_t *lock, cond_spin_unlock(lock, !!lock); } +/** + * drm_gpuvm_bo_is_zombie() - check whether this vm_bo is scheduled for cleanup + * @vm_bo: the &drm_gpuvm_bo + * + * When a vm_bo is scheduled for cleanup using the bo_defer list, it is not + * immediately removed from the evict and extobj lists. Therefore, anyone + * iterating these lists should skip entries that are being destroyed. + * + * Checking the refcount without incrementing it is okay as long as the lock + * protecting the evict/extobj list is held for as long as you are using the + * vm_bo, because even if the refcount hits zero while you are using it, freeing + * the vm_bo requires taking the list's lock. + * + * Zombie entries can be observed on the evict and extobj lists regardless of + * whether DRM_GPUVM_RESV_PROTECTED is used, but they remain on the lists for a + * longer time when the resv lock is used because we can't take the resv lock + * during run_job() in immediate mode, meaning that they need to remain on the + * lists until drm_gpuvm_bo_deferred_cleanup() is called. + */ +static bool +drm_gpuvm_bo_is_zombie(struct drm_gpuvm_bo *vm_bo) +{ + return !kref_read(&vm_bo->kref); +} + /** * drm_gpuvm_bo_list_add() - insert a vm_bo into the given list * @__vm_bo: the &drm_gpuvm_bo @@ -1081,6 +1106,8 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, INIT_LIST_HEAD(&gpuvm->evict.list); spin_lock_init(&gpuvm->evict.lock); + init_llist_head(&gpuvm->bo_defer); + kref_init(&gpuvm->kref); gpuvm->name = name ? 
name : "unknown"; @@ -1122,6 +1149,8 @@ drm_gpuvm_fini(struct drm_gpuvm *gpuvm) "Extobj list should be empty.\n"); drm_WARN(gpuvm->drm, !list_empty(&gpuvm->evict.list), "Evict list should be empty.\n"); + drm_WARN(gpuvm->drm, !llist_empty(&gpuvm->bo_defer), + "VM BO cleanup list should be empty.\n"); drm_gem_object_put(gpuvm->r_obj); } @@ -1217,6 +1246,9 @@ drm_gpuvm_prepare_objects_locked(struct drm_gpuvm *gpuvm, drm_gpuvm_resv_assert_held(gpuvm); list_for_each_entry(vm_bo, &gpuvm->extobj.list, list.entry.extobj) { + if (drm_gpuvm_bo_is_zombie(vm_bo)) + continue; + ret = exec_prepare_obj(exec, vm_bo->obj, num_fences); if (ret) break; @@ -1460,6 +1492,9 @@ drm_gpuvm_validate_locked(struct drm_gpuvm *gpuvm, struct drm_exec *exec) list_for_each_entry_safe(vm_bo, next, &gpuvm->evict.list, list.entry.evict) { + if (drm_gpuvm_bo_is_zombie(vm_bo)) + continue; + ret = ops->vm_bo_validate(vm_bo, exec); if (ret) break; @@ -1560,6 +1595,7 @@ drm_gpuvm_bo_create(struct drm_gpuvm *gpuvm, INIT_LIST_HEAD(&vm_bo->list.entry.extobj); INIT_LIST_HEAD(&vm_bo->list.entry.evict); + init_llist_node(&vm_bo->list.entry.bo_defer); return vm_bo; } @@ -1621,6 +1657,126 @@ drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo) } EXPORT_SYMBOL_GPL(drm_gpuvm_bo_put); +/* + * drm_gpuvm_bo_into_zombie() - called when the vm_bo becomes a zombie due to + * deferred cleanup + * + * If deferred cleanup is used, then this must be called right after the vm_bo + * refcount drops to zero. Must be called with GEM mutex held. After releasing + * the GEM mutex, drm_gpuvm_bo_defer_zombie_cleanup() must be called. 
+ */ +static void +drm_gpuvm_bo_into_zombie(struct kref *kref) +{ + struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo, + kref); + + if (!drm_gpuvm_resv_protected(vm_bo->vm)) { + drm_gpuvm_bo_list_del(vm_bo, extobj, true); + drm_gpuvm_bo_list_del(vm_bo, evict, true); + } + + list_del(&vm_bo->list.entry.gem); +} + +/* + * drm_gpuvm_bo_defer_zombie_cleanup() - adds a new zombie vm_bo to the + * bo_defer list + * + * Called after drm_gpuvm_bo_into_zombie(). GEM mutex must not be held. + * + * It's important that the GEM stays alive for the duration in which we hold + * the mutex, but the instant we add the vm_bo to bo_defer, another thread + * might call drm_gpuvm_bo_deferred_cleanup() and put the GEM. Therefore, to + * avoid kfreeing a mutex we are holding, the GEM mutex must be released + * *before* calling this function. + */ +static void +drm_gpuvm_bo_defer_zombie_cleanup(struct drm_gpuvm_bo *vm_bo) +{ + llist_add(&vm_bo->list.entry.bo_defer, &vm_bo->vm->bo_defer); +} + +static void +drm_gpuvm_bo_defer_free(struct kref *kref) +{ + struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo, + kref); + + drm_gpuvm_bo_into_zombie(kref); + mutex_unlock(&vm_bo->obj->gpuva.lock); + drm_gpuvm_bo_defer_zombie_cleanup(vm_bo); +} + +/** + * drm_gpuvm_bo_put_deferred() - drop a struct drm_gpuvm_bo reference with + * deferred cleanup + * @vm_bo: the &drm_gpuvm_bo to release the reference of + * + * This releases a reference to @vm_bo. + * + * This might take and release the GEMs GPUVA lock. You should call + * drm_gpuvm_bo_deferred_cleanup() later to complete the cleanup process. + * + * Returns: true if vm_bo is being destroyed, false otherwise. 
+ */ +bool +drm_gpuvm_bo_put_deferred(struct drm_gpuvm_bo *vm_bo) +{ + if (!vm_bo) + return false; + + drm_WARN_ON(vm_bo->vm->drm, !drm_gpuvm_immediate_mode(vm_bo->vm)); + + return !!kref_put_mutex(&vm_bo->kref, + drm_gpuvm_bo_defer_free, + &vm_bo->obj->gpuva.lock); +} +EXPORT_SYMBOL_GPL(drm_gpuvm_bo_put_deferred); + +/** + * drm_gpuvm_bo_deferred_cleanup() - clean up BOs in the deferred list + * deferred cleanup + * @gpuvm: the VM to clean up + * + * Cleans up &drm_gpuvm_bo instances in the deferred cleanup list. + */ +void +drm_gpuvm_bo_deferred_cleanup(struct drm_gpuvm *gpuvm) +{ + const struct drm_gpuvm_ops *ops = gpuvm->ops; + struct drm_gpuvm_bo *vm_bo; + struct drm_gem_object *obj; + struct llist_node *bo_defer; + + bo_defer = llist_del_all(&gpuvm->bo_defer); + if (!bo_defer) + return; + + if (drm_gpuvm_resv_protected(gpuvm)) { + dma_resv_lock(drm_gpuvm_resv(gpuvm), NULL); + llist_for_each_entry(vm_bo, bo_defer, list.entry.bo_defer) { + drm_gpuvm_bo_list_del(vm_bo, extobj, false); + drm_gpuvm_bo_list_del(vm_bo, evict, false); + } + dma_resv_unlock(drm_gpuvm_resv(gpuvm)); + } + + while (bo_defer) { + vm_bo = llist_entry(bo_defer, struct drm_gpuvm_bo, list.entry.bo_defer); + bo_defer = bo_defer->next; + obj = vm_bo->obj; + if (ops && ops->vm_bo_free) + ops->vm_bo_free(vm_bo); + else + kfree(vm_bo); + + drm_gpuvm_put(gpuvm); + drm_gem_object_put(obj); + } +} +EXPORT_SYMBOL_GPL(drm_gpuvm_bo_deferred_cleanup); + static struct drm_gpuvm_bo * __drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, struct drm_gem_object *obj) @@ -1948,6 +2104,40 @@ drm_gpuva_unlink(struct drm_gpuva *va) } EXPORT_SYMBOL_GPL(drm_gpuva_unlink); +/** + * drm_gpuva_unlink_defer() - unlink a &drm_gpuva with deferred vm_bo cleanup + * @va: the &drm_gpuva to unlink + * + * Similar to drm_gpuva_unlink(), but uses drm_gpuvm_bo_put_deferred() and takes + * the lock for the caller. 
+ */ +void +drm_gpuva_unlink_defer(struct drm_gpuva *va) +{ + struct drm_gem_object *obj = va->gem.obj; + struct drm_gpuvm_bo *vm_bo = va->vm_bo; + bool should_defer_bo; + + if (unlikely(!obj)) + return; + + drm_WARN_ON(vm_bo->vm->drm, !drm_gpuvm_immediate_mode(vm_bo->vm)); + + mutex_lock(&obj->gpuva.lock); + list_del_init(&va->gem.entry); + + /* + * This is drm_gpuvm_bo_put_deferred() except we already hold the mutex. + */ + should_defer_bo = kref_put(&vm_bo->kref, drm_gpuvm_bo_into_zombie); + mutex_unlock(&obj->gpuva.lock); + if (should_defer_bo) + drm_gpuvm_bo_defer_zombie_cleanup(vm_bo); + + va->vm_bo = NULL; +} +EXPORT_SYMBOL_GPL(drm_gpuva_unlink_defer); + /** * drm_gpuva_find_first() - find the first &drm_gpuva in the given range * @gpuvm: the &drm_gpuvm to search in diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index 8890ded1d907..81cc7672cf2d 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -27,6 +27,7 @@ #include #include +#include #include #include @@ -152,6 +153,7 @@ void drm_gpuva_remove(struct drm_gpuva *va); void drm_gpuva_link(struct drm_gpuva *va, struct drm_gpuvm_bo *vm_bo); void drm_gpuva_unlink(struct drm_gpuva *va); +void drm_gpuva_unlink_defer(struct drm_gpuva *va); struct drm_gpuva *drm_gpuva_find(struct drm_gpuvm *gpuvm, u64 addr, u64 range); @@ -331,6 +333,11 @@ struct drm_gpuvm { */ spinlock_t lock; } evict; + + /** + * @bo_defer: structure holding vm_bos that need to be destroyed + */ + struct llist_head bo_defer; }; void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, @@ -714,6 +721,12 @@ struct drm_gpuvm_bo { * &drm_gpuvms evict list. */ struct list_head evict; + + /** + * @list.entry.bo_defer: List entry to attach to + * the &drm_gpuvms bo_defer list. 
+ */ + struct llist_node bo_defer; } entry; } list; }; @@ -746,6 +759,9 @@ drm_gpuvm_bo_get(struct drm_gpuvm_bo *vm_bo) bool drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo); +bool drm_gpuvm_bo_put_deferred(struct drm_gpuvm_bo *vm_bo); +void drm_gpuvm_bo_deferred_cleanup(struct drm_gpuvm *gpuvm); + struct drm_gpuvm_bo * drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, struct drm_gem_object *obj); -- cgit From 63e919a31625d5d2878cfc2511274826e29336b2 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 6 Oct 2025 12:05:56 +0000 Subject: panthor: use drm_gpuva_unlink_defer() Instead of manually deferring cleanup of vm_bos, use the new GPUVM infrastructure for doing so. To avoid manual management of vm_bo refcounts, the panthor_vma_link() and panthor_vma_unlink() methods are changed to get and put a vm_bo refcount on the vm_bo. This simplifies the code a lot. I preserved the behavior where panthor_gpuva_sm_step_map() drops the refcount right away rather than letting panthor_vm_cleanup_op_ctx() do it later. Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/r/20251006-vmbo-defer-v4-2-30cbd2c05adb@google.com Signed-off-by: Alice Ryhl --- drivers/gpu/drm/panthor/panthor_mmu.c | 110 ++++++---------------------------- 1 file changed, 19 insertions(+), 91 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index 6dec4354e378..9f5f4ddf2910 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -181,20 +181,6 @@ struct panthor_vm_op_ctx { u64 range; } va; - /** - * @returned_vmas: List of panthor_vma objects returned after a VM operation. - * - * For unmap operations, this will contain all VMAs that were covered by the - * specified VA range. - * - * For map operations, this will contain all VMAs that previously mapped to - * the specified VA range. - * - * Those VMAs, and the resources they point to will be released as part of - * the op_ctx cleanup operation. 
- */ - struct list_head returned_vmas; - /** @map: Fields specific to a map operation. */ struct { /** @map.vm_bo: Buffer object to map. */ @@ -1081,47 +1067,18 @@ void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node) mutex_unlock(&vm->mm_lock); } -static void panthor_vm_bo_put(struct drm_gpuvm_bo *vm_bo) +static void panthor_vm_bo_free(struct drm_gpuvm_bo *vm_bo) { struct panthor_gem_object *bo = to_panthor_bo(vm_bo->obj); - struct drm_gpuvm *vm = vm_bo->vm; - bool unpin; - - /* We must retain the GEM before calling drm_gpuvm_bo_put(), - * otherwise the mutex might be destroyed while we hold it. - * Same goes for the VM, since we take the VM resv lock. - */ - drm_gem_object_get(&bo->base.base); - drm_gpuvm_get(vm); - - /* We take the resv lock to protect against concurrent accesses to the - * gpuvm evicted/extobj lists that are modified in - * drm_gpuvm_bo_destroy(), which is called if drm_gpuvm_bo_put() - * releases sthe last vm_bo reference. - * We take the BO GPUVA list lock to protect the vm_bo removal from the - * GEM vm_bo list. - */ - dma_resv_lock(drm_gpuvm_resv(vm), NULL); - mutex_lock(&bo->base.base.gpuva.lock); - unpin = drm_gpuvm_bo_put(vm_bo); - mutex_unlock(&bo->base.base.gpuva.lock); - dma_resv_unlock(drm_gpuvm_resv(vm)); - /* If the vm_bo object was destroyed, release the pin reference that - * was hold by this object. 
- */ - if (unpin && !drm_gem_is_imported(&bo->base.base)) + if (!drm_gem_is_imported(&bo->base.base)) drm_gem_shmem_unpin(&bo->base); - - drm_gpuvm_put(vm); - drm_gem_object_put(&bo->base.base); + kfree(vm_bo); } static void panthor_vm_cleanup_op_ctx(struct panthor_vm_op_ctx *op_ctx, struct panthor_vm *vm) { - struct panthor_vma *vma, *tmp_vma; - u32 remaining_pt_count = op_ctx->rsvd_page_tables.count - op_ctx->rsvd_page_tables.ptr; @@ -1134,16 +1091,12 @@ static void panthor_vm_cleanup_op_ctx(struct panthor_vm_op_ctx *op_ctx, kfree(op_ctx->rsvd_page_tables.pages); if (op_ctx->map.vm_bo) - panthor_vm_bo_put(op_ctx->map.vm_bo); + drm_gpuvm_bo_put_deferred(op_ctx->map.vm_bo); for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) kfree(op_ctx->preallocated_vmas[i]); - list_for_each_entry_safe(vma, tmp_vma, &op_ctx->returned_vmas, node) { - list_del(&vma->node); - panthor_vm_bo_put(vma->base.vm_bo); - kfree(vma); - } + drm_gpuvm_bo_deferred_cleanup(&vm->base); } static struct panthor_vma * @@ -1232,7 +1185,6 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, return -EINVAL; memset(op_ctx, 0, sizeof(*op_ctx)); - INIT_LIST_HEAD(&op_ctx->returned_vmas); op_ctx->flags = flags; op_ctx->va.range = size; op_ctx->va.addr = va; @@ -1243,7 +1195,9 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, if (!drm_gem_is_imported(&bo->base.base)) { /* Pre-reserve the BO pages, so the map operation doesn't have to - * allocate. + * allocate. This pin is dropped in panthor_vm_bo_free(), so + * once we have successfully called drm_gpuvm_bo_create(), + * GPUVM will take care of dropping the pin for us. 
*/ ret = drm_gem_shmem_pin(&bo->base); if (ret) @@ -1282,16 +1236,6 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, mutex_unlock(&bo->base.base.gpuva.lock); dma_resv_unlock(panthor_vm_resv(vm)); - /* If the a vm_bo for this combination exists, it already - * retains a pin ref, and we can release the one we took earlier. - * - * If our pre-allocated vm_bo is picked, it now retains the pin ref, - * which will be released in panthor_vm_bo_put(). - */ - if (preallocated_vm_bo != op_ctx->map.vm_bo && - !drm_gem_is_imported(&bo->base.base)) - drm_gem_shmem_unpin(&bo->base); - op_ctx->map.bo_offset = offset; /* L1, L2 and L3 page tables. @@ -1339,7 +1283,6 @@ static int panthor_vm_prepare_unmap_op_ctx(struct panthor_vm_op_ctx *op_ctx, int ret; memset(op_ctx, 0, sizeof(*op_ctx)); - INIT_LIST_HEAD(&op_ctx->returned_vmas); op_ctx->va.range = size; op_ctx->va.addr = va; op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP; @@ -1387,7 +1330,6 @@ static void panthor_vm_prepare_sync_only_op_ctx(struct panthor_vm_op_ctx *op_ctx struct panthor_vm *vm) { memset(op_ctx, 0, sizeof(*op_ctx)); - INIT_LIST_HEAD(&op_ctx->returned_vmas); op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY; } @@ -2033,26 +1975,13 @@ static void panthor_vma_link(struct panthor_vm *vm, mutex_lock(&bo->base.base.gpuva.lock); drm_gpuva_link(&vma->base, vm_bo); - drm_WARN_ON(&vm->ptdev->base, drm_gpuvm_bo_put(vm_bo)); mutex_unlock(&bo->base.base.gpuva.lock); } -static void panthor_vma_unlink(struct panthor_vm *vm, - struct panthor_vma *vma) +static void panthor_vma_unlink(struct panthor_vma *vma) { - struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj); - struct drm_gpuvm_bo *vm_bo = drm_gpuvm_bo_get(vma->base.vm_bo); - - mutex_lock(&bo->base.base.gpuva.lock); - drm_gpuva_unlink(&vma->base); - mutex_unlock(&bo->base.base.gpuva.lock); - - /* drm_gpuva_unlink() release the vm_bo, but we manually retained it - * when entering this function, so we can implement deferred VMA 
- * destruction. Re-assign it here. - */ - vma->base.vm_bo = vm_bo; - list_add_tail(&vma->node, &vm->op_ctx->returned_vmas); + drm_gpuva_unlink_defer(&vma->base); + kfree(vma); } static void panthor_vma_init(struct panthor_vma *vma, u32 flags) @@ -2084,12 +2013,12 @@ static int panthor_gpuva_sm_step_map(struct drm_gpuva_op *op, void *priv) if (ret) return ret; - /* Ref owned by the mapping now, clear the obj field so we don't release the - * pinning/obj ref behind GPUVA's back. - */ drm_gpuva_map(&vm->base, &vma->base, &op->map); panthor_vma_link(vm, vma, op_ctx->map.vm_bo); + + drm_gpuvm_bo_put_deferred(op_ctx->map.vm_bo); op_ctx->map.vm_bo = NULL; + return 0; } @@ -2128,16 +2057,14 @@ static int panthor_gpuva_sm_step_remap(struct drm_gpuva_op *op, * owned by the old mapping which will be released when this * mapping is destroyed, we need to grab a ref here. */ - panthor_vma_link(vm, prev_vma, - drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo)); + panthor_vma_link(vm, prev_vma, op->remap.unmap->va->vm_bo); } if (next_vma) { - panthor_vma_link(vm, next_vma, - drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo)); + panthor_vma_link(vm, next_vma, op->remap.unmap->va->vm_bo); } - panthor_vma_unlink(vm, unmap_vma); + panthor_vma_unlink(unmap_vma); return 0; } @@ -2154,12 +2081,13 @@ static int panthor_gpuva_sm_step_unmap(struct drm_gpuva_op *op, return ret; drm_gpuva_unmap(&op->unmap); - panthor_vma_unlink(vm, unmap_vma); + panthor_vma_unlink(unmap_vma); return 0; } static const struct drm_gpuvm_ops panthor_gpuvm_ops = { .vm_free = panthor_vm_free, + .vm_bo_free = panthor_vm_bo_free, .sm_step_map = panthor_gpuva_sm_step_map, .sm_step_remap = panthor_gpuva_sm_step_remap, .sm_step_unmap = panthor_gpuva_sm_step_unmap, -- cgit From 3f674dc4ef1b3783f9d8dae33b46bf50eaac7c79 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 16 Oct 2025 11:13:20 -0400 Subject: gpu: nova-core: register: use field type for Into implementation The getter method of a field works with the field 
type, but its setter expects the type of the register. This leads to an asymmetry in the From/Into implementations required for a field with a dedicated type. For instance, a field declared as pub struct ControlReg(u32) { 3:0 mode as u8 ?=> Mode; ... } currently requires the following implementations: impl TryFrom<u8> for Mode { ... } impl From<Mode> for u32 { ... } Change this so the `From<Mode>` now needs to be implemented for `u8`, i.e. the primitive type of the field. This is more consistent, and will become a requirement once we start using the TryFrom/Into derive macros to implement these automatically. Reported-by: Edwin Peer Closes: https://lore.kernel.org/rust-for-linux/F3853912-2C1C-4F9B-89B0-3168689F35B3@nvidia.com/ Reviewed-by: Joel Fernandes Signed-off-by: Joel Fernandes Signed-off-by: Alexandre Courbot Message-ID: <20251016151323.1201196-2-joelagnelf@nvidia.com> --- drivers/gpu/nova-core/falcon.rs | 38 +++++++++++++++++++++++++----------- drivers/gpu/nova-core/regs/macros.rs | 10 +++++----- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 37e6298195e4..3f505b870601 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -22,11 +22,11 @@ mod hal; pub(crate) mod sec2; // TODO[FPRI]: Replace with `ToPrimitive`. -macro_rules! impl_from_enum_to_u32 { +macro_rules! impl_from_enum_to_u8 { ($enum_type:ty) => { - impl From<$enum_type> for u32 { + impl From<$enum_type> for u8 { fn from(value: $enum_type) -> Self { - value as u32 + value as u8 } } }; @@ -46,7 +46,7 @@ pub(crate) enum FalconCoreRev { Rev6 = 6, Rev7 = 7, } -impl_from_enum_to_u32!(FalconCoreRev); +impl_from_enum_to_u8!(FalconCoreRev); // TODO[FPRI]: replace with `FromPrimitive`. 
impl TryFrom for FalconCoreRev { @@ -81,7 +81,7 @@ pub(crate) enum FalconCoreRevSubversion { Subversion2 = 2, Subversion3 = 3, } -impl_from_enum_to_u32!(FalconCoreRevSubversion); +impl_from_enum_to_u8!(FalconCoreRevSubversion); // TODO[FPRI]: replace with `FromPrimitive`. impl TryFrom for FalconCoreRevSubversion { @@ -125,7 +125,7 @@ pub(crate) enum FalconSecurityModel { /// Also known as High-Secure, Privilege Level 3 or PL3. Heavy = 3, } -impl_from_enum_to_u32!(FalconSecurityModel); +impl_from_enum_to_u8!(FalconSecurityModel); // TODO[FPRI]: replace with `FromPrimitive`. impl TryFrom for FalconSecurityModel { @@ -157,7 +157,7 @@ pub(crate) enum FalconModSelAlgo { #[default] Rsa3k = 1, } -impl_from_enum_to_u32!(FalconModSelAlgo); +impl_from_enum_to_u8!(FalconModSelAlgo); // TODO[FPRI]: replace with `FromPrimitive`. impl TryFrom for FalconModSelAlgo { @@ -179,7 +179,7 @@ pub(crate) enum DmaTrfCmdSize { #[default] Size256B = 0x6, } -impl_from_enum_to_u32!(DmaTrfCmdSize); +impl_from_enum_to_u8!(DmaTrfCmdSize); // TODO[FPRI]: replace with `FromPrimitive`. impl TryFrom for DmaTrfCmdSize { @@ -202,7 +202,6 @@ pub(crate) enum PeregrineCoreSelect { /// RISC-V core is active. Riscv = 1, } -impl_from_enum_to_u32!(PeregrineCoreSelect); impl From for PeregrineCoreSelect { fn from(value: bool) -> Self { @@ -213,6 +212,15 @@ impl From for PeregrineCoreSelect { } } +impl From for bool { + fn from(value: PeregrineCoreSelect) -> Self { + match value { + PeregrineCoreSelect::Falcon => false, + PeregrineCoreSelect::Riscv => true, + } + } +} + /// Different types of memory present in a falcon core. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) enum FalconMem { @@ -236,7 +244,7 @@ pub(crate) enum FalconFbifTarget { /// Non-coherent system memory (System DRAM). NoncoherentSysmem = 2, } -impl_from_enum_to_u32!(FalconFbifTarget); +impl_from_enum_to_u8!(FalconFbifTarget); // TODO[FPRI]: replace with `FromPrimitive`. 
impl TryFrom for FalconFbifTarget { @@ -263,7 +271,6 @@ pub(crate) enum FalconFbifMemType { /// Physical memory addresses. Physical = 1, } -impl_from_enum_to_u32!(FalconFbifMemType); /// Conversion from a single-bit register field. impl From for FalconFbifMemType { @@ -275,6 +282,15 @@ impl From for FalconFbifMemType { } } +impl From for bool { + fn from(value: FalconFbifMemType) -> Self { + match value { + FalconFbifMemType::Virtual => false, + FalconFbifMemType::Physical => true, + } + } +} + /// Type used to represent the `PFALCON` registers address base for a given falcon engine. pub(crate) struct PFalconBase(()); diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 8058e1696df9..1c54a4533822 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -482,7 +482,7 @@ macro_rules! register { register!( @leaf_accessor $name $hi:$lo $field { |f| <$into_type>::from(if f != 0 { true } else { false }) } - $into_type => $into_type $(, $comment)?; + bool $into_type => $into_type $(, $comment)?; ); }; @@ -499,7 +499,7 @@ macro_rules! register { $(, $comment:literal)?; ) => { register!(@leaf_accessor $name $hi:$lo $field - { |f| <$try_into_type>::try_from(f as $type) } $try_into_type => + { |f| <$try_into_type>::try_from(f as $type) } $type $try_into_type => ::core::result::Result< $try_into_type, <$try_into_type as ::core::convert::TryFrom<$type>>::Error @@ -513,7 +513,7 @@ macro_rules! register { $(, $comment:literal)?; ) => { register!(@leaf_accessor $name $hi:$lo $field - { |f| <$into_type>::from(f as $type) } $into_type => $into_type $(, $comment)?;); + { |f| <$into_type>::from(f as $type) } $type $into_type => $into_type $(, $comment)?;); }; // Shortcut for non-boolean fields defined without the `=>` or `?=>` syntax. @@ -527,7 +527,7 @@ macro_rules! register { // Generates the accessor methods for a single field. 
( @leaf_accessor $name:ident $hi:tt:$lo:tt $field:ident - { $process:expr } $to_type:ty => $res_type:ty $(, $comment:literal)?; + { $process:expr } $prim_type:tt $to_type:ty => $res_type:ty $(, $comment:literal)?; ) => { ::kernel::macros::paste!( const [<$field:upper _RANGE>]: ::core::ops::RangeInclusive = $lo..=$hi; @@ -559,7 +559,7 @@ macro_rules! register { pub(crate) fn [](mut self, value: $to_type) -> Self { const MASK: u32 = $name::[<$field:upper _MASK>]; const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; - let value = (u32::from(value) << SHIFT) & MASK; + let value = (u32::from($prim_type::from(value)) << SHIFT) & MASK; self.0 = (self.0 & !MASK) | value; self -- cgit From 71ea85be25b4f54a53ec03d5deaed52f5ee65da8 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Thu, 16 Oct 2025 11:13:21 -0400 Subject: gpu: nova-core: bitfield: Move bitfield-specific code from register! into new macro Move the bitfield-specific code from the register macro into a new macro called bitfield. This will be used to define structs with bitfields, similar to C language. Reviewed-by: Elle Rhumsaa Reviewed-by: Alexandre Courbot Reviewed-by: Edwin Peer Signed-off-by: Joel Fernandes Signed-off-by: Alexandre Courbot Message-ID: <20251016151323.1201196-3-joelagnelf@nvidia.com> --- drivers/gpu/nova-core/bitfield.rs | 319 +++++++++++++++++++++++++++++++++++ drivers/gpu/nova-core/nova_core.rs | 3 + drivers/gpu/nova-core/regs/macros.rs | 259 ++-------------------------- 3 files changed, 332 insertions(+), 249 deletions(-) create mode 100644 drivers/gpu/nova-core/bitfield.rs diff --git a/drivers/gpu/nova-core/bitfield.rs b/drivers/gpu/nova-core/bitfield.rs new file mode 100644 index 000000000000..fb60800898c5 --- /dev/null +++ b/drivers/gpu/nova-core/bitfield.rs @@ -0,0 +1,319 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Bitfield library for Rust structures +//! +//! Support for defining bitfields in Rust structures. Also used by the [`register!`] macro. 
+ +/// Defines a struct with accessors to access bits within an inner unsigned integer. +/// +/// # Syntax +/// +/// ```rust +/// use nova_core::bitfield; +/// +/// #[derive(Debug, Clone, Copy, Default)] +/// enum Mode { +/// #[default] +/// Low = 0, +/// High = 1, +/// Auto = 2, +/// } +/// +/// impl TryFrom<u8> for Mode { +/// type Error = u8; +/// fn try_from(value: u8) -> Result<Self, Self::Error> { +/// match value { +/// 0 => Ok(Mode::Low), +/// 1 => Ok(Mode::High), +/// 2 => Ok(Mode::Auto), +/// _ => Err(value), +/// } +/// } +/// } +/// +/// impl From<Mode> for u8 { +/// fn from(mode: Mode) -> u8 { +/// mode as u8 +/// } +/// } +/// +/// #[derive(Debug, Clone, Copy, Default)] +/// enum State { +/// #[default] +/// Inactive = 0, +/// Active = 1, +/// } +/// +/// impl From<bool> for State { +/// fn from(value: bool) -> Self { +/// if value { State::Active } else { State::Inactive } +/// } +/// } +/// +/// impl From<State> for bool { +/// fn from(state: State) -> bool { +/// match state { +/// State::Inactive => false, +/// State::Active => true, +/// } +/// } +/// } +/// +/// bitfield! { +/// struct ControlReg { +/// 7:7 state as bool => State; +/// 3:0 mode as u8 ?=> Mode; +/// } +/// } +/// ``` +/// +/// This generates a struct with: +/// - Field accessors: `mode()`, `state()`, etc. +/// - Field setters: `set_mode()`, `set_state()`, etc. (supports chaining with builder pattern). +/// - Debug and Default implementations. +/// +/// Fields are defined as follows: +/// +/// - `as <type>` simply returns the field value casted to <type>, typically `u32`, `u16`, `u8` or +/// `bool`. Note that `bool` fields must have a range of 1 bit. +/// - `as <type> => <into_type>` calls `<into_type>`'s `From::<<type>>` implementation and returns +/// the result. +/// - `as <type> ?=> <try_into_type>` calls `<try_into_type>`'s `TryFrom::<<type>>` implementation +/// and returns the result. This is useful with fields for which not all values are valid. +macro_rules! bitfield { + // Main entry point - defines the bitfield struct with fields + (struct $name:ident $(, $comment:literal)? 
{ $($fields:tt)* }) => { + bitfield!(@core $name $(, $comment)? { $($fields)* }); + }; + + // All rules below are helpers. + + // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, + // `Default`, `BitOr`, and conversion to the value type) and field accessor methods. + (@core $name:ident $(, $comment:literal)? { $($fields:tt)* }) => { + $( + #[doc=$comment] + )? + #[repr(transparent)] + #[derive(Clone, Copy)] + pub(crate) struct $name(u32); + + impl ::core::ops::BitOr for $name { + type Output = Self; + + fn bitor(self, rhs: Self) -> Self::Output { + Self(self.0 | rhs.0) + } + } + + impl ::core::convert::From<$name> for u32 { + fn from(val: $name) -> u32 { + val.0 + } + } + + bitfield!(@fields_dispatcher $name { $($fields)* }); + }; + + // Captures the fields and passes them to all the implementers that require field information. + // + // Used to simplify the matching rules for implementers, so they don't need to match the entire + // complex fields rule even though they only make use of part of it. + (@fields_dispatcher $name:ident { + $($hi:tt:$lo:tt $field:ident as $type:tt + $(?=> $try_into_type:ty)? + $(=> $into_type:ty)? + $(, $comment:literal)? + ; + )* + } + ) => { + bitfield!(@field_accessors $name { + $( + $hi:$lo $field as $type + $(?=> $try_into_type)? + $(=> $into_type)? + $(, $comment)? + ; + )* + }); + bitfield!(@debug $name { $($field;)* }); + bitfield!(@default $name { $($field;)* }); + }; + + // Defines all the field getter/setter methods for `$name`. + ( + @field_accessors $name:ident { + $($hi:tt:$lo:tt $field:ident as $type:tt + $(?=> $try_into_type:ty)? + $(=> $into_type:ty)? + $(, $comment:literal)? + ; + )* + } + ) => { + $( + bitfield!(@check_field_bounds $hi:$lo $field as $type); + )* + + #[allow(dead_code)] + impl $name { + $( + bitfield!(@field_accessor $name $hi:$lo $field as $type + $(?=> $try_into_type)? + $(=> $into_type)? + $(, $comment)? 
+ ; + ); + )* + } + }; + + // Boolean fields must have `$hi == $lo`. + (@check_field_bounds $hi:tt:$lo:tt $field:ident as bool) => { + #[allow(clippy::eq_op)] + const _: () = { + ::kernel::build_assert!( + $hi == $lo, + concat!("boolean field `", stringify!($field), "` covers more than one bit") + ); + }; + }; + + // Non-boolean fields must have `$hi >= $lo`. + (@check_field_bounds $hi:tt:$lo:tt $field:ident as $type:tt) => { + #[allow(clippy::eq_op)] + const _: () = { + ::kernel::build_assert!( + $hi >= $lo, + concat!("field `", stringify!($field), "`'s MSB is smaller than its LSB") + ); + }; + }; + + // Catches fields defined as `bool` and convert them into a boolean value. + ( + @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as bool => $into_type:ty + $(, $comment:literal)?; + ) => { + bitfield!( + @leaf_accessor $name $hi:$lo $field + { |f| <$into_type>::from(if f != 0 { true } else { false }) } + bool $into_type => $into_type $(, $comment)?; + ); + }; + + // Shortcut for fields defined as `bool` without the `=>` syntax. + ( + @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as bool $(, $comment:literal)?; + ) => { + bitfield!(@field_accessor $name $hi:$lo $field as bool => bool $(, $comment)?;); + }; + + // Catches the `?=>` syntax for non-boolean fields. + ( + @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt ?=> $try_into_type:ty + $(, $comment:literal)?; + ) => { + bitfield!(@leaf_accessor $name $hi:$lo $field + { |f| <$try_into_type>::try_from(f as $type) } $type $try_into_type => + ::core::result::Result< + $try_into_type, + <$try_into_type as ::core::convert::TryFrom<$type>>::Error + > + $(, $comment)?;); + }; + + // Catches the `=>` syntax for non-boolean fields. 
+ ( + @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt => $into_type:ty + $(, $comment:literal)?; + ) => { + bitfield!(@leaf_accessor $name $hi:$lo $field + { |f| <$into_type>::from(f as $type) } $type $into_type => $into_type $(, $comment)?;); + }; + + // Shortcut for non-boolean fields defined without the `=>` or `?=>` syntax. + ( + @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt + $(, $comment:literal)?; + ) => { + bitfield!(@field_accessor $name $hi:$lo $field as $type => $type $(, $comment)?;); + }; + + // Generates the accessor methods for a single field. + ( + @leaf_accessor $name:ident $hi:tt:$lo:tt $field:ident + { $process:expr } $prim_type:tt $to_type:ty => $res_type:ty $(, $comment:literal)?; + ) => { + ::kernel::macros::paste!( + const [<$field:upper _RANGE>]: ::core::ops::RangeInclusive = $lo..=$hi; + const [<$field:upper _MASK>]: u32 = ((((1 << $hi) - 1) << 1) + 1) - ((1 << $lo) - 1); + const [<$field:upper _SHIFT>]: u32 = Self::[<$field:upper _MASK>].trailing_zeros(); + ); + + $( + #[doc="Returns the value of this field:"] + #[doc=$comment] + )? + #[inline(always)] + pub(crate) fn $field(self) -> $res_type { + ::kernel::macros::paste!( + const MASK: u32 = $name::[<$field:upper _MASK>]; + const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; + ); + let field = ((self.0 & MASK) >> SHIFT); + + $process(field) + } + + ::kernel::macros::paste!( + $( + #[doc="Sets the value of this field:"] + #[doc=$comment] + )? + #[inline(always)] + pub(crate) fn [](mut self, value: $to_type) -> Self { + const MASK: u32 = $name::[<$field:upper _MASK>]; + const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; + let value = (u32::from($prim_type::from(value)) << SHIFT) & MASK; + self.0 = (self.0 & !MASK) | value; + + self + } + ); + }; + + // Generates the `Debug` implementation for `$name`. 
+ (@debug $name:ident { $($field:ident;)* }) => { + impl ::kernel::fmt::Debug for $name { + fn fmt(&self, f: &mut ::kernel::fmt::Formatter<'_>) -> ::kernel::fmt::Result { + f.debug_struct(stringify!($name)) + .field("", &::kernel::prelude::fmt!("{:#x}", &self.0)) + $( + .field(stringify!($field), &self.$field()) + )* + .finish() + } + } + }; + + // Generates the `Default` implementation for `$name`. + (@default $name:ident { $($field:ident;)* }) => { + /// Returns a value for the bitfield where all fields are set to their default value. + impl ::core::default::Default for $name { + fn default() -> Self { + #[allow(unused_mut)] + let mut value = Self(Default::default()); + + ::kernel::macros::paste!( + $( + value.[](Default::default()); + )* + ); + + value + } + } + }; +} diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index fffcaee2249f..112277c7921e 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -2,6 +2,9 @@ //! Nova Core GPU Driver +#[macro_use] +mod bitfield; + mod dma; mod driver; mod falcon; diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 1c54a4533822..945d15a2c529 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -8,7 +8,8 @@ //! //! The `register!` macro in this module provides an intuitive and readable syntax for defining a //! dedicated type for each register. Each such type comes with its own field accessors that can -//! return an error if a field's value is invalid. +//! return an error if a field's value is invalid. Please look at the [`bitfield`] macro for the +//! complete syntax of fields definitions. /// Trait providing a base address to be added to the offset of a relative register to obtain /// its actual offset. 
@@ -54,15 +55,6 @@ pub(crate) trait RegisterBase { /// BOOT_0::alter(&bar, |r| r.set_major_revision(3).set_minor_revision(10)); /// ``` /// -/// Fields are defined as follows: -/// -/// - `as ` simply returns the field value casted to , typically `u32`, `u16`, `u8` or -/// `bool`. Note that `bool` fields must have a range of 1 bit. -/// - `as => ` calls ``'s `From::<>` implementation and returns -/// the result. -/// - `as ?=> ` calls ``'s `TryFrom::<>` implementation -/// and returns the result. This is useful with fields for which not all values are valid. -/// /// The documentation strings are optional. If present, they will be added to the type's /// definition, or the field getter and setter methods they are attached to. /// @@ -284,25 +276,25 @@ pub(crate) trait RegisterBase { macro_rules! register { // Creates a register at a fixed offset of the MMIO space. ($name:ident @ $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@core $name $(, $comment)? { $($fields)* } ); + bitfield!(struct $name $(, $comment)? { $($fields)* } ); register!(@io_fixed $name @ $offset); }; // Creates an alias register of fixed offset register `alias` with its own fields. ($name:ident => $alias:ident $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@core $name $(, $comment)? { $($fields)* } ); + bitfield!(struct $name $(, $comment)? { $($fields)* } ); register!(@io_fixed $name @ $alias::OFFSET); }; // Creates a register at a relative offset from a base address provider. ($name:ident @ $base:ty [ $offset:literal ] $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@core $name $(, $comment)? { $($fields)* } ); + bitfield!(struct $name $(, $comment)? { $($fields)* } ); register!(@io_relative $name @ $base [ $offset ]); }; // Creates an alias register of relative offset register `alias` with its own fields. ($name:ident => $base:ty [ $alias:ident ] $(, $comment:literal)? { $($fields:tt)* }) => { - register!(@core $name $(, $comment)? 
{ $($fields)* } ); + bitfield!(struct $name $(, $comment)? { $($fields)* } ); register!(@io_relative $name @ $base [ $alias::OFFSET ]); }; @@ -313,7 +305,7 @@ macro_rules! register { } ) => { static_assert!(::core::mem::size_of::() <= $stride); - register!(@core $name $(, $comment)? { $($fields)* } ); + bitfield!(struct $name $(, $comment)? { $($fields)* } ); register!(@io_array $name @ $offset [ $size ; $stride ]); }; @@ -334,7 +326,7 @@ macro_rules! register { $(, $comment:literal)? { $($fields:tt)* } ) => { static_assert!(::core::mem::size_of::() <= $stride); - register!(@core $name $(, $comment)? { $($fields)* } ); + bitfield!(struct $name $(, $comment)? { $($fields)* } ); register!(@io_relative_array $name @ $base [ $offset [ $size ; $stride ] ]); }; @@ -356,7 +348,7 @@ macro_rules! register { } ) => { static_assert!($idx < $alias::SIZE); - register!(@core $name $(, $comment)? { $($fields)* } ); + bitfield!(struct $name $(, $comment)? { $($fields)* } ); register!(@io_relative $name @ $base [ $alias::OFFSET + $idx * $alias::STRIDE ] ); }; @@ -365,241 +357,10 @@ macro_rules! register { // to avoid it being interpreted in place of the relative register array alias rule. ($name:ident => $alias:ident [ $idx:expr ] $(, $comment:literal)? { $($fields:tt)* }) => { static_assert!($idx < $alias::SIZE); - register!(@core $name $(, $comment)? { $($fields)* } ); + bitfield!(struct $name $(, $comment)? { $($fields)* } ); register!(@io_fixed $name @ $alias::OFFSET + $idx * $alias::STRIDE ); }; - // All rules below are helpers. - - // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, - // `Default`, `BitOr`, and conversion to the value type) and field accessor methods. - (@core $name:ident $(, $comment:literal)? { $($fields:tt)* }) => { - $( - #[doc=$comment] - )? 
- #[repr(transparent)] - #[derive(Clone, Copy)] - pub(crate) struct $name(u32); - - impl ::core::ops::BitOr for $name { - type Output = Self; - - fn bitor(self, rhs: Self) -> Self::Output { - Self(self.0 | rhs.0) - } - } - - impl ::core::convert::From<$name> for u32 { - fn from(reg: $name) -> u32 { - reg.0 - } - } - - register!(@fields_dispatcher $name { $($fields)* }); - }; - - // Captures the fields and passes them to all the implementers that require field information. - // - // Used to simplify the matching rules for implementers, so they don't need to match the entire - // complex fields rule even though they only make use of part of it. - (@fields_dispatcher $name:ident { - $($hi:tt:$lo:tt $field:ident as $type:tt - $(?=> $try_into_type:ty)? - $(=> $into_type:ty)? - $(, $comment:literal)? - ; - )* - } - ) => { - register!(@field_accessors $name { - $( - $hi:$lo $field as $type - $(?=> $try_into_type)? - $(=> $into_type)? - $(, $comment)? - ; - )* - }); - register!(@debug $name { $($field;)* }); - register!(@default $name { $($field;)* }); - }; - - // Defines all the field getter/methods methods for `$name`. - ( - @field_accessors $name:ident { - $($hi:tt:$lo:tt $field:ident as $type:tt - $(?=> $try_into_type:ty)? - $(=> $into_type:ty)? - $(, $comment:literal)? - ; - )* - } - ) => { - $( - register!(@check_field_bounds $hi:$lo $field as $type); - )* - - #[allow(dead_code)] - impl $name { - $( - register!(@field_accessor $name $hi:$lo $field as $type - $(?=> $try_into_type)? - $(=> $into_type)? - $(, $comment)? - ; - ); - )* - } - }; - - // Boolean fields must have `$hi == $lo`. - (@check_field_bounds $hi:tt:$lo:tt $field:ident as bool) => { - #[allow(clippy::eq_op)] - const _: () = { - ::kernel::build_assert!( - $hi == $lo, - concat!("boolean field `", stringify!($field), "` covers more than one bit") - ); - }; - }; - - // Non-boolean fields must have `$hi >= $lo`. 
- (@check_field_bounds $hi:tt:$lo:tt $field:ident as $type:tt) => { - #[allow(clippy::eq_op)] - const _: () = { - ::kernel::build_assert!( - $hi >= $lo, - concat!("field `", stringify!($field), "`'s MSB is smaller than its LSB") - ); - }; - }; - - // Catches fields defined as `bool` and convert them into a boolean value. - ( - @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as bool => $into_type:ty - $(, $comment:literal)?; - ) => { - register!( - @leaf_accessor $name $hi:$lo $field - { |f| <$into_type>::from(if f != 0 { true } else { false }) } - bool $into_type => $into_type $(, $comment)?; - ); - }; - - // Shortcut for fields defined as `bool` without the `=>` syntax. - ( - @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as bool $(, $comment:literal)?; - ) => { - register!(@field_accessor $name $hi:$lo $field as bool => bool $(, $comment)?;); - }; - - // Catches the `?=>` syntax for non-boolean fields. - ( - @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt ?=> $try_into_type:ty - $(, $comment:literal)?; - ) => { - register!(@leaf_accessor $name $hi:$lo $field - { |f| <$try_into_type>::try_from(f as $type) } $type $try_into_type => - ::core::result::Result< - $try_into_type, - <$try_into_type as ::core::convert::TryFrom<$type>>::Error - > - $(, $comment)?;); - }; - - // Catches the `=>` syntax for non-boolean fields. - ( - @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt => $into_type:ty - $(, $comment:literal)?; - ) => { - register!(@leaf_accessor $name $hi:$lo $field - { |f| <$into_type>::from(f as $type) } $type $into_type => $into_type $(, $comment)?;); - }; - - // Shortcut for non-boolean fields defined without the `=>` or `?=>` syntax. - ( - @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt - $(, $comment:literal)?; - ) => { - register!(@field_accessor $name $hi:$lo $field as $type => $type $(, $comment)?;); - }; - - // Generates the accessor methods for a single field. 
- ( - @leaf_accessor $name:ident $hi:tt:$lo:tt $field:ident - { $process:expr } $prim_type:tt $to_type:ty => $res_type:ty $(, $comment:literal)?; - ) => { - ::kernel::macros::paste!( - const [<$field:upper _RANGE>]: ::core::ops::RangeInclusive = $lo..=$hi; - const [<$field:upper _MASK>]: u32 = ((((1 << $hi) - 1) << 1) + 1) - ((1 << $lo) - 1); - const [<$field:upper _SHIFT>]: u32 = Self::[<$field:upper _MASK>].trailing_zeros(); - ); - - $( - #[doc="Returns the value of this field:"] - #[doc=$comment] - )? - #[inline(always)] - pub(crate) fn $field(self) -> $res_type { - ::kernel::macros::paste!( - const MASK: u32 = $name::[<$field:upper _MASK>]; - const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; - ); - let field = ((self.0 & MASK) >> SHIFT); - - $process(field) - } - - ::kernel::macros::paste!( - $( - #[doc="Sets the value of this field:"] - #[doc=$comment] - )? - #[inline(always)] - pub(crate) fn [](mut self, value: $to_type) -> Self { - const MASK: u32 = $name::[<$field:upper _MASK>]; - const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; - let value = (u32::from($prim_type::from(value)) << SHIFT) & MASK; - self.0 = (self.0 & !MASK) | value; - - self - } - ); - }; - - // Generates the `Debug` implementation for `$name`. - (@debug $name:ident { $($field:ident;)* }) => { - impl ::kernel::fmt::Debug for $name { - fn fmt(&self, f: &mut ::kernel::fmt::Formatter<'_>) -> ::kernel::fmt::Result { - f.debug_struct(stringify!($name)) - .field("", &::kernel::prelude::fmt!("{:#x}", &self.0)) - $( - .field(stringify!($field), &self.$field()) - )* - .finish() - } - } - }; - - // Generates the `Default` implementation for `$name`. - (@default $name:ident { $($field:ident;)* }) => { - /// Returns a value for the register where all fields are set to their default value. 
- impl ::core::default::Default for $name { - fn default() -> Self { - #[allow(unused_mut)] - let mut value = Self(Default::default()); - - ::kernel::macros::paste!( - $( - value.[](Default::default()); - )* - ); - - value - } - } - }; - // Generates the IO accessors for a fixed offset register. (@io_fixed $name:ident @ $offset:expr) => { #[allow(dead_code)] -- cgit From 7cabacb1aad647931980f63fa18292ad3f859544 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Thu, 16 Oct 2025 11:13:22 -0400 Subject: gpu: nova-core: bitfield: Add support for different storage widths Previously, bitfields were hardcoded to use u32 as the underlying storage type. Add support for different storage types (u8, u16, u32, u64) to the bitfield macro. New syntax is: struct Name() { ... } Reviewed-by: Alexandre Courbot Reviewed-by: Elle Rhumsaa Reviewed-by: Edwin Peer Signed-off-by: Joel Fernandes [acourbot@nvidia.com: fix long lines warnings.] Signed-off-by: Alexandre Courbot Message-ID: <20251016151323.1201196-4-joelagnelf@nvidia.com> --- drivers/gpu/nova-core/bitfield.rs | 70 +++++++++++++++++++++--------------- drivers/gpu/nova-core/regs/macros.rs | 16 ++++----- 2 files changed, 49 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/nova-core/bitfield.rs b/drivers/gpu/nova-core/bitfield.rs index fb60800898c5..eff938937df4 100644 --- a/drivers/gpu/nova-core/bitfield.rs +++ b/drivers/gpu/nova-core/bitfield.rs @@ -60,7 +60,7 @@ /// } /// /// bitfield! { -/// struct ControlReg { +/// struct ControlReg(u32) { /// 7:7 state as bool => State; /// 3:0 mode as u8 ?=> Mode; /// } @@ -70,6 +70,8 @@ /// This generates a struct with: /// - Field accessors: `mode()`, `state()`, etc. /// - Field setters: `set_mode()`, `set_state()`, etc. (supports chaining with builder pattern). +/// Note that the compiler will error out if the size of the setter's arg exceeds the +/// struct's storage size. /// - Debug and Default implementations. 
/// /// Fields are defined as follows: @@ -82,21 +84,21 @@ /// and returns the result. This is useful with fields for which not all values are valid. macro_rules! bitfield { // Main entry point - defines the bitfield struct with fields - (struct $name:ident $(, $comment:literal)? { $($fields:tt)* }) => { - bitfield!(@core $name $(, $comment)? { $($fields)* }); + (struct $name:ident($storage:ty) $(, $comment:literal)? { $($fields:tt)* }) => { + bitfield!(@core $name $storage $(, $comment)? { $($fields)* }); }; // All rules below are helpers. // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, // `Default`, `BitOr`, and conversion to the value type) and field accessor methods. - (@core $name:ident $(, $comment:literal)? { $($fields:tt)* }) => { + (@core $name:ident $storage:ty $(, $comment:literal)? { $($fields:tt)* }) => { $( #[doc=$comment] )? #[repr(transparent)] #[derive(Clone, Copy)] - pub(crate) struct $name(u32); + pub(crate) struct $name($storage); impl ::core::ops::BitOr for $name { type Output = Self; @@ -106,20 +108,20 @@ macro_rules! bitfield { } } - impl ::core::convert::From<$name> for u32 { - fn from(val: $name) -> u32 { + impl ::core::convert::From<$name> for $storage { + fn from(val: $name) -> $storage { val.0 } } - bitfield!(@fields_dispatcher $name { $($fields)* }); + bitfield!(@fields_dispatcher $name $storage { $($fields)* }); }; // Captures the fields and passes them to all the implementers that require field information. // // Used to simplify the matching rules for implementers, so they don't need to match the entire // complex fields rule even though they only make use of part of it. - (@fields_dispatcher $name:ident { + (@fields_dispatcher $name:ident $storage:ty { $($hi:tt:$lo:tt $field:ident as $type:tt $(?=> $try_into_type:ty)? $(=> $into_type:ty)? @@ -128,7 +130,7 @@ macro_rules! 
bitfield { )* } ) => { - bitfield!(@field_accessors $name { + bitfield!(@field_accessors $name $storage { $( $hi:$lo $field as $type $(?=> $try_into_type)? @@ -143,7 +145,7 @@ macro_rules! bitfield { // Defines all the field getter/setter methods for `$name`. ( - @field_accessors $name:ident { + @field_accessors $name:ident $storage:ty { $($hi:tt:$lo:tt $field:ident as $type:tt $(?=> $try_into_type:ty)? $(=> $into_type:ty)? @@ -159,7 +161,7 @@ macro_rules! bitfield { #[allow(dead_code)] impl $name { $( - bitfield!(@field_accessor $name $hi:$lo $field as $type + bitfield!(@field_accessor $name $storage, $hi:$lo $field as $type $(?=> $try_into_type)? $(=> $into_type)? $(, $comment)? @@ -193,11 +195,11 @@ macro_rules! bitfield { // Catches fields defined as `bool` and convert them into a boolean value. ( - @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as bool => $into_type:ty + @field_accessor $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as bool => $into_type:ty $(, $comment:literal)?; ) => { bitfield!( - @leaf_accessor $name $hi:$lo $field + @leaf_accessor $name $storage, $hi:$lo $field { |f| <$into_type>::from(if f != 0 { true } else { false }) } bool $into_type => $into_type $(, $comment)?; ); @@ -205,17 +207,18 @@ macro_rules! bitfield { // Shortcut for fields defined as `bool` without the `=>` syntax. ( - @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as bool $(, $comment:literal)?; + @field_accessor $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as bool + $(, $comment:literal)?; ) => { - bitfield!(@field_accessor $name $hi:$lo $field as bool => bool $(, $comment)?;); + bitfield!(@field_accessor $name $storage, $hi:$lo $field as bool => bool $(, $comment)?;); }; // Catches the `?=>` syntax for non-boolean fields. 
( - @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt ?=> $try_into_type:ty - $(, $comment:literal)?; + @field_accessor $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as $type:tt + ?=> $try_into_type:ty $(, $comment:literal)?; ) => { - bitfield!(@leaf_accessor $name $hi:$lo $field + bitfield!(@leaf_accessor $name $storage, $hi:$lo $field { |f| <$try_into_type>::try_from(f as $type) } $type $try_into_type => ::core::result::Result< $try_into_type, @@ -226,29 +229,38 @@ macro_rules! bitfield { // Catches the `=>` syntax for non-boolean fields. ( - @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt => $into_type:ty - $(, $comment:literal)?; + @field_accessor $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as $type:tt + => $into_type:ty $(, $comment:literal)?; ) => { - bitfield!(@leaf_accessor $name $hi:$lo $field + bitfield!(@leaf_accessor $name $storage, $hi:$lo $field { |f| <$into_type>::from(f as $type) } $type $into_type => $into_type $(, $comment)?;); }; // Shortcut for non-boolean fields defined without the `=>` or `?=>` syntax. ( - @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt + @field_accessor $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as $type:tt $(, $comment:literal)?; ) => { - bitfield!(@field_accessor $name $hi:$lo $field as $type => $type $(, $comment)?;); + bitfield!(@field_accessor $name $storage, $hi:$lo $field as $type => $type $(, $comment)?;); }; // Generates the accessor methods for a single field. 
( - @leaf_accessor $name:ident $hi:tt:$lo:tt $field:ident + @leaf_accessor $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident { $process:expr } $prim_type:tt $to_type:ty => $res_type:ty $(, $comment:literal)?; ) => { ::kernel::macros::paste!( const [<$field:upper _RANGE>]: ::core::ops::RangeInclusive = $lo..=$hi; - const [<$field:upper _MASK>]: u32 = ((((1 << $hi) - 1) << 1) + 1) - ((1 << $lo) - 1); + const [<$field:upper _MASK>]: $storage = { + // Generate mask for shifting + match ::core::mem::size_of::<$storage>() { + 1 => ::kernel::bits::genmask_u8($lo..=$hi) as $storage, + 2 => ::kernel::bits::genmask_u16($lo..=$hi) as $storage, + 4 => ::kernel::bits::genmask_u32($lo..=$hi) as $storage, + 8 => ::kernel::bits::genmask_u64($lo..=$hi) as $storage, + _ => ::kernel::build_error!("Unsupported storage type size") + } + }; const [<$field:upper _SHIFT>]: u32 = Self::[<$field:upper _MASK>].trailing_zeros(); ); @@ -259,7 +271,7 @@ macro_rules! bitfield { #[inline(always)] pub(crate) fn $field(self) -> $res_type { ::kernel::macros::paste!( - const MASK: u32 = $name::[<$field:upper _MASK>]; + const MASK: $storage = $name::[<$field:upper _MASK>]; const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; ); let field = ((self.0 & MASK) >> SHIFT); @@ -274,9 +286,9 @@ macro_rules! bitfield { )? #[inline(always)] pub(crate) fn [](mut self, value: $to_type) -> Self { - const MASK: u32 = $name::[<$field:upper _MASK>]; + const MASK: $storage = $name::[<$field:upper _MASK>]; const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; - let value = (u32::from($prim_type::from(value)) << SHIFT) & MASK; + let value = ($storage::from($prim_type::from(value)) << SHIFT) & MASK; self.0 = (self.0 & !MASK) | value; self diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 945d15a2c529..ffd7d5cb73bb 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -276,25 +276,25 @@ pub(crate) trait RegisterBase { macro_rules! 
register { // Creates a register at a fixed offset of the MMIO space. ($name:ident @ $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { - bitfield!(struct $name $(, $comment)? { $($fields)* } ); + bitfield!(struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_fixed $name @ $offset); }; // Creates an alias register of fixed offset register `alias` with its own fields. ($name:ident => $alias:ident $(, $comment:literal)? { $($fields:tt)* } ) => { - bitfield!(struct $name $(, $comment)? { $($fields)* } ); + bitfield!(struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_fixed $name @ $alias::OFFSET); }; // Creates a register at a relative offset from a base address provider. ($name:ident @ $base:ty [ $offset:literal ] $(, $comment:literal)? { $($fields:tt)* } ) => { - bitfield!(struct $name $(, $comment)? { $($fields)* } ); + bitfield!(struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_relative $name @ $base [ $offset ]); }; // Creates an alias register of relative offset register `alias` with its own fields. ($name:ident => $base:ty [ $alias:ident ] $(, $comment:literal)? { $($fields:tt)* }) => { - bitfield!(struct $name $(, $comment)? { $($fields)* } ); + bitfield!(struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_relative $name @ $base [ $alias::OFFSET ]); }; @@ -305,7 +305,7 @@ macro_rules! register { } ) => { static_assert!(::core::mem::size_of::() <= $stride); - bitfield!(struct $name $(, $comment)? { $($fields)* } ); + bitfield!(struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_array $name @ $offset [ $size ; $stride ]); }; @@ -326,7 +326,7 @@ macro_rules! register { $(, $comment:literal)? { $($fields:tt)* } ) => { static_assert!(::core::mem::size_of::() <= $stride); - bitfield!(struct $name $(, $comment)? { $($fields)* } ); + bitfield!(struct $name(u32) $(, $comment)? 
{ $($fields)* } ); register!(@io_relative_array $name @ $base [ $offset [ $size ; $stride ] ]); }; @@ -348,7 +348,7 @@ macro_rules! register { } ) => { static_assert!($idx < $alias::SIZE); - bitfield!(struct $name $(, $comment)? { $($fields)* } ); + bitfield!(struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_relative $name @ $base [ $alias::OFFSET + $idx * $alias::STRIDE ] ); }; @@ -357,7 +357,7 @@ macro_rules! register { // to avoid it being interpreted in place of the relative register array alias rule. ($name:ident => $alias:ident [ $idx:expr ] $(, $comment:literal)? { $($fields:tt)* }) => { static_assert!($idx < $alias::SIZE); - bitfield!(struct $name $(, $comment)? { $($fields)* } ); + bitfield!(struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_fixed $name @ $alias::OFFSET + $idx * $alias::STRIDE ); }; -- cgit From 77ed4376d7c5de8be1f2612d6b4777077fb5fdb2 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Thu, 16 Oct 2025 11:13:23 -0400 Subject: gpu: nova-core: bitfield: Add support for custom visibility Add support for custom visibility to allow for users to control visibility of the structure and helpers. Reviewed-by: Alexandre Courbot Reviewed-by: Elle Rhumsaa Reviewed-by: Edwin Peer Signed-off-by: Joel Fernandes [acourbot@nvidia.com: fix long lines warnings and typo in commit message.] Signed-off-by: Alexandre Courbot Message-ID: <20251016151323.1201196-5-joelagnelf@nvidia.com> --- drivers/gpu/nova-core/bitfield.rs | 55 ++++++++++++++++++++---------------- drivers/gpu/nova-core/regs/macros.rs | 16 +++++------ 2 files changed, 39 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/nova-core/bitfield.rs b/drivers/gpu/nova-core/bitfield.rs index eff938937df4..25579b4c328f 100644 --- a/drivers/gpu/nova-core/bitfield.rs +++ b/drivers/gpu/nova-core/bitfield.rs @@ -60,7 +60,7 @@ /// } /// /// bitfield! 
{ -/// struct ControlReg(u32) { +/// pub struct ControlReg(u32) { /// 7:7 state as bool => State; /// 3:0 mode as u8 ?=> Mode; /// } @@ -74,6 +74,9 @@ /// struct's storage size. /// - Debug and Default implementations. /// +/// Note: Field accessors and setters inherit the same visibility as the struct itself. +/// In the example above, both `mode()` and `set_mode()` methods will be `pub`. +/// /// Fields are defined as follows: /// /// - `as ` simply returns the field value casted to , typically `u32`, `u16`, `u8` or @@ -84,21 +87,21 @@ /// and returns the result. This is useful with fields for which not all values are valid. macro_rules! bitfield { // Main entry point - defines the bitfield struct with fields - (struct $name:ident($storage:ty) $(, $comment:literal)? { $($fields:tt)* }) => { - bitfield!(@core $name $storage $(, $comment)? { $($fields)* }); + ($vis:vis struct $name:ident($storage:ty) $(, $comment:literal)? { $($fields:tt)* }) => { + bitfield!(@core $vis $name $storage $(, $comment)? { $($fields)* }); }; // All rules below are helpers. // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, // `Default`, `BitOr`, and conversion to the value type) and field accessor methods. - (@core $name:ident $storage:ty $(, $comment:literal)? { $($fields:tt)* }) => { + (@core $vis:vis $name:ident $storage:ty $(, $comment:literal)? { $($fields:tt)* }) => { $( #[doc=$comment] )? #[repr(transparent)] #[derive(Clone, Copy)] - pub(crate) struct $name($storage); + $vis struct $name($storage); impl ::core::ops::BitOr for $name { type Output = Self; @@ -114,14 +117,14 @@ macro_rules! bitfield { } } - bitfield!(@fields_dispatcher $name $storage { $($fields)* }); + bitfield!(@fields_dispatcher $vis $name $storage { $($fields)* }); }; // Captures the fields and passes them to all the implementers that require field information. 
// // Used to simplify the matching rules for implementers, so they don't need to match the entire // complex fields rule even though they only make use of part of it. - (@fields_dispatcher $name:ident $storage:ty { + (@fields_dispatcher $vis:vis $name:ident $storage:ty { $($hi:tt:$lo:tt $field:ident as $type:tt $(?=> $try_into_type:ty)? $(=> $into_type:ty)? @@ -130,7 +133,7 @@ macro_rules! bitfield { )* } ) => { - bitfield!(@field_accessors $name $storage { + bitfield!(@field_accessors $vis $name $storage { $( $hi:$lo $field as $type $(?=> $try_into_type)? @@ -145,7 +148,7 @@ macro_rules! bitfield { // Defines all the field getter/setter methods for `$name`. ( - @field_accessors $name:ident $storage:ty { + @field_accessors $vis:vis $name:ident $storage:ty { $($hi:tt:$lo:tt $field:ident as $type:tt $(?=> $try_into_type:ty)? $(=> $into_type:ty)? @@ -161,7 +164,7 @@ macro_rules! bitfield { #[allow(dead_code)] impl $name { $( - bitfield!(@field_accessor $name $storage, $hi:$lo $field as $type + bitfield!(@field_accessor $vis $name $storage, $hi:$lo $field as $type $(?=> $try_into_type)? $(=> $into_type)? $(, $comment)? @@ -195,11 +198,11 @@ macro_rules! bitfield { // Catches fields defined as `bool` and convert them into a boolean value. ( - @field_accessor $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as bool => $into_type:ty - $(, $comment:literal)?; + @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as bool + => $into_type:ty $(, $comment:literal)?; ) => { bitfield!( - @leaf_accessor $name $storage, $hi:$lo $field + @leaf_accessor $vis $name $storage, $hi:$lo $field { |f| <$into_type>::from(if f != 0 { true } else { false }) } bool $into_type => $into_type $(, $comment)?; ); @@ -207,18 +210,20 @@ macro_rules! bitfield { // Shortcut for fields defined as `bool` without the `=>` syntax. 
( - @field_accessor $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as bool + @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as bool $(, $comment:literal)?; ) => { - bitfield!(@field_accessor $name $storage, $hi:$lo $field as bool => bool $(, $comment)?;); + bitfield!( + @field_accessor $vis $name $storage, $hi:$lo $field as bool => bool $(, $comment)?; + ); }; // Catches the `?=>` syntax for non-boolean fields. ( - @field_accessor $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as $type:tt + @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as $type:tt ?=> $try_into_type:ty $(, $comment:literal)?; ) => { - bitfield!(@leaf_accessor $name $storage, $hi:$lo $field + bitfield!(@leaf_accessor $vis $name $storage, $hi:$lo $field { |f| <$try_into_type>::try_from(f as $type) } $type $try_into_type => ::core::result::Result< $try_into_type, @@ -229,24 +234,26 @@ macro_rules! bitfield { // Catches the `=>` syntax for non-boolean fields. ( - @field_accessor $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as $type:tt + @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as $type:tt => $into_type:ty $(, $comment:literal)?; ) => { - bitfield!(@leaf_accessor $name $storage, $hi:$lo $field + bitfield!(@leaf_accessor $vis $name $storage, $hi:$lo $field { |f| <$into_type>::from(f as $type) } $type $into_type => $into_type $(, $comment)?;); }; // Shortcut for non-boolean fields defined without the `=>` or `?=>` syntax. ( - @field_accessor $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as $type:tt + @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as $type:tt $(, $comment:literal)?; ) => { - bitfield!(@field_accessor $name $storage, $hi:$lo $field as $type => $type $(, $comment)?;); + bitfield!( + @field_accessor $vis $name $storage, $hi:$lo $field as $type => $type $(, $comment)?; + ); }; // Generates the accessor methods for a single field. 
( - @leaf_accessor $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident + @leaf_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident { $process:expr } $prim_type:tt $to_type:ty => $res_type:ty $(, $comment:literal)?; ) => { ::kernel::macros::paste!( @@ -269,7 +276,7 @@ macro_rules! bitfield { #[doc=$comment] )? #[inline(always)] - pub(crate) fn $field(self) -> $res_type { + $vis fn $field(self) -> $res_type { ::kernel::macros::paste!( const MASK: $storage = $name::[<$field:upper _MASK>]; const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; @@ -285,7 +292,7 @@ macro_rules! bitfield { #[doc=$comment] )? #[inline(always)] - pub(crate) fn [](mut self, value: $to_type) -> Self { + $vis fn [](mut self, value: $to_type) -> Self { const MASK: $storage = $name::[<$field:upper _MASK>]; const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; let value = ($storage::from($prim_type::from(value)) << SHIFT) & MASK; diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index ffd7d5cb73bb..c0a5194e8d97 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -276,25 +276,25 @@ pub(crate) trait RegisterBase { macro_rules! register { // Creates a register at a fixed offset of the MMIO space. ($name:ident @ $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { - bitfield!(struct $name(u32) $(, $comment)? { $($fields)* } ); + bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_fixed $name @ $offset); }; // Creates an alias register of fixed offset register `alias` with its own fields. ($name:ident => $alias:ident $(, $comment:literal)? { $($fields:tt)* } ) => { - bitfield!(struct $name(u32) $(, $comment)? { $($fields)* } ); + bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_fixed $name @ $alias::OFFSET); }; // Creates a register at a relative offset from a base address provider. 
($name:ident @ $base:ty [ $offset:literal ] $(, $comment:literal)? { $($fields:tt)* } ) => { - bitfield!(struct $name(u32) $(, $comment)? { $($fields)* } ); + bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_relative $name @ $base [ $offset ]); }; // Creates an alias register of relative offset register `alias` with its own fields. ($name:ident => $base:ty [ $alias:ident ] $(, $comment:literal)? { $($fields:tt)* }) => { - bitfield!(struct $name(u32) $(, $comment)? { $($fields)* } ); + bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_relative $name @ $base [ $alias::OFFSET ]); }; @@ -305,7 +305,7 @@ macro_rules! register { } ) => { static_assert!(::core::mem::size_of::() <= $stride); - bitfield!(struct $name(u32) $(, $comment)? { $($fields)* } ); + bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_array $name @ $offset [ $size ; $stride ]); }; @@ -326,7 +326,7 @@ macro_rules! register { $(, $comment:literal)? { $($fields:tt)* } ) => { static_assert!(::core::mem::size_of::() <= $stride); - bitfield!(struct $name(u32) $(, $comment)? { $($fields)* } ); + bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_relative_array $name @ $base [ $offset [ $size ; $stride ] ]); }; @@ -348,7 +348,7 @@ macro_rules! register { } ) => { static_assert!($idx < $alias::SIZE); - bitfield!(struct $name(u32) $(, $comment)? { $($fields)* } ); + bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_relative $name @ $base [ $alias::OFFSET + $idx * $alias::STRIDE ] ); }; @@ -357,7 +357,7 @@ macro_rules! register { // to avoid it being interpreted in place of the relative register array alias rule. ($name:ident => $alias:ident [ $idx:expr ] $(, $comment:literal)? { $($fields:tt)* }) => { static_assert!($idx < $alias::SIZE); - bitfield!(struct $name(u32) $(, $comment)? 
{ $($fields)* } ); + bitfield!(pub(crate) struct $name(u32) $(, $comment)? { $($fields)* } ); register!(@io_fixed $name @ $alias::OFFSET + $idx * $alias::STRIDE ); }; -- cgit From 5ae65bdcb867555540169ef57876658262a67d87 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 16 Oct 2025 17:08:14 -0400 Subject: Partially revert "rust: drm: gem: Implement AlwaysRefCounted for all gem objects automatically" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently in order to implement AlwaysRefCounted for gem objects, we use a blanket implementation: unsafe impl AlwaysRefCounted for T { … } While this technically works, it comes with the rather unfortunate downside that attempting to create a similar blanket implementation in any other kernel crate will now fail in a rather confusing way. Using an example from the (not yet upstream) rust DRM KMS bindings, if we were to add: unsafe impl AlwaysRefCounted for T { … } Then the moment that both blanket implementations are present in the same kernel tree, compilation fails with the following: error[E0119]: conflicting implementations of trait `types::AlwaysRefCounted` --> rust/kernel/drm/kms.rs:504:1 | 504 | unsafe impl AlwaysRefCounted for T { | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ conflicting implementation | ::: rust/kernel/drm/gem/mod.rs:97:1 | 97 | unsafe impl AlwaysRefCounted for T { | ---------------------------------------------------- first implementation here So, revert these changes for now. The proper fix for this is to introduce a macro for copy/pasting the same implementation of AlwaysRefCounted around. This reverts commit 38cb08c3fcd3f3b1d0225dcec8ae50fab5751549. 
Signed-off-by: Lyude Paul Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20251016210955.2813186-2-lyude@redhat.com --- rust/kernel/drm/gem/mod.rs | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs index 30c853988b94..20c2769a8c9d 100644 --- a/rust/kernel/drm/gem/mod.rs +++ b/rust/kernel/drm/gem/mod.rs @@ -55,26 +55,6 @@ pub trait IntoGEMObject: Sized + super::private::Sealed + AlwaysRefCounted { unsafe fn from_raw<'a>(self_ptr: *mut bindings::drm_gem_object) -> &'a Self; } -// SAFETY: All gem objects are refcounted. -unsafe impl AlwaysRefCounted for T { - fn inc_ref(&self) { - // SAFETY: The existence of a shared reference guarantees that the refcount is non-zero. - unsafe { bindings::drm_gem_object_get(self.as_raw()) }; - } - - unsafe fn dec_ref(obj: NonNull) { - // SAFETY: We either hold the only refcount on `obj`, or one of many - meaning that no one - // else could possibly hold a mutable reference to `obj` and thus this immutable reference - // is safe. - let obj = unsafe { obj.as_ref() }.as_raw(); - - // SAFETY: - // - The safety requirements guarantee that the refcount is non-zero. - // - We hold no references to `obj` now, making it safe for us to potentially deallocate it. - unsafe { bindings::drm_gem_object_put(obj) }; - } -} - extern "C" fn open_callback( raw_obj: *mut bindings::drm_gem_object, raw_file: *mut bindings::drm_file, @@ -273,6 +253,22 @@ impl Object { } } +// SAFETY: Instances of `Object` are always reference-counted. +unsafe impl crate::types::AlwaysRefCounted for Object { + fn inc_ref(&self) { + // SAFETY: The existence of a shared reference guarantees that the refcount is non-zero. + unsafe { bindings::drm_gem_object_get(self.as_raw()) }; + } + + unsafe fn dec_ref(obj: NonNull) { + // SAFETY: `obj` is a valid pointer to an `Object`. 
+ let obj = unsafe { obj.as_ref() }; + + // SAFETY: The safety requirements guarantee that the refcount is non-zero. + unsafe { bindings::drm_gem_object_put(obj.as_raw()) } + } +} + impl super::private::Sealed for Object {} impl Deref for Object { -- cgit From d3917368ebc5cd89d7d08eab4673e5c4c73ff42f Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 21 Oct 2025 13:21:36 -0400 Subject: rust: drm/gem: Remove Object.dev I noticed by chance that there's actually already a pointer to this in struct drm_gem_object. So, no use in carrying this around! Signed-off-by: Lyude Paul Acked-by: Danilo Krummrich Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20251021172220.252558-1-lyude@redhat.com --- rust/kernel/drm/gem/mod.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs index 20c2769a8c9d..eb5f3feac890 100644 --- a/rust/kernel/drm/gem/mod.rs +++ b/rust/kernel/drm/gem/mod.rs @@ -167,12 +167,10 @@ impl BaseObject for T {} /// Invariants /// /// - `self.obj` is a valid instance of a `struct drm_gem_object`. -/// - `self.dev` is always a valid pointer to a `struct drm_device`. #[repr(C)] #[pin_data] pub struct Object { obj: Opaque, - dev: NonNull>, #[pin] data: T, } @@ -202,9 +200,6 @@ impl Object { try_pin_init!(Self { obj: Opaque::new(bindings::drm_gem_object::default()), data <- T::new(dev, size), - // INVARIANT: The drm subsystem guarantees that the `struct drm_device` will live - // as long as the GEM object lives. - dev: dev.into(), }), GFP_KERNEL, )?; @@ -227,9 +222,13 @@ impl Object { /// Returns the `Device` that owns this GEM object. pub fn dev(&self) -> &drm::Device { - // SAFETY: The DRM subsystem guarantees that the `struct drm_device` will live as long as - // the GEM object lives, hence the pointer must be valid. - unsafe { self.dev.as_ref() } + // SAFETY: + // - `struct drm_gem_object.dev` is initialized and valid for as long as the GEM + // object lives. 
+ // - The device we used for creating the gem object is passed as &drm::Device to + // Object::::new(), so we know that `T::Driver` is the right generic parameter to use + // here. + unsafe { drm::Device::from_raw((*self.as_raw()).dev) } } fn as_raw(&self) -> *mut bindings::drm_gem_object { -- cgit From 34aadecdf3ab9f0d2d4f1ce001d016f64e15fbf0 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Fri, 24 Oct 2025 18:08:15 -0700 Subject: gpu: nova-core: regs: rename .alter() --> .update() This also changes .try_alter() to try_update(). After this commit, instead of "read, write and alter", the methods available for registers are now "read, write and update". This reads a lot easier for people who are used to working with registers, and aligns the API with what e.g. regmap uses. No functional changes are intended. Signed-off-by: John Hubbard [acourbot@nvidia.com: add Link tag for context.] [acourbot@nvidida.com: mention regmap in commit log.] Link: https://lore.kernel.org/all/2c5d90c8-e73a-4f04-9c1d-30adbd0fef07@nvidia.com/ Signed-off-by: Alexandre Courbot Message-ID: <20251025010815.566909-2-jhubbard@nvidia.com> --- drivers/gpu/nova-core/falcon.rs | 8 ++++---- drivers/gpu/nova-core/regs/macros.rs | 28 ++++++++++++++-------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 3f505b870601..1e70e39c9671 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -420,13 +420,13 @@ impl Falcon { } }); - regs::NV_PFALCON_FALCON_ENGINE::alter(bar, &E::ID, |v| v.set_reset(true)); + regs::NV_PFALCON_FALCON_ENGINE::update(bar, &E::ID, |v| v.set_reset(true)); // TODO[DLAY]: replace with udelay() or equivalent once available. // TIMEOUT: falcon engine should not take more than 10us to reset. 
let _: Result = util::wait_on(Delta::from_micros(10), || None); - regs::NV_PFALCON_FALCON_ENGINE::alter(bar, &E::ID, |v| v.set_reset(false)); + regs::NV_PFALCON_FALCON_ENGINE::update(bar, &E::ID, |v| v.set_reset(false)); self.reset_wait_mem_scrubbing(bar)?; @@ -543,9 +543,9 @@ impl Falcon { /// Perform a DMA load into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. pub(crate) fn dma_load>(&self, bar: &Bar0, fw: &F) -> Result { - regs::NV_PFALCON_FBIF_CTL::alter(bar, &E::ID, |v| v.set_allow_phys_no_ctx(true)); + regs::NV_PFALCON_FBIF_CTL::update(bar, &E::ID, |v| v.set_allow_phys_no_ctx(true)); regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID); - regs::NV_PFALCON_FBIF_TRANSCFG::alter(bar, &E::ID, 0, |v| { + regs::NV_PFALCON_FBIF_TRANSCFG::update(bar, &E::ID, 0, |v| { v.set_target(FalconFbifTarget::CoherentSysmem) .set_mem_type(FalconFbifMemType::Physical) }); diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index c0a5194e8d97..fd1a815fa57d 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -52,7 +52,7 @@ pub(crate) trait RegisterBase { /// boot0.set_major_revision(3).set_minor_revision(10).write(&bar); /// /// // Or, just read and update the register in a single step: -/// BOOT_0::alter(&bar, |r| r.set_major_revision(3).set_minor_revision(10)); +/// BOOT_0::update(&bar, |r| r.set_major_revision(3).set_minor_revision(10)); /// ``` /// /// The documentation strings are optional. If present, they will be added to the type's @@ -136,15 +136,15 @@ pub(crate) trait RegisterBase { /// 0:0 start as bool, "Start the CPU core"; /// }); /// -/// // The `read`, `write` and `alter` methods of relative registers take an extra `base` argument +/// // The `read`, `write` and `update` methods of relative registers take an extra `base` argument /// // that is used to resolve its final address by adding its `BASE` to the offset of the /// // register. /// /// // Start `CPU0`. 
-/// CPU_CTL::alter(bar, &CPU0, |r| r.set_start(true)); +/// CPU_CTL::update(bar, &CPU0, |r| r.set_start(true)); /// /// // Start `CPU1`. -/// CPU_CTL::alter(bar, &CPU1, |r| r.set_start(true)); +/// CPU_CTL::update(bar, &CPU1, |r| r.set_start(true)); /// /// // Aliases can also be defined for relative register. /// register!(CPU_CTL_ALIAS => CpuCtlBase[CPU_CTL], "Alias to CPU core control" { @@ -152,7 +152,7 @@ pub(crate) trait RegisterBase { /// }); /// /// // Start the aliased `CPU0`. -/// CPU_CTL_ALIAS::alter(bar, &CPU0, |r| r.set_alias_start(true)); +/// CPU_CTL_ALIAS::update(bar, &CPU0, |r| r.set_alias_start(true)); /// ``` /// /// ## Arrays of registers @@ -160,7 +160,7 @@ pub(crate) trait RegisterBase { /// Some I/O areas contain consecutive values that can be interpreted in the same way. These areas /// can be defined as an array of identical registers, allowing them to be accessed by index with /// compile-time or runtime bound checking. Simply define their address as `Address[Size]`, and add -/// an `idx` parameter to their `read`, `write` and `alter` methods: +/// an `idx` parameter to their `read`, `write` and `update` methods: /// /// ```no_run /// # fn no_run() -> Result<(), Error> { @@ -386,7 +386,7 @@ macro_rules! register { /// Read the register from its address in `io` and run `f` on its value to obtain a new /// value to write back. #[inline(always)] - pub(crate) fn alter( + pub(crate) fn update( io: &T, f: F, ) where @@ -449,7 +449,7 @@ macro_rules! register { /// the register's offset to it, then run `f` on its value to obtain a new value to /// write back. #[inline(always)] - pub(crate) fn alter( + pub(crate) fn update( io: &T, base: &B, f: F, @@ -507,7 +507,7 @@ macro_rules! register { /// Read the array register at index `idx` in `io` and run `f` on its value to obtain a /// new value to write back. #[inline(always)] - pub(crate) fn alter( + pub(crate) fn update( io: &T, idx: usize, f: F, @@ -562,7 +562,7 @@ macro_rules! 
register { /// The validity of `idx` is checked at run-time, and `EINVAL` is returned if the /// access was out-of-bounds. #[inline(always)] - pub(crate) fn try_alter( + pub(crate) fn try_update( io: &T, idx: usize, f: F, @@ -571,7 +571,7 @@ macro_rules! register { F: ::core::ops::FnOnce(Self) -> Self, { if idx < Self::SIZE { - Ok(Self::alter(io, idx, f)) + Ok(Self::update(io, idx, f)) } else { Err(EINVAL) } @@ -636,7 +636,7 @@ macro_rules! register { /// by `base` and adding the register's offset to it, then run `f` on its value to /// obtain a new value to write back. #[inline(always)] - pub(crate) fn alter( + pub(crate) fn update( io: &T, base: &B, idx: usize, @@ -700,7 +700,7 @@ macro_rules! register { /// The validity of `idx` is checked at run-time, and `EINVAL` is returned if the /// access was out-of-bounds. #[inline(always)] - pub(crate) fn try_alter( + pub(crate) fn try_update( io: &T, base: &B, idx: usize, @@ -711,7 +711,7 @@ macro_rules! register { F: ::core::ops::FnOnce(Self) -> Self, { if idx < Self::SIZE { - Ok(Self::alter(io, base, idx, f)) + Ok(Self::update(io, base, idx, f)) } else { Err(EINVAL) } -- cgit From f6797dca29bf4bd6b66e1f4284f94dfe08d9d513 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Fri, 24 Oct 2025 18:20:17 -0700 Subject: gpu: nova-core: Ada: basic GPU identification ...which is sufficient to make Ada GPUs work, because they use the pre-existing Ampere GPU code, unmodified. Tested on AD102 (RTX 6000 Ada).
Signed-off-by: John Hubbard Signed-off-by: Alexandre Courbot Message-ID: <20251025012017.573078-1-jhubbard@nvidia.com> --- drivers/gpu/nova-core/falcon/hal.rs | 2 +- drivers/gpu/nova-core/firmware/gsp.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/nova-core/falcon/hal.rs b/drivers/gpu/nova-core/falcon/hal.rs index bba288455617..c6c71db1bb70 100644 --- a/drivers/gpu/nova-core/falcon/hal.rs +++ b/drivers/gpu/nova-core/falcon/hal.rs @@ -44,7 +44,7 @@ pub(super) fn falcon_hal( use Chipset::*; let hal = match chipset { - GA102 | GA103 | GA104 | GA106 | GA107 => { + GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 => { KBox::new(ga102::Ga102::::new(), GFP_KERNEL)? as KBox> } _ => return Err(ENOTSUPP), diff --git a/drivers/gpu/nova-core/firmware/gsp.rs b/drivers/gpu/nova-core/firmware/gsp.rs index 6b0761460a57..24c3ea698940 100644 --- a/drivers/gpu/nova-core/firmware/gsp.rs +++ b/drivers/gpu/nova-core/firmware/gsp.rs @@ -150,6 +150,7 @@ impl GspFirmware { let sigs_section = match chipset.arch() { Architecture::Ampere => ".fwsignature_ga10x", + Architecture::Ada => ".fwsignature_ad10x", _ => return Err(ENOTSUPP), }; let signatures = elf::elf64_section(fw.data(), sigs_section) -- cgit From 1784fb79d6c8db159d928314391817c425731de8 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Fri, 24 Oct 2025 18:40:49 -0700 Subject: gpu: nova-core: remove an unnecessary register read: HWCFG1 This register read is not required in order to bring up any of the GPUs, and it is read too early on Hopper/Blackwell+ GPUs anyway. So just stop doing this. 
Signed-off-by: John Hubbard Signed-off-by: Alexandre Courbot Message-ID: <20251025014050.585153-2-jhubbard@nvidia.com> --- drivers/gpu/nova-core/falcon.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 1e70e39c9671..4c14ce1d19e9 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -371,11 +371,6 @@ impl Falcon { bar: &Bar0, need_riscv: bool, ) -> Result { - let hwcfg1 = regs::NV_PFALCON_FALCON_HWCFG1::read(bar, &E::ID); - // Check that the revision and security model contain valid values. - let _ = hwcfg1.core_rev()?; - let _ = hwcfg1.security_model()?; - if need_riscv { let hwcfg2 = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); if !hwcfg2.riscv() { -- cgit From c58f00b44eed0968bd8ea0ce8082ef72aa19e1f8 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Fri, 24 Oct 2025 18:40:50 -0700 Subject: gpu: nova-core: remove unnecessary need_riscv, bar parameters The need_riscv parameter and its associated RISCV validation logic are actually unnecessary for correct operation. Remove it, along with the now-unused bar parameter as well. Signed-off-by: John Hubbard Signed-off-by: Alexandre Courbot Message-ID: <20251025014050.585153-3-jhubbard@nvidia.com> --- drivers/gpu/nova-core/falcon.rs | 21 +-------------------- drivers/gpu/nova-core/gpu.rs | 4 +--- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 4c14ce1d19e9..e4a4d454941c 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -362,26 +362,7 @@ pub(crate) struct Falcon { impl Falcon { /// Create a new falcon instance. - /// - /// `need_riscv` is set to `true` if the caller expects the falcon to be a dual falcon/riscv - /// controller.
- pub(crate) fn new( - dev: &device::Device, - chipset: Chipset, - bar: &Bar0, - need_riscv: bool, - ) -> Result { - if need_riscv { - let hwcfg2 = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); - if !hwcfg2.riscv() { - dev_err!( - dev, - "riscv support requested on a controller that does not support it\n" - ); - return Err(EINVAL); - } - } - + pub(crate) fn new(dev: &device::Device, chipset: Chipset) -> Result { Ok(Self { hal: hal::falcon_hal(chipset)?, dev: dev.into(), diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index af20e2daea24..9d182bffe8b4 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -213,12 +213,10 @@ impl Gpu { gsp_falcon: Falcon::new( pdev.as_ref(), spec.chipset, - bar, - spec.chipset > Chipset::GA100, ) .inspect(|falcon| falcon.clear_swgen0_intr(bar))?, - sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset, bar, true)?, + sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset)?, gsp <- Gsp::new(), -- cgit From 76544ef6a01b2d8fa86f92ff17940b6ff534696e Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 20 Oct 2025 15:09:25 +0900 Subject: gpu: nova-core: replace wait_on with kernel equivalents wait_on was a temporary helper function waiting for a kernel crate equivalent. Now that read_poll_timeout and fsleep are available, use them and remove wait_on. 
Acked-by: Danilo Krummrich Signed-off-by: Alexandre Courbot Message-ID: <20251020-nova_wait_on-v1-1-2eb87fb38d14@nvidia.com> --- Documentation/gpu/nova/core/todo.rst | 11 ------ drivers/gpu/nova-core/falcon.rs | 62 ++++++++++++++----------------- drivers/gpu/nova-core/falcon/hal/ga102.rs | 16 ++++---- drivers/gpu/nova-core/gfw.rs | 36 ++++++++---------- drivers/gpu/nova-core/nova_core.rs | 1 - drivers/gpu/nova-core/util.rs | 27 -------------- 6 files changed, 51 insertions(+), 102 deletions(-) delete mode 100644 drivers/gpu/nova-core/util.rs diff --git a/Documentation/gpu/nova/core/todo.rst b/Documentation/gpu/nova/core/todo.rst index 0972cb905f7a..c55c7bedbfdf 100644 --- a/Documentation/gpu/nova/core/todo.rst +++ b/Documentation/gpu/nova/core/todo.rst @@ -153,17 +153,6 @@ A `num` core kernel module is being designed to provide these operations. | Complexity: Intermediate | Contact: Alexandre Courbot -Delay / Sleep abstractions [DLAY] ---------------------------------- - -Rust abstractions for the kernel's delay() and sleep() functions. - -FUJITA Tomonori plans to work on abstractions for read_poll_timeout_atomic() -(and friends) [1]. 
- -| Complexity: Beginner -| Link: https://lore.kernel.org/netdev/20250228.080550.354359820929821928.fujita.tomonori@gmail.com/ [1] - IRQ abstractions ---------------- diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index e4a4d454941c..fb3561cc9746 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -6,8 +6,10 @@ use core::ops::Deref; use hal::FalconHal; use kernel::device; use kernel::dma::DmaAddress; +use kernel::io::poll::read_poll_timeout; use kernel::prelude::*; use kernel::sync::aref::ARef; +use kernel::time::delay::fsleep; use kernel::time::Delta; use crate::dma::DmaObject; @@ -15,7 +17,6 @@ use crate::driver::Bar0; use crate::gpu::Chipset; use crate::regs; use crate::regs::macros::RegisterBase; -use crate::util; pub(crate) mod gsp; mod hal; @@ -372,13 +373,13 @@ impl Falcon { /// Wait for memory scrubbing to complete. fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result { // TIMEOUT: memory scrubbing should complete in less than 20ms. - util::wait_on(Delta::from_millis(20), || { - if regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID).mem_scrubbing_done() { - Some(()) - } else { - None - } - }) + read_poll_timeout( + || Ok(regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID)), + |r| r.mem_scrubbing_done(), + Delta::ZERO, + Delta::from_millis(20), + ) + .map(|_| ()) } /// Reset the falcon engine. @@ -387,20 +388,17 @@ impl Falcon { // According to OpenRM's `kflcnPreResetWait_GA102` documentation, HW sometimes does not set // RESET_READY so a non-failing timeout is used. 
- let _ = util::wait_on(Delta::from_micros(150), || { - let r = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); - if r.reset_ready() { - Some(()) - } else { - None - } - }); + let _ = read_poll_timeout( + || Ok(regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID)), + |r| r.reset_ready(), + Delta::ZERO, + Delta::from_micros(150), + ); regs::NV_PFALCON_FALCON_ENGINE::update(bar, &E::ID, |v| v.set_reset(true)); - // TODO[DLAY]: replace with udelay() or equivalent once available. // TIMEOUT: falcon engine should not take more than 10us to reset. - let _: Result = util::wait_on(Delta::from_micros(10), || None); + fsleep(Delta::from_micros(10)); regs::NV_PFALCON_FALCON_ENGINE::update(bar, &E::ID, |v| v.set_reset(false)); @@ -504,14 +502,12 @@ impl Falcon { // Wait for the transfer to complete. // TIMEOUT: arbitrarily large value, no DMA transfer to the falcon's small memories // should ever take that long. - util::wait_on(Delta::from_secs(2), || { - let r = regs::NV_PFALCON_FALCON_DMATRFCMD::read(bar, &E::ID); - if r.idle() { - Some(()) - } else { - None - } - })?; + read_poll_timeout( + || Ok(regs::NV_PFALCON_FALCON_DMATRFCMD::read(bar, &E::ID)), + |r| r.idle(), + Delta::ZERO, + Delta::from_secs(2), + )?; } Ok(()) @@ -574,14 +570,12 @@ impl Falcon { } // TIMEOUT: arbitrarily large value, firmwares should complete in less than 2 seconds. 
- util::wait_on(Delta::from_secs(2), || { - let r = regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID); - if r.halted() { - Some(()) - } else { - None - } - })?; + read_poll_timeout( + || Ok(regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID)), + |r| r.halted(), + Delta::ZERO, + Delta::from_secs(2), + )?; let (mbox0, mbox1) = ( regs::NV_PFALCON_FALCON_MAILBOX0::read(bar, &E::ID).value(), diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs index 0b1cbe7853b3..f2ae9537321d 100644 --- a/drivers/gpu/nova-core/falcon/hal/ga102.rs +++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs @@ -3,6 +3,7 @@ use core::marker::PhantomData; use kernel::device; +use kernel::io::poll::read_poll_timeout; use kernel::prelude::*; use kernel::time::Delta; @@ -11,7 +12,6 @@ use crate::falcon::{ Falcon, FalconBromParams, FalconEngine, FalconModSelAlgo, PeregrineCoreSelect, }; use crate::regs; -use crate::util; use super::FalconHal; @@ -23,14 +23,12 @@ fn select_core_ga102(bar: &Bar0) -> Result { .write(bar, &E::ID); // TIMEOUT: falcon core should take less than 10ms to report being enabled. - util::wait_on(Delta::from_millis(10), || { - let r = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID); - if r.valid() { - Some(()) - } else { - None - } - })?; + read_poll_timeout( + || Ok(regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID)), + |r| r.valid(), + Delta::ZERO, + Delta::from_millis(10), + )?; } Ok(()) diff --git a/drivers/gpu/nova-core/gfw.rs b/drivers/gpu/nova-core/gfw.rs index 8ac1ed187199..23c28c2a3793 100644 --- a/drivers/gpu/nova-core/gfw.rs +++ b/drivers/gpu/nova-core/gfw.rs @@ -18,13 +18,12 @@ //! //! Note that the devinit sequence also needs to run during suspend/resume. -use kernel::bindings; +use kernel::io::poll::read_poll_timeout; use kernel::prelude::*; use kernel::time::Delta; use crate::driver::Bar0; use crate::regs; -use crate::util; /// Wait for the `GFW` (GPU firmware) boot completion signal (`GFW_BOOT`), or a 4 seconds timeout. 
/// @@ -50,22 +49,19 @@ pub(crate) fn wait_gfw_boot_completion(bar: &Bar0) -> Result { // // TIMEOUT: arbitrarily large value. GFW starts running immediately after the GPU is put out of // reset, and should complete in less time than that. - util::wait_on(Delta::from_secs(4), || { - // Check that FWSEC has lowered its protection level before reading the GFW_BOOT status. - let gfw_booted = regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK::read(bar) - .read_protection_level0() - && regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT::read(bar).completed(); - - if gfw_booted { - Some(()) - } else { - // TODO[DLAY]: replace with [1] once it merges. - // [1] https://lore.kernel.org/rust-for-linux/20250423192857.199712-6-fujita.tomonori@gmail.com/ - // - // SAFETY: `msleep()` is safe to call with any parameter. - unsafe { bindings::msleep(1) }; - - None - } - }) + read_poll_timeout( + || { + Ok( + // Check that FWSEC has lowered its protection level before reading the GFW_BOOT + // status. 
+ regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK::read(bar) + .read_protection_level0() + && regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT::read(bar).completed(), + ) + }, + |&gfw_booted| gfw_booted, + Delta::from_millis(1), + Delta::from_secs(4), + ) + .map(|_| ()) } diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index 112277c7921e..e130166c1086 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -14,7 +14,6 @@ mod gfw; mod gpu; mod gsp; mod regs; -mod util; mod vbios; pub(crate) const MODULE_NAME: &kernel::str::CStr = ::NAME; diff --git a/drivers/gpu/nova-core/util.rs b/drivers/gpu/nova-core/util.rs deleted file mode 100644 index bf35f00cb732..000000000000 --- a/drivers/gpu/nova-core/util.rs +++ /dev/null @@ -1,27 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -use kernel::prelude::*; -use kernel::time::{Delta, Instant, Monotonic}; - -/// Wait until `cond` is true or `timeout` elapsed. -/// -/// When `cond` evaluates to `Some`, its return value is returned. -/// -/// `Err(ETIMEDOUT)` is returned if `timeout` has been reached without `cond` evaluating to -/// `Some`. -/// -/// TODO[DLAY]: replace with `read_poll_timeout` once it is available. -/// (https://lore.kernel.org/lkml/20250220070611.214262-8-fujita.tomonori@gmail.com/) -pub(crate) fn wait_on Option>(timeout: Delta, cond: F) -> Result { - let start_time = Instant::::now(); - - loop { - if let Some(ret) = cond() { - return Ok(ret); - } - - if start_time.elapsed().as_nanos() > timeout.as_nanos() { - return Err(ETIMEDOUT); - } - } -} -- cgit From fa08ec789f81425c3722fd2c6df825e06c810b43 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 22 Oct 2025 19:50:56 +0900 Subject: gpu: nova-core: bitfield: simplify condition This condition was uselessly convoluted. 
Reported-by: Edwin Peer Link: https://lore.kernel.org/rust-for-linux/F3853912-2C1C-4F9B-89B0-3168689F35B3@nvidia.com/ Reviewed-by: Joel Fernandes Reviewed-by: Danilo Krummrich Signed-off-by: Alexandre Courbot Message-ID: <20251022-nova-bitfield-v1-1-73bc0988667b@nvidia.com> --- drivers/gpu/nova-core/bitfield.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/nova-core/bitfield.rs b/drivers/gpu/nova-core/bitfield.rs index 25579b4c328f..136de7289162 100644 --- a/drivers/gpu/nova-core/bitfield.rs +++ b/drivers/gpu/nova-core/bitfield.rs @@ -203,7 +203,7 @@ macro_rules! bitfield { ) => { bitfield!( @leaf_accessor $vis $name $storage, $hi:$lo $field - { |f| <$into_type>::from(if f != 0 { true } else { false }) } + { |f| <$into_type>::from(f != 0) } bool $into_type => $into_type $(, $comment)?; ); }; -- cgit From e6b8932b9374bd0591bb4dda3eb3e4f1cc50655e Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 22 Oct 2025 19:50:57 +0900 Subject: gpu: nova-core: bitfield: simplify expression The shift is more easily expressed by the index of the lowest bit of the field. Reported-by: Edwin Peer Link: https://lore.kernel.org/rust-for-linux/F3853912-2C1C-4F9B-89B0-3168689F35B3@nvidia.com/ Reviewed-by: Joel Fernandes Reviewed-by: Danilo Krummrich Signed-off-by: Alexandre Courbot Message-ID: <20251022-nova-bitfield-v1-2-73bc0988667b@nvidia.com> --- drivers/gpu/nova-core/bitfield.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/nova-core/bitfield.rs b/drivers/gpu/nova-core/bitfield.rs index 136de7289162..f113439c6501 100644 --- a/drivers/gpu/nova-core/bitfield.rs +++ b/drivers/gpu/nova-core/bitfield.rs @@ -268,7 +268,7 @@ macro_rules! 
bitfield { _ => ::kernel::build_error!("Unsupported storage type size") } }; - const [<$field:upper _SHIFT>]: u32 = Self::[<$field:upper _MASK>].trailing_zeros(); + const [<$field:upper _SHIFT>]: u32 = $lo; ); $( -- cgit From ca16b15e78f4dee1631c0a68693f5e7d9b3bb3ec Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 22 Oct 2025 19:50:58 +0900 Subject: gpu: nova-core: bitfield: remove BitOr implementation Using this operand can produce invalid values. It also doesn't bring any benefit as one can use the builder pattern to assemble a new value. Reported-by: Edwin Peer Link: https://lore.kernel.org/rust-for-linux/F3853912-2C1C-4F9B-89B0-3168689F35B3@nvidia.com/ Reviewed-by: Joel Fernandes Reviewed-by: Danilo Krummrich Signed-off-by: Alexandre Courbot Message-ID: <20251022-nova-bitfield-v1-3-73bc0988667b@nvidia.com> --- drivers/gpu/nova-core/bitfield.rs | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/gpu/nova-core/bitfield.rs b/drivers/gpu/nova-core/bitfield.rs index f113439c6501..16e143658c51 100644 --- a/drivers/gpu/nova-core/bitfield.rs +++ b/drivers/gpu/nova-core/bitfield.rs @@ -94,7 +94,7 @@ macro_rules! bitfield { // All rules below are helpers. // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, - // `Default`, `BitOr`, and conversion to the value type) and field accessor methods. + // `Default`, and conversion to the value type) and field accessor methods. (@core $vis:vis $name:ident $storage:ty $(, $comment:literal)? { $($fields:tt)* }) => { $( #[doc=$comment] @@ -103,14 +103,6 @@ macro_rules! 
bitfield { #[derive(Clone, Copy)] $vis struct $name($storage); - impl ::core::ops::BitOr for $name { - type Output = Self; - - fn bitor(self, rhs: Self) -> Self::Output { - Self(self.0 | rhs.0) - } - } - impl ::core::convert::From<$name> for $storage { fn from(val: $name) -> $storage { val.0 -- cgit From 29e7c311b5726f0836013b7e7d8920d9b8dc1812 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 28 Oct 2025 11:44:27 +0100 Subject: MAINTAINERS: add Tyr to DRM DRIVERS AND COMMON INFRASTRUCTURE [RUST] Commit cf4fd52e3236 ("rust: drm: Introduce the Tyr driver for Arm Mali GPUs") introduced the Tyr driver for ARM Mali GPUs, which is maintained through the drm-rust tree, hence add it to the corresponding entry in MAINTAINERS. Signed-off-by: Danilo Krummrich Acked-by: Daniel Almeida Acked-by: Alice Ryhl Link: https://patch.msgid.link/20251028104433.334886-1-dakr@kernel.org Signed-off-by: Alice Ryhl --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 545a4776795e..952aed4619c2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8247,6 +8247,7 @@ S: Supported W: https://drm.pages.freedesktop.org/maintainer-tools/drm-rust.html T: git https://gitlab.freedesktop.org/drm/rust/kernel.git F: drivers/gpu/drm/nova/ +F: drivers/gpu/drm/tyr/ F: drivers/gpu/nova-core/ F: rust/kernel/drm/ -- cgit From ba1b40ed0e34bab597fd90d4c4e9f7397f878c8f Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 28 Oct 2025 12:00:52 +0100 Subject: drm: nova: depend on CONFIG_64BIT nova-core already depends on CONFIG_64BIT, hence also depend on CONFIG_64BIT for nova-drm. 
Reviewed-by: Alexandre Courbot Reviewed-by: John Hubbard Link: https://patch.msgid.link/20251028110058.340320-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nova/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/nova/Kconfig b/drivers/gpu/drm/nova/Kconfig index cca6a3fea879..2a70aac64d7e 100644 --- a/drivers/gpu/drm/nova/Kconfig +++ b/drivers/gpu/drm/nova/Kconfig @@ -1,5 +1,6 @@ config DRM_NOVA tristate "Nova DRM driver" + depends on 64BIT depends on DRM=y depends on PCI depends on RUST -- cgit From 97ad568cd6a58804129ba071f3258b5c4782fb0d Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 28 Oct 2025 12:00:53 +0100 Subject: drm: nova: select NOVA_CORE The nova-drm driver does not provide any value without nova-core being selected as well, hence select NOVA_CORE. Fixes: cdeaeb9dd762 ("drm: nova-drm: add initial driver skeleton") Reviewed-by: Alexandre Courbot Reviewed-by: John Hubbard Link: https://patch.msgid.link/20251028110058.340320-2-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nova/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/nova/Kconfig b/drivers/gpu/drm/nova/Kconfig index 2a70aac64d7e..3e637ad7b5ba 100644 --- a/drivers/gpu/drm/nova/Kconfig +++ b/drivers/gpu/drm/nova/Kconfig @@ -5,6 +5,7 @@ config DRM_NOVA depends on PCI depends on RUST select AUXILIARY_BUS + select NOVA_CORE default n help Choose this if you want to build the Nova DRM driver for Nvidia -- cgit From 9a3c2f8a4f84960a48c056d0da88de3d09e6d622 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 29 Oct 2025 08:12:09 +0900 Subject: gpu: nova-core: replace `as` with `from` conversions where possible The `as` operator is best avoided as it silently drops bits if the destination type is smaller than the source. For data types where this is clearly not the case, use `from` to unambiguously signal that these conversions are lossless.
Acked-by: Danilo Krummrich Signed-off-by: Alexandre Courbot Message-ID: <20251029-nova-as-v3-1-6a30c7333ad9@nvidia.com> --- drivers/gpu/nova-core/falcon/hal/ga102.rs | 6 ++--- drivers/gpu/nova-core/firmware/fwsec.rs | 4 +-- drivers/gpu/nova-core/vbios.rs | 42 +++++++++++++++---------------- 3 files changed, 25 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs index f2ae9537321d..afed353b24d2 100644 --- a/drivers/gpu/nova-core/falcon/hal/ga102.rs +++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs @@ -40,11 +40,9 @@ fn signature_reg_fuse_version_ga102( engine_id_mask: u16, ucode_id: u8, ) -> Result { - const NV_FUSE_OPT_FPF_SIZE: u8 = regs::NV_FUSE_OPT_FPF_SIZE as u8; - // Each engine has 16 ucode version registers numbered from 1 to 16. - let ucode_idx = match ucode_id { - 1..=NV_FUSE_OPT_FPF_SIZE => (ucode_id - 1) as usize, + let ucode_idx = match usize::from(ucode_id) { + ucode_id @ 1..=regs::NV_FUSE_OPT_FPF_SIZE => ucode_id - 1, _ => { dev_err!(dev, "invalid ucode id {:#x}", ucode_id); return Err(EINVAL); diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index 8edbb5c0572c..dd3420aaa2bf 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -259,13 +259,13 @@ impl FirmwareDmaObject { } // Find the DMEM mapper section in the firmware. - for i in 0..hdr.entry_count as usize { + for i in 0..usize::from(hdr.entry_count) { let app: &FalconAppifV1 = // SAFETY: we have exclusive access to `dma_object`. 
unsafe { transmute( &dma_object, - hdr_offset + hdr.header_size as usize + i * hdr.entry_size as usize + hdr_offset + usize::from(hdr.header_size) + i * usize::from(hdr.entry_size) ) }?; diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index ad070a0420ca..74ed6d61e6cc 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -361,7 +361,7 @@ impl PcirStruct { /// Calculate image size in bytes from 512-byte blocks. fn image_size_bytes(&self) -> usize { - self.image_len as usize * 512 + usize::from(self.image_len) * 512 } } @@ -439,13 +439,13 @@ impl BitToken { let header = &image.bit_header; // Offset to the first token entry - let tokens_start = image.bit_offset + header.header_size as usize; + let tokens_start = image.bit_offset + usize::from(header.header_size); - for i in 0..header.token_entries as usize { - let entry_offset = tokens_start + (i * header.token_size as usize); + for i in 0..usize::from(header.token_entries) { + let entry_offset = tokens_start + (i * usize::from(header.token_size)); // Make sure we don't go out of bounds - if entry_offset + header.token_size as usize > image.base.data.len() { + if entry_offset + usize::from(header.token_size) > image.base.data.len() { return Err(EINVAL); } @@ -601,7 +601,7 @@ impl NpdeStruct { /// Calculate image size in bytes from 512-byte blocks. fn image_size_bytes(&self) -> usize { - self.subimage_len as usize * 512 + usize::from(self.subimage_len) * 512 } /// Try to find NPDE in the data, the NPDE is right after the PCIR. 
@@ -613,8 +613,8 @@ impl NpdeStruct { ) -> Option { // Calculate the offset where NPDE might be located // NPDE should be right after the PCIR structure, aligned to 16 bytes - let pcir_offset = rom_header.pci_data_struct_offset as usize; - let npde_start = (pcir_offset + pcir.pci_data_struct_len as usize + 0x0F) & !0x0F; + let pcir_offset = usize::from(rom_header.pci_data_struct_offset); + let npde_start = (pcir_offset + usize::from(pcir.pci_data_struct_len) + 0x0F) & !0x0F; // Check if we have enough data if npde_start + core::mem::size_of::() > data.len() { @@ -737,7 +737,7 @@ impl BiosImage { .inspect_err(|e| dev_err!(dev, "Failed to create PciRomHeader: {:?}\n", e))?; // Get the PCI Data Structure using the pointer from the ROM header. - let pcir_offset = rom_header.pci_data_struct_offset as usize; + let pcir_offset = usize::from(rom_header.pci_data_struct_offset); let pcir_data = data .get(pcir_offset..pcir_offset + core::mem::size_of::()) .ok_or(EINVAL) @@ -805,12 +805,12 @@ impl PciAtBiosImage { let token = self.get_bit_token(BIT_TOKEN_ID_FALCON_DATA)?; // Make sure we don't go out of bounds - if token.data_offset as usize + 4 > self.base.data.len() { + if usize::from(token.data_offset) + 4 > self.base.data.len() { return Err(EINVAL); } // read the 4 bytes at the offset specified in the token - let offset = token.data_offset as usize; + let offset = usize::from(token.data_offset); let bytes: [u8; 4] = self.base.data[offset..offset + 4].try_into().map_err(|_| { dev_err!(self.base.dev, "Failed to convert data slice to array"); EINVAL @@ -886,9 +886,9 @@ impl PmuLookupTable { return Err(EINVAL); } - let header_len = data[1] as usize; - let entry_len = data[2] as usize; - let entry_count = data[3] as usize; + let header_len = usize::from(data[1]); + let entry_len = usize::from(data[2]); + let entry_count = usize::from(data[3]); let required_bytes = header_len + (entry_count * entry_len); @@ -911,9 +911,9 @@ impl PmuLookupTable { Ok(PmuLookupTable { version: 
data[0], - header_len: header_len as u8, - entry_len: entry_len as u8, - entry_count: entry_count as u8, + header_len: data[1], + entry_len: data[2], + entry_count: data[3], table_data, }) } @@ -923,7 +923,7 @@ impl PmuLookupTable { return Err(EINVAL); } - let index = (idx as usize) * self.entry_len as usize; + let index = (usize::from(idx)) * usize::from(self.entry_len); PmuLookupTableEntry::new(&self.table_data[index..]) } @@ -1092,8 +1092,8 @@ impl FwSecBiosImage { pub(crate) fn sigs(&self, desc: &FalconUCodeDescV3) -> Result<&[Bcrt30Rsa3kSignature]> { // The signatures data follows the descriptor. let sigs_data_offset = self.falcon_ucode_offset + core::mem::size_of::(); - let sigs_size = - desc.signature_count as usize * core::mem::size_of::(); + let sigs_count = usize::from(desc.signature_count); + let sigs_size = sigs_count * core::mem::size_of::(); // Make sure the data is within bounds. if sigs_data_offset + sigs_size > self.base.data.len() { @@ -1113,7 +1113,7 @@ impl FwSecBiosImage { .as_ptr() .add(sigs_data_offset) .cast::(), - desc.signature_count as usize, + sigs_count, ) }) } -- cgit From 505c3ec507a7eb4bbaef9aa8b13b6452e86baca2 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 29 Oct 2025 08:12:10 +0900 Subject: gpu: nova-core: vbios: do not use `as` when comparing BiosImageType Use the `image_type` method and compare its result to avoid using `as`. Reviewed-by: Joel Fernandes Signed-off-by: Alexandre Courbot Message-ID: <20251029-nova-as-v3-2-6a30c7333ad9@nvidia.com> --- drivers/gpu/nova-core/vbios.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index 74ed6d61e6cc..9283921e89c3 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -709,9 +709,8 @@ impl BiosImage { /// Check if this is the last image. 
fn is_last(&self) -> bool { - // For NBSI images (type == 0x70), return true as they're - // considered the last image - if self.pcir.code_type == BiosImageType::Nbsi as u8 { + // For NBSI images, return true as they're considered the last image. + if self.image_type() == Ok(BiosImageType::Nbsi) { return true; } -- cgit From 6553a8f168fb7941ae73d39eccac64f3a2b9b399 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 29 Oct 2025 08:12:11 +0900 Subject: gpu: nova-core: use `try_from` instead of `as` for u32 conversions There are a few situations in the driver where we convert a `usize` into a `u32` using `as`. Even though most of these are obviously correct, use `try_from` and let the compiler optimize wherever it is safe to do so. Acked-by: Danilo Krummrich Signed-off-by: Alexandre Courbot Message-ID: <20251029-nova-as-v3-3-6a30c7333ad9@nvidia.com> --- drivers/gpu/nova-core/fb/hal/tu102.rs | 16 +++++++--------- drivers/gpu/nova-core/firmware/fwsec.rs | 8 ++++---- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/nova-core/fb/hal/tu102.rs b/drivers/gpu/nova-core/fb/hal/tu102.rs index b022c781caf4..32114c3b3686 100644 --- a/drivers/gpu/nova-core/fb/hal/tu102.rs +++ b/drivers/gpu/nova-core/fb/hal/tu102.rs @@ -15,15 +15,13 @@ pub(super) fn read_sysmem_flush_page_gm107(bar: &Bar0) -> u64 { pub(super) fn write_sysmem_flush_page_gm107(bar: &Bar0, addr: u64) -> Result { // Check that the address doesn't overflow the receiving 32-bit register. 
- if addr >> (u32::BITS + FLUSH_SYSMEM_ADDR_SHIFT) == 0 { - regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::default() - .set_adr_39_08((addr >> FLUSH_SYSMEM_ADDR_SHIFT) as u32) - .write(bar); - - Ok(()) - } else { - Err(EINVAL) - } + u32::try_from(addr >> FLUSH_SYSMEM_ADDR_SHIFT) + .map_err(|_| EINVAL) + .map(|addr| { + regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::default() + .set_adr_39_08(addr) + .write(bar) + }) } pub(super) fn display_enabled_gm107(bar: &Bar0) -> bool { diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index dd3420aaa2bf..ce78c1563754 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -291,7 +291,7 @@ impl FirmwareDmaObject { frts_cmd.read_vbios = ReadVbios { ver: 1, - hdr: size_of::() as u32, + hdr: u32::try_from(size_of::())?, addr: 0, size: 0, flags: 2, @@ -304,9 +304,9 @@ impl FirmwareDmaObject { } => { frts_cmd.frts_region = FrtsRegion { ver: 1, - hdr: size_of::() as u32, - addr: (frts_addr >> 12) as u32, - size: (frts_size >> 12) as u32, + hdr: u32::try_from(size_of::())?, + addr: u32::try_from(frts_addr >> 12)?, + size: u32::try_from(frts_size >> 12)?, ftype: NVFW_FRTS_CMD_REGION_TYPE_FB, }; -- cgit From e4ead68a390511384d6af7bc9d00835dd6185e3b Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 29 Oct 2025 00:07:36 +0900 Subject: rust: transmute: add `from_bytes_prefix` family of methods The `from_bytes*` family of functions expect a slice of the exact same size as the requested type. This can be sometimes cumbersome for callers that deal with dynamic stream of data that needs to be manually cut before each invocation of `from_bytes`. To simplify such callers, introduce a new `from_bytes*_prefix` family of methods, which split the input slice at the index required for the equivalent `from_bytes` method to succeed, and return its result alongside with the remainder of the slice. This design is inspired by zerocopy's `try_*_from_prefix` family of methods. 
Reviewed-by: Joel Fernandes Reviewed-by: Danilo Krummrich Reviewed-by: Alice Ryhl Acked-by: Danilo Krummrich Signed-off-by: Alexandre Courbot Message-ID: <20251029-nova-vbios-frombytes-v1-1-ac441ebc1de3@nvidia.com> Message-ID: <20251101-b4-frombytes-prefix-v1-1-0d9c1fd63b34@nvidia.com> --- rust/kernel/transmute.rs | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/rust/kernel/transmute.rs b/rust/kernel/transmute.rs index cfc37d81adf2..be5dbf3829e2 100644 --- a/rust/kernel/transmute.rs +++ b/rust/kernel/transmute.rs @@ -58,6 +58,27 @@ pub unsafe trait FromBytes { } } + /// Converts the beginning of `bytes` to a reference to `Self`. + /// + /// This method is similar to [`Self::from_bytes`], with the difference that `bytes` does not + /// need to be the same size of `Self` - the appropriate portion is cut from the beginning of + /// `bytes`, and the remainder returned alongside `Self`. + fn from_bytes_prefix(bytes: &[u8]) -> Option<(&Self, &[u8])> + where + Self: Sized, + { + if bytes.len() < size_of::() { + None + } else { + // PANIC: We checked that `bytes.len() >= size_of::`, thus `split_at` cannot + // panic. + // TODO: replace with `split_at_checked` once the MSRV is >= 1.80. + let (prefix, remainder) = bytes.split_at(size_of::()); + + Self::from_bytes(prefix).map(|s| (s, remainder)) + } + } + /// Converts a mutable slice of bytes to a reference to `Self`. /// /// Succeeds if the reference is properly aligned, and the size of `bytes` is equal to that of @@ -80,6 +101,27 @@ pub unsafe trait FromBytes { } } + /// Converts the beginning of `bytes` to a mutable reference to `Self`. + /// + /// This method is similar to [`Self::from_bytes_mut`], with the difference that `bytes` does + /// not need to be the same size of `Self` - the appropriate portion is cut from the beginning + /// of `bytes`, and the remainder returned alongside `Self`. 
+ fn from_bytes_mut_prefix(bytes: &mut [u8]) -> Option<(&mut Self, &mut [u8])> + where + Self: AsBytes + Sized, + { + if bytes.len() < size_of::() { + None + } else { + // PANIC: We checked that `bytes.len() >= size_of::`, thus `split_at_mut` cannot + // panic. + // TODO: replace with `split_at_mut_checked` once the MSRV is >= 1.80. + let (prefix, remainder) = bytes.split_at_mut(size_of::()); + + Self::from_bytes_mut(prefix).map(|s| (s, remainder)) + } + } + /// Creates an owned instance of `Self` by copying `bytes`. /// /// Unlike [`FromBytes::from_bytes`], which requires aligned input, this method can be used on @@ -97,6 +139,27 @@ pub unsafe trait FromBytes { None } } + + /// Creates an owned instance of `Self` from the beginning of `bytes`. + /// + /// This method is similar to [`Self::from_bytes_copy`], with the difference that `bytes` does + /// not need to be the same size of `Self` - the appropriate portion is cut from the beginning + /// of `bytes`, and the remainder returned alongside `Self`. + fn from_bytes_copy_prefix(bytes: &[u8]) -> Option<(Self, &[u8])> + where + Self: Sized, + { + if bytes.len() < size_of::() { + None + } else { + // PANIC: We checked that `bytes.len() >= size_of::`, thus `split_at` cannot + // panic. + // TODO: replace with `split_at_checked` once the MSRV is >= 1.80. + let (prefix, remainder) = bytes.split_at(size_of::()); + + Self::from_bytes_copy(prefix).map(|s| (s, remainder)) + } + } } macro_rules! impl_frombytes { -- cgit From 7f74842d95d1a24c68d23320de4f3eb27e6ba82b Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 29 Oct 2025 00:07:37 +0900 Subject: gpu: nova-core: vbios: use FromBytes for PmuLookupTable header Use `from_bytes_copy_prefix` to create the `PmuLookupTable` header instead of building it ourselves from the bytes stream. This lets us remove a few `as` conversions and array accesses. 
Reviewed-by: Joel Fernandes Signed-off-by: Alexandre Courbot Message-ID: <20251029-nova-vbios-frombytes-v1-2-ac441ebc1de3@nvidia.com> --- drivers/gpu/nova-core/vbios.rs | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index 9283921e89c3..8854dbe0998d 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -10,6 +10,7 @@ use kernel::device; use kernel::error::Result; use kernel::prelude::*; use kernel::ptr::{Alignable, Alignment}; +use kernel::transmute::FromBytes; use kernel::types::ARef; /// The offset of the VBIOS ROM in the BAR0 space. @@ -865,29 +866,34 @@ impl PmuLookupTableEntry { } } +#[repr(C)] +struct PmuLookupTableHeader { + version: u8, + header_len: u8, + entry_len: u8, + entry_count: u8, +} + +// SAFETY: all bit patterns are valid for `PmuLookupTableHeader`. +unsafe impl FromBytes for PmuLookupTableHeader {} + /// The [`PmuLookupTableEntry`] structure is used to find the [`PmuLookupTableEntry`] for a given /// application ID. /// /// The table of entries is pointed to by the falcon data pointer in the BIT table, and is used to /// locate the Falcon Ucode. 
-#[expect(dead_code)] struct PmuLookupTable { - version: u8, - header_len: u8, - entry_len: u8, - entry_count: u8, + header: PmuLookupTableHeader, table_data: KVec, } impl PmuLookupTable { fn new(dev: &device::Device, data: &[u8]) -> Result { - if data.len() < 4 { - return Err(EINVAL); - } + let (header, _) = PmuLookupTableHeader::from_bytes_copy_prefix(data).ok_or(EINVAL)?; - let header_len = usize::from(data[1]); - let entry_len = usize::from(data[2]); - let entry_count = usize::from(data[3]); + let header_len = usize::from(header.header_len); + let entry_len = usize::from(header.entry_len); + let entry_count = usize::from(header.entry_count); let required_bytes = header_len + (entry_count * entry_len); @@ -908,27 +914,21 @@ impl PmuLookupTable { dev_dbg!(dev, "PMU entry: {:02x?}\n", &data[i..][..entry_len]); } - Ok(PmuLookupTable { - version: data[0], - header_len: data[1], - entry_len: data[2], - entry_count: data[3], - table_data, - }) + Ok(PmuLookupTable { header, table_data }) } fn lookup_index(&self, idx: u8) -> Result { - if idx >= self.entry_count { + if idx >= self.header.entry_count { return Err(EINVAL); } - let index = (usize::from(idx)) * usize::from(self.entry_len); + let index = (usize::from(idx)) * usize::from(self.header.entry_len); PmuLookupTableEntry::new(&self.table_data[index..]) } // find entry by type value fn find_entry_by_type(&self, entry_type: u8) -> Result { - for i in 0..self.entry_count { + for i in 0..self.header.entry_count { let entry = self.lookup_index(i)?; if entry.application_id == entry_type { return Ok(entry); -- cgit From 56bb4b17a696a91aeaf7939d467a4f586edb01c6 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 29 Oct 2025 00:07:38 +0900 Subject: gpu: nova-core: vbios: use FromBytes for PcirStruct Use `from_bytes_copy_prefix` to create `PcirStruct` instead of building it ourselves from the bytes stream. This lets us remove a few array accesses and results in shorter code. 
Reviewed-by: Joel Fernandes Signed-off-by: Alexandre Courbot Message-ID: <20251029-nova-vbios-frombytes-v1-3-ac441ebc1de3@nvidia.com> --- drivers/gpu/nova-core/vbios.rs | 40 ++++++++++++---------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index 8854dbe0998d..81544b33077d 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -314,45 +314,29 @@ struct PcirStruct { max_runtime_image_len: u16, } +// SAFETY: all bit patterns are valid for `PcirStruct`. +unsafe impl FromBytes for PcirStruct {} + impl PcirStruct { fn new(dev: &device::Device, data: &[u8]) -> Result { - if data.len() < core::mem::size_of::() { - dev_err!(dev, "Not enough data for PcirStruct\n"); - return Err(EINVAL); - } - - let mut signature = [0u8; 4]; - signature.copy_from_slice(&data[0..4]); + let (pcir, _) = PcirStruct::from_bytes_copy_prefix(data).ok_or(EINVAL)?; // Signature should be "PCIR" (0x52494350) or "NPDS" (0x5344504e). 
- if &signature != b"PCIR" && &signature != b"NPDS" { - dev_err!(dev, "Invalid signature for PcirStruct: {:?}\n", signature); + if &pcir.signature != b"PCIR" && &pcir.signature != b"NPDS" { + dev_err!( + dev, + "Invalid signature for PcirStruct: {:?}\n", + pcir.signature + ); return Err(EINVAL); } - let mut class_code = [0u8; 3]; - class_code.copy_from_slice(&data[13..16]); - - let image_len = u16::from_le_bytes([data[16], data[17]]); - if image_len == 0 { + if pcir.image_len == 0 { dev_err!(dev, "Invalid image length: 0\n"); return Err(EINVAL); } - Ok(PcirStruct { - signature, - vendor_id: u16::from_le_bytes([data[4], data[5]]), - device_id: u16::from_le_bytes([data[6], data[7]]), - device_list_ptr: u16::from_le_bytes([data[8], data[9]]), - pci_data_struct_len: u16::from_le_bytes([data[10], data[11]]), - pci_data_struct_rev: data[12], - class_code, - image_len, - vendor_rom_rev: u16::from_le_bytes([data[18], data[19]]), - code_type: data[20], - last_image: data[21], - max_runtime_image_len: u16::from_le_bytes([data[22], data[23]]), - }) + Ok(pcir) } /// Check if this is the last image in the ROM. -- cgit From 46768644a164f0f5eaa06fdf93718edcbbc47b64 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 29 Oct 2025 00:07:39 +0900 Subject: gpu: nova-core: vbios: use FromBytes for BitHeader Use `from_bytes_copy_prefix` to create `BitHeader` instead of building it ourselves from the bytes stream. This lets us remove a few array accesses and results in shorter code. 
Reviewed-by: Joel Fernandes Signed-off-by: Alexandre Courbot Message-ID: <20251029-nova-vbios-frombytes-v1-4-ac441ebc1de3@nvidia.com> --- drivers/gpu/nova-core/vbios.rs | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index 81544b33077d..0efd2502c230 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -374,30 +374,19 @@ struct BitHeader { checksum: u8, } +// SAFETY: all bit patterns are valid for `BitHeader`. +unsafe impl FromBytes for BitHeader {} + impl BitHeader { fn new(data: &[u8]) -> Result { - if data.len() < core::mem::size_of::() { - return Err(EINVAL); - } - - let mut signature = [0u8; 4]; - signature.copy_from_slice(&data[2..6]); + let (header, _) = BitHeader::from_bytes_copy_prefix(data).ok_or(EINVAL)?; // Check header ID and signature - let id = u16::from_le_bytes([data[0], data[1]]); - if id != 0xB8FF || &signature != b"BIT\0" { + if header.id != 0xB8FF || &header.signature != b"BIT\0" { return Err(EINVAL); } - Ok(BitHeader { - id, - signature, - bcd_version: u16::from_le_bytes([data[6], data[7]]), - header_size: data[8], - token_size: data[9], - token_entries: data[10], - checksum: data[11], - }) + Ok(header) } } -- cgit From ade19c5060dfa39b84a9475a4a6b05e2a8a2b3ac Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 29 Oct 2025 00:07:40 +0900 Subject: gpu: nova-core: vbios: use FromBytes for NpdeStruct Use `from_bytes_copy_prefix` to create `NpdeStruct` instead of building it ourselves from the bytes stream. This lets us remove a few array accesses and results in shorter code. 
Reviewed-by: Joel Fernandes Signed-off-by: Alexandre Courbot Message-ID: <20251029-nova-vbios-frombytes-v1-5-ac441ebc1de3@nvidia.com> --- drivers/gpu/nova-core/vbios.rs | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index 0efd2502c230..aec9166ffb45 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -537,35 +537,29 @@ struct NpdeStruct { last_image: u8, } +// SAFETY: all bit patterns are valid for `NpdeStruct`. +unsafe impl FromBytes for NpdeStruct {} + impl NpdeStruct { fn new(dev: &device::Device, data: &[u8]) -> Option { - if data.len() < core::mem::size_of::() { - dev_dbg!(dev, "Not enough data for NpdeStruct\n"); - return None; - } - - let mut signature = [0u8; 4]; - signature.copy_from_slice(&data[0..4]); + let (npde, _) = NpdeStruct::from_bytes_copy_prefix(data)?; // Signature should be "NPDE" (0x4544504E). - if &signature != b"NPDE" { - dev_dbg!(dev, "Invalid signature for NpdeStruct: {:?}\n", signature); + if &npde.signature != b"NPDE" { + dev_dbg!( + dev, + "Invalid signature for NpdeStruct: {:?}\n", + npde.signature + ); return None; } - let subimage_len = u16::from_le_bytes([data[8], data[9]]); - if subimage_len == 0 { + if npde.subimage_len == 0 { dev_dbg!(dev, "Invalid subimage length: 0\n"); return None; } - Some(NpdeStruct { - signature, - npci_data_ext_rev: u16::from_le_bytes([data[4], data[5]]), - npci_data_ext_len: u16::from_le_bytes([data[6], data[7]]), - subimage_len, - last_image: data[10], - }) + Some(npde) } /// Check if this is the last image in the ROM. -- cgit From 173c99b85aa05387fcfb3231293124c5d611d167 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Thu, 6 Nov 2025 18:10:06 -0800 Subject: gpu: nova-core: apply the one "use" item per line policy As per [1], we need one "use" item per line, in order to reduce merge conflicts. 
Furthermore, we need a trailing ", //" in order to tell rustfmt(1) to leave it alone. This does that for the entire nova-core driver. [1] https://docs.kernel.org/rust/coding-guidelines.html#imports Acked-by: Danilo Krummrich Signed-off-by: John Hubbard [acourbot@nvidia.com: remove imports already in prelude as pointed out by Danilo.] [acourbot@nvidia.com: remove a few unneeded trailing `//`.] Signed-off-by: Alexandre Courbot Message-ID: <20251107021006.434109-1-jhubbard@nvidia.com> --- drivers/gpu/nova-core/dma.rs | 17 ++++++---- drivers/gpu/nova-core/driver.rs | 11 +++++-- drivers/gpu/nova-core/falcon.rs | 34 ++++++++++++-------- drivers/gpu/nova-core/falcon/gsp.rs | 12 +++++-- drivers/gpu/nova-core/falcon/hal.rs | 12 +++++-- drivers/gpu/nova-core/falcon/hal/ga102.rs | 26 ++++++++++------ drivers/gpu/nova-core/falcon/sec2.rs | 10 ++++-- drivers/gpu/nova-core/fb.rs | 27 ++++++++++------ drivers/gpu/nova-core/fb/hal.rs | 6 ++-- drivers/gpu/nova-core/fb/hal/ga100.rs | 12 ++++--- drivers/gpu/nova-core/fb/hal/ga102.rs | 8 +++-- drivers/gpu/nova-core/fb/hal/tu102.rs | 9 ++++-- drivers/gpu/nova-core/firmware.rs | 23 ++++++++------ drivers/gpu/nova-core/firmware/booter.rs | 45 +++++++++++++++++--------- drivers/gpu/nova-core/firmware/fwsec.rs | 52 ++++++++++++++++++++++--------- drivers/gpu/nova-core/firmware/gsp.rs | 33 ++++++++++++++------ drivers/gpu/nova-core/firmware/riscv.rs | 16 ++++++---- drivers/gpu/nova-core/gfw.rs | 14 ++++++--- drivers/gpu/nova-core/gpu.rs | 29 ++++++++++++----- drivers/gpu/nova-core/gsp/boot.rs | 44 +++++++++++++++++--------- drivers/gpu/nova-core/regs.rs | 24 +++++++++++--- drivers/gpu/nova-core/vbios.rs | 28 +++++++++++------ 22 files changed, 335 insertions(+), 157 deletions(-) diff --git a/drivers/gpu/nova-core/dma.rs b/drivers/gpu/nova-core/dma.rs index 94f44bcfd748..5b117aefdb15 100644 --- a/drivers/gpu/nova-core/dma.rs +++ b/drivers/gpu/nova-core/dma.rs @@ -2,12 +2,17 @@ //! Simple DMA object wrapper. 
-use core::ops::{Deref, DerefMut}; - -use kernel::device; -use kernel::dma::CoherentAllocation; -use kernel::page::PAGE_SIZE; -use kernel::prelude::*; +use core::ops::{ + Deref, + DerefMut, // +}; + +use kernel::{ + device, + dma::CoherentAllocation, + page::PAGE_SIZE, + prelude::*, // +}; pub(crate) struct DmaObject { dma: CoherentAllocation, diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs index edc72052e27a..2509f75eccb9 100644 --- a/drivers/gpu/nova-core/driver.rs +++ b/drivers/gpu/nova-core/driver.rs @@ -1,13 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 use kernel::{ - auxiliary, c_str, + auxiliary, + c_str, device::Core, pci, - pci::{Class, ClassMask, Vendor}, + pci::{ + Class, + ClassMask, + Vendor, // + }, prelude::*, sizes::SZ_16M, - sync::Arc, + sync::Arc, // }; use crate::gpu::Gpu; diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index fb3561cc9746..8efc910f20af 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -3,20 +3,28 @@ //! 
Falcon microprocessor base support use core::ops::Deref; + use hal::FalconHal; -use kernel::device; -use kernel::dma::DmaAddress; -use kernel::io::poll::read_poll_timeout; -use kernel::prelude::*; -use kernel::sync::aref::ARef; -use kernel::time::delay::fsleep; -use kernel::time::Delta; - -use crate::dma::DmaObject; -use crate::driver::Bar0; -use crate::gpu::Chipset; -use crate::regs; -use crate::regs::macros::RegisterBase; + +use kernel::{ + device, + dma::DmaAddress, + io::poll::read_poll_timeout, + prelude::*, + sync::aref::ARef, + time::{ + delay::fsleep, + Delta, // + }, +}; + +use crate::{ + dma::DmaObject, + driver::Bar0, + gpu::Chipset, + regs, + regs::macros::RegisterBase, // +}; pub(crate) mod gsp; mod hal; diff --git a/drivers/gpu/nova-core/falcon/gsp.rs b/drivers/gpu/nova-core/falcon/gsp.rs index f17599cb49fa..93d4eca65631 100644 --- a/drivers/gpu/nova-core/falcon/gsp.rs +++ b/drivers/gpu/nova-core/falcon/gsp.rs @@ -2,8 +2,16 @@ use crate::{ driver::Bar0, - falcon::{Falcon, FalconEngine, PFalcon2Base, PFalconBase}, - regs::{self, macros::RegisterBase}, + falcon::{ + Falcon, + FalconEngine, + PFalcon2Base, + PFalconBase, // + }, + regs::{ + self, + macros::RegisterBase, // + }, }; /// Type specifying the `Gsp` falcon engine. Cannot be instantiated. 
diff --git a/drivers/gpu/nova-core/falcon/hal.rs b/drivers/gpu/nova-core/falcon/hal.rs index c6c71db1bb70..8dc56a28ad65 100644 --- a/drivers/gpu/nova-core/falcon/hal.rs +++ b/drivers/gpu/nova-core/falcon/hal.rs @@ -2,9 +2,15 @@ use kernel::prelude::*; -use crate::driver::Bar0; -use crate::falcon::{Falcon, FalconBromParams, FalconEngine}; -use crate::gpu::Chipset; +use crate::{ + driver::Bar0, + falcon::{ + Falcon, + FalconBromParams, + FalconEngine, // + }, + gpu::Chipset, +}; mod ga102; diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs index afed353b24d2..69a7a95cac16 100644 --- a/drivers/gpu/nova-core/falcon/hal/ga102.rs +++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs @@ -2,16 +2,24 @@ use core::marker::PhantomData; -use kernel::device; -use kernel::io::poll::read_poll_timeout; -use kernel::prelude::*; -use kernel::time::Delta; - -use crate::driver::Bar0; -use crate::falcon::{ - Falcon, FalconBromParams, FalconEngine, FalconModSelAlgo, PeregrineCoreSelect, +use kernel::{ + device, + io::poll::read_poll_timeout, + prelude::*, + time::Delta, // +}; + +use crate::{ + driver::Bar0, + falcon::{ + Falcon, + FalconBromParams, + FalconEngine, + FalconModSelAlgo, + PeregrineCoreSelect, // + }, + regs, }; -use crate::regs; use super::FalconHal; diff --git a/drivers/gpu/nova-core/falcon/sec2.rs b/drivers/gpu/nova-core/falcon/sec2.rs index 815786c8480d..b57d362e576a 100644 --- a/drivers/gpu/nova-core/falcon/sec2.rs +++ b/drivers/gpu/nova-core/falcon/sec2.rs @@ -1,7 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 -use crate::falcon::{FalconEngine, PFalcon2Base, PFalconBase}; -use crate::regs::macros::RegisterBase; +use crate::{ + falcon::{ + FalconEngine, + PFalcon2Base, + PFalconBase, // + }, + regs::macros::RegisterBase, +}; /// Type specifying the `Sec2` falcon engine. Cannot be instantiated. 
pub(crate) struct Sec2(()); diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs index 27d9edab8347..989bbfd5bdee 100644 --- a/drivers/gpu/nova-core/fb.rs +++ b/drivers/gpu/nova-core/fb.rs @@ -2,16 +2,23 @@ use core::ops::Range; -use kernel::prelude::*; -use kernel::ptr::{Alignable, Alignment}; -use kernel::sizes::*; -use kernel::sync::aref::ARef; -use kernel::{dev_warn, device}; - -use crate::dma::DmaObject; -use crate::driver::Bar0; -use crate::gpu::Chipset; -use crate::regs; +use kernel::{ + device, + prelude::*, + ptr::{ + Alignable, + Alignment, // + }, + sizes::*, + sync::aref::ARef, // +}; + +use crate::{ + dma::DmaObject, + driver::Bar0, + gpu::Chipset, + regs, // +}; mod hal; diff --git a/drivers/gpu/nova-core/fb/hal.rs b/drivers/gpu/nova-core/fb/hal.rs index 2f914948bb9a..aba0abd8ee00 100644 --- a/drivers/gpu/nova-core/fb/hal.rs +++ b/drivers/gpu/nova-core/fb/hal.rs @@ -2,8 +2,10 @@ use kernel::prelude::*; -use crate::driver::Bar0; -use crate::gpu::Chipset; +use crate::{ + driver::Bar0, + gpu::Chipset, // +}; mod ga100; mod ga102; diff --git a/drivers/gpu/nova-core/fb/hal/ga100.rs b/drivers/gpu/nova-core/fb/hal/ga100.rs index 871c42bf033a..dae392c38a1b 100644 --- a/drivers/gpu/nova-core/fb/hal/ga100.rs +++ b/drivers/gpu/nova-core/fb/hal/ga100.rs @@ -1,15 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 -struct Ga100; - use kernel::prelude::*; -use crate::driver::Bar0; -use crate::fb::hal::FbHal; -use crate::regs; +use crate::{ + driver::Bar0, + fb::hal::FbHal, + regs, // +}; use super::tu102::FLUSH_SYSMEM_ADDR_SHIFT; +struct Ga100; + pub(super) fn read_sysmem_flush_page_ga100(bar: &Bar0) -> u64 { u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::read(bar).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT | u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::read(bar).adr_63_40()) diff --git a/drivers/gpu/nova-core/fb/hal/ga102.rs b/drivers/gpu/nova-core/fb/hal/ga102.rs index a73b77e39715..734605905031 100644 --- a/drivers/gpu/nova-core/fb/hal/ga102.rs +++ 
b/drivers/gpu/nova-core/fb/hal/ga102.rs @@ -2,9 +2,11 @@ use kernel::prelude::*; -use crate::driver::Bar0; -use crate::fb::hal::FbHal; -use crate::regs; +use crate::{ + driver::Bar0, + fb::hal::FbHal, + regs, // +}; fn vidmem_size_ga102(bar: &Bar0) -> u64 { regs::NV_USABLE_FB_SIZE_IN_MB::read(bar).usable_fb_size() diff --git a/drivers/gpu/nova-core/fb/hal/tu102.rs b/drivers/gpu/nova-core/fb/hal/tu102.rs index 32114c3b3686..eec984f4e816 100644 --- a/drivers/gpu/nova-core/fb/hal/tu102.rs +++ b/drivers/gpu/nova-core/fb/hal/tu102.rs @@ -1,10 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 -use crate::driver::Bar0; -use crate::fb::hal::FbHal; -use crate::regs; use kernel::prelude::*; +use crate::{ + driver::Bar0, + fb::hal::FbHal, + regs, // +}; + /// Shift applied to the sysmem address before it is written into `NV_PFB_NISO_FLUSH_SYSMEM_ADDR`, /// to be used by HALs. pub(super) const FLUSH_SYSMEM_ADDR_SHIFT: u32 = 8; diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 4179a74a2342..163b746f03ef 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -4,17 +4,20 @@ //! to be loaded into a given execution unit. use core::marker::PhantomData; -use core::mem::size_of; -use kernel::device; -use kernel::firmware; -use kernel::prelude::*; -use kernel::str::CString; -use kernel::transmute::FromBytes; - -use crate::dma::DmaObject; -use crate::falcon::FalconFirmware; -use crate::gpu; +use kernel::{ + device, + firmware, + prelude::*, + str::CString, + transmute::FromBytes, // +}; + +use crate::{ + dma::DmaObject, + falcon::FalconFirmware, + gpu, // +}; pub(crate) mod booter; pub(crate) mod fwsec; diff --git a/drivers/gpu/nova-core/firmware/booter.rs b/drivers/gpu/nova-core/firmware/booter.rs index b4ff1b17e4a0..1e8f6c99fa2e 100644 --- a/drivers/gpu/nova-core/firmware/booter.rs +++ b/drivers/gpu/nova-core/firmware/booter.rs @@ -4,20 +4,37 @@ //! 
running on [`Sec2`], that is used on Turing/Ampere to load the GSP firmware into the GSP falcon //! (and optionally unload it through a separate firmware image). -use core::marker::PhantomData; -use core::mem::size_of; -use core::ops::Deref; - -use kernel::device; -use kernel::prelude::*; -use kernel::transmute::FromBytes; - -use crate::dma::DmaObject; -use crate::driver::Bar0; -use crate::falcon::sec2::Sec2; -use crate::falcon::{Falcon, FalconBromParams, FalconFirmware, FalconLoadParams, FalconLoadTarget}; -use crate::firmware::{BinFirmware, FirmwareDmaObject, FirmwareSignature, Signed, Unsigned}; -use crate::gpu::Chipset; +use core::{ + marker::PhantomData, + ops::Deref, // +}; + +use kernel::{ + device, + prelude::*, + transmute::FromBytes, // +}; + +use crate::{ + dma::DmaObject, + driver::Bar0, + falcon::{ + sec2::Sec2, + Falcon, + FalconBromParams, + FalconFirmware, + FalconLoadParams, + FalconLoadTarget, // + }, + firmware::{ + BinFirmware, + FirmwareDmaObject, + FirmwareSignature, + Signed, + Unsigned, // + }, + gpu::Chipset, +}; /// Local convenience function to return a copy of `S` by reinterpreting the bytes starting at /// `offset` in `slice`. diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index ce78c1563754..8dbc6b516d27 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -10,20 +10,44 @@ //! - The command to be run, as this firmware can perform several tasks ; //! - The ucode signature, so the GSP falcon can run FWSEC in HS mode. 
-use core::marker::PhantomData; -use core::mem::{align_of, size_of}; -use core::ops::Deref; - -use kernel::device::{self, Device}; -use kernel::prelude::*; -use kernel::transmute::FromBytes; - -use crate::dma::DmaObject; -use crate::driver::Bar0; -use crate::falcon::gsp::Gsp; -use crate::falcon::{Falcon, FalconBromParams, FalconFirmware, FalconLoadParams, FalconLoadTarget}; -use crate::firmware::{FalconUCodeDescV3, FirmwareDmaObject, FirmwareSignature, Signed, Unsigned}; -use crate::vbios::Vbios; +use core::{ + marker::PhantomData, + mem::{ + align_of, + size_of, // + }, + ops::Deref, +}; + +use kernel::{ + device::{ + self, + Device, // + }, + prelude::*, + transmute::FromBytes, +}; + +use crate::{ + dma::DmaObject, + driver::Bar0, + falcon::{ + gsp::Gsp, + Falcon, + FalconBromParams, + FalconFirmware, + FalconLoadParams, + FalconLoadTarget, // + }, + firmware::{ + FalconUCodeDescV3, + FirmwareDmaObject, + FirmwareSignature, + Signed, + Unsigned, // + }, + vbios::Vbios, +}; const NVFW_FALCON_APPIF_ID_DMEMMAPPER: u32 = 0x4; diff --git a/drivers/gpu/nova-core/firmware/gsp.rs b/drivers/gpu/nova-core/firmware/gsp.rs index 24c3ea698940..939e036896bf 100644 --- a/drivers/gpu/nova-core/firmware/gsp.rs +++ b/drivers/gpu/nova-core/firmware/gsp.rs @@ -2,16 +2,29 @@ use core::mem::size_of_val; -use kernel::device; -use kernel::dma::{DataDirection, DmaAddress}; -use kernel::kvec; -use kernel::prelude::*; -use kernel::scatterlist::{Owned, SGTable}; - -use crate::dma::DmaObject; -use crate::firmware::riscv::RiscvFirmware; -use crate::gpu::{Architecture, Chipset}; -use crate::gsp::GSP_PAGE_SIZE; +use kernel::{ + device, + dma::{ + DataDirection, + DmaAddress, // + }, + kvec, + prelude::*, + scatterlist::{ + Owned, + SGTable, // + }, +}; + +use crate::{ + dma::DmaObject, + firmware::riscv::RiscvFirmware, + gpu::{ + Architecture, + Chipset, // + }, + gsp::GSP_PAGE_SIZE, +}; /// Ad-hoc and temporary module to extract sections from ELF images. 
/// diff --git a/drivers/gpu/nova-core/firmware/riscv.rs b/drivers/gpu/nova-core/firmware/riscv.rs index afb08f5bc4ba..196dedb96aeb 100644 --- a/drivers/gpu/nova-core/firmware/riscv.rs +++ b/drivers/gpu/nova-core/firmware/riscv.rs @@ -5,13 +5,17 @@ use core::mem::size_of; -use kernel::device; -use kernel::firmware::Firmware; -use kernel::prelude::*; -use kernel::transmute::FromBytes; +use kernel::{ + device, + firmware::Firmware, + prelude::*, + transmute::FromBytes, // +}; -use crate::dma::DmaObject; -use crate::firmware::BinFirmware; +use crate::{ + dma::DmaObject, + firmware::BinFirmware, // +}; /// Descriptor for microcode running on a RISC-V core. #[repr(C)] diff --git a/drivers/gpu/nova-core/gfw.rs b/drivers/gpu/nova-core/gfw.rs index 23c28c2a3793..9121f400046d 100644 --- a/drivers/gpu/nova-core/gfw.rs +++ b/drivers/gpu/nova-core/gfw.rs @@ -18,12 +18,16 @@ //! //! Note that the devinit sequence also needs to run during suspend/resume. -use kernel::io::poll::read_poll_timeout; -use kernel::prelude::*; -use kernel::time::Delta; +use kernel::{ + io::poll::read_poll_timeout, + prelude::*, + time::Delta, // +}; -use crate::driver::Bar0; -use crate::regs; +use crate::{ + driver::Bar0, + regs, // +}; /// Wait for the `GFW` (GPU firmware) boot completion signal (`GFW_BOOT`), or a 4 seconds timeout. 
/// diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 9d182bffe8b4..802e71e4f97d 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -1,13 +1,26 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::{device, devres::Devres, error::code::*, fmt, pci, prelude::*, sync::Arc}; - -use crate::driver::Bar0; -use crate::falcon::{gsp::Gsp as GspFalcon, sec2::Sec2 as Sec2Falcon, Falcon}; -use crate::fb::SysmemFlush; -use crate::gfw; -use crate::gsp::Gsp; -use crate::regs; +use kernel::{ + device, + devres::Devres, + fmt, + pci, + prelude::*, + sync::Arc, // +}; + +use crate::{ + driver::Bar0, + falcon::{ + gsp::Gsp as GspFalcon, + sec2::Sec2 as Sec2Falcon, + Falcon, // + }, + fb::SysmemFlush, + gfw, + gsp::Gsp, + regs, +}; macro_rules! define_chipset { ({ $($variant:ident = $value:expr),* $(,)* }) => diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index 2800f3aee37d..19dddff929da 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -1,21 +1,35 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::device; -use kernel::pci; -use kernel::prelude::*; - -use crate::driver::Bar0; -use crate::falcon::{gsp::Gsp, sec2::Sec2, Falcon}; -use crate::fb::FbLayout; -use crate::firmware::{ - booter::{BooterFirmware, BooterKind}, - fwsec::{FwsecCommand, FwsecFirmware}, - gsp::GspFirmware, - FIRMWARE_VERSION, +use kernel::{ + device, + pci, + prelude::*, // +}; + +use crate::{ + driver::Bar0, + falcon::{ + gsp::Gsp, + sec2::Sec2, + Falcon, // + }, + fb::FbLayout, + firmware::{ + booter::{ + BooterFirmware, + BooterKind, // + }, + fwsec::{ + FwsecCommand, + FwsecFirmware, // + }, + gsp::GspFirmware, + FIRMWARE_VERSION, // + }, + gpu::Chipset, + regs, + vbios::Vbios, }; -use crate::gpu::Chipset; -use crate::regs; -use crate::vbios::Vbios; impl super::Gsp { /// Helper function to load and run the FWSEC-FRTS firmware and confirm that it has properly diff --git 
a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 206dab2e1335..7cd2e8a4d4c6 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -7,13 +7,27 @@ #[macro_use] pub(crate) mod macros; -use crate::falcon::{ - DmaTrfCmdSize, FalconCoreRev, FalconCoreRevSubversion, FalconFbifMemType, FalconFbifTarget, - FalconModSelAlgo, FalconSecurityModel, PFalcon2Base, PFalconBase, PeregrineCoreSelect, -}; -use crate::gpu::{Architecture, Chipset}; use kernel::prelude::*; +use crate::{ + falcon::{ + DmaTrfCmdSize, + FalconCoreRev, + FalconCoreRevSubversion, + FalconFbifMemType, + FalconFbifTarget, + FalconModSelAlgo, + FalconSecurityModel, + PFalcon2Base, + PFalconBase, + PeregrineCoreSelect, // + }, + gpu::{ + Architecture, + Chipset, // + }, +}; + // PMC register!(NV_PMC_BOOT_0 @ 0x00000000, "Basic revision information about the GPU" { diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index aec9166ffb45..9c5b93adeb96 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -2,16 +2,26 @@ //! VBIOS extraction and parsing. -use crate::driver::Bar0; -use crate::firmware::fwsec::Bcrt30Rsa3kSignature; -use crate::firmware::FalconUCodeDescV3; use core::convert::TryFrom; -use kernel::device; -use kernel::error::Result; -use kernel::prelude::*; -use kernel::ptr::{Alignable, Alignment}; -use kernel::transmute::FromBytes; -use kernel::types::ARef; + +use kernel::{ + device, + prelude::*, + ptr::{ + Alignable, + Alignment, // + }, + transmute::FromBytes, + types::ARef, +}; + +use crate::{ + driver::Bar0, + firmware::{ + fwsec::Bcrt30Rsa3kSignature, + FalconUCodeDescV3, // + }, +}; /// The offset of the VBIOS ROM in the BAR0 space. 
const ROM_OFFSET: usize = 0x300000; -- cgit From 473f778592e499c0dd0e1dd9fce8eb80923355d9 Mon Sep 17 00:00:00 2001 From: Daniel del Castillo Date: Tue, 4 Nov 2025 20:37:48 +0100 Subject: gpu: nova-core: Simplify `transmute` and `transmute_mut` in fwsec.rs This patch solves one of the existing mentions of COHA, a task in the Nova task list about improving the `CoherentAllocation` API. It uses the new `from_bytes` method from the `FromBytes` trait as well as the `as_slice` and `as_slice_mut` methods from `CoherentAllocation`. Signed-off-by: Daniel del Castillo [acourbot@nvidia.com: set prefix to "gpu: nova-core:".] [acourbot@nvidia.com: fix merge conflict after imports refactor.] Signed-off-by: Alexandre Courbot Message-ID: <20251104193756.57726-1-delcastillodelarosadaniel@gmail.com> --- drivers/gpu/nova-core/firmware/fwsec.rs | 125 +++++++++++++++----------------- 1 file changed, 58 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index 8dbc6b516d27..e06ae931bf55 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -12,11 +12,8 @@ use core::{ marker::PhantomData, - mem::{ - align_of, - size_of, // - }, - ops::Deref, + mem::size_of, + ops::Deref, // }; use kernel::{ @@ -25,7 +22,10 @@ use kernel::{ Device, // }, prelude::*, - transmute::FromBytes, + transmute::{ + AsBytes, + FromBytes, // + }, }; use crate::{ @@ -94,6 +94,8 @@ struct FalconAppifDmemmapperV3 { } // SAFETY: any byte sequence is valid for this struct. unsafe impl FromBytes for FalconAppifDmemmapperV3 {} +// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. +unsafe impl AsBytes for FalconAppifDmemmapperV3 {} #[derive(Debug)] #[repr(C, packed)] @@ -106,6 +108,8 @@ struct ReadVbios { } // SAFETY: any byte sequence is valid for this struct. 
unsafe impl FromBytes for ReadVbios {} +// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. +unsafe impl AsBytes for ReadVbios {} #[derive(Debug)] #[repr(C, packed)] @@ -118,6 +122,8 @@ struct FrtsRegion { } // SAFETY: any byte sequence is valid for this struct. unsafe impl FromBytes for FrtsRegion {} +// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. +unsafe impl AsBytes for FrtsRegion {} const NVFW_FRTS_CMD_REGION_TYPE_FB: u32 = 2; @@ -128,6 +134,8 @@ struct FrtsCmd { } // SAFETY: any byte sequence is valid for this struct. unsafe impl FromBytes for FrtsCmd {} +// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. +unsafe impl AsBytes for FrtsCmd {} const NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS: u32 = 0x15; const NVFW_FALCON_APPIF_DMEMMAPPER_CMD_SB: u32 = 0x19; @@ -171,26 +179,15 @@ impl FirmwareSignature for Bcrt30Rsa3kSignature {} /// /// # Safety /// -/// Callers must ensure that the region of memory returned is not written for as long as the -/// returned reference is alive. -/// -/// TODO[TRSM][COHA]: Remove this and `transmute_mut` once `CoherentAllocation::as_slice` is -/// available and we have a way to transmute objects implementing FromBytes, e.g.: -/// https://lore.kernel.org/lkml/20250330234039.29814-1-christiansantoslima21@gmail.com/ -unsafe fn transmute<'a, 'b, T: Sized + FromBytes>( - fw: &'a DmaObject, - offset: usize, -) -> Result<&'b T> { - if offset + size_of::() > fw.size() { - return Err(EINVAL); - } - if (fw.start_ptr() as usize + offset) % align_of::() != 0 { - return Err(EINVAL); - } - - // SAFETY: we have checked that the pointer is properly aligned that its pointed memory is - // large enough the contains an instance of `T`, which implements `FromBytes`. 
- Ok(unsafe { &*(fw.start_ptr().add(offset).cast::<T>()) }) +/// * Callers must ensure that the device does not read/write to/from memory while the returned +/// reference is live. +/// * Callers must ensure that this call does not race with a write to the same region while +/// the returned reference is live. +unsafe fn transmute<T: Sized + FromBytes>(fw: &DmaObject, offset: usize) -> Result<&T> { + // SAFETY: The safety requirements of the function guarantee the device won't read + // or write to memory while the reference is alive and that this call won't race + // with writes to the same memory region. + T::from_bytes(unsafe { fw.as_slice(offset, size_of::<T>())? }).ok_or(EINVAL) } /// Reinterpret the area starting from `offset` in `fw` as a mutable instance of `T` (which must @@ -198,22 +195,18 @@ unsafe fn transmute<'a, 'b, T: Sized + FromBytes>( /// /// # Safety /// -/// Callers must ensure that the region of memory returned is not read or written for as long as -/// the returned reference is alive. -unsafe fn transmute_mut<'a, 'b, T: Sized + FromBytes>( - fw: &'a mut DmaObject, +/// * Callers must ensure that the device does not read/write to/from memory while the returned +/// slice is live. +/// * Callers must ensure that this call does not race with a read or write to the same region +/// while the returned slice is live. +unsafe fn transmute_mut<T: Sized + FromBytes + AsBytes>( + fw: &mut DmaObject, offset: usize, -) -> Result<&'b mut T> { - if offset + size_of::<T>() > fw.size() { - return Err(EINVAL); - } - if (fw.start_ptr_mut() as usize + offset) % align_of::<T>() != 0 { - return Err(EINVAL); - } - - // SAFETY: we have checked that the pointer is properly aligned that its pointed memory is - // large enough the contains an instance of `T`, which implements `FromBytes`.
- Ok(unsafe { &mut *(fw.start_ptr_mut().add(offset).cast::<T>()) }) +) -> Result<&mut T> { + // SAFETY: The safety requirements of the function guarantee the device won't read + // or write to memory while the reference is alive and that this call won't race + // with writes or reads to the same memory region. + T::from_bytes_mut(unsafe { fw.as_slice_mut(offset, size_of::<T>())? }).ok_or(EINVAL) } /// The FWSEC microcode, extracted from the BIOS and to be run on the GSP falcon. @@ -284,32 +277,35 @@ impl FirmwareDmaObject<FwsecFirmware, Unsigned> { // Find the DMEM mapper section in the firmware. for i in 0..usize::from(hdr.entry_count) { - let app: &FalconAppifV1 = // SAFETY: we have exclusive access to `dma_object`. - unsafe { + let app: &FalconAppifV1 = unsafe { transmute( &dma_object, - hdr_offset + usize::from(hdr.header_size) + i * usize::from(hdr.entry_size) + hdr_offset + usize::from(hdr.header_size) + i * usize::from(hdr.entry_size), ) }?; if app.id != NVFW_FALCON_APPIF_ID_DMEMMAPPER { continue; } + let dmem_base = app.dmem_base; // SAFETY: we have exclusive access to `dma_object`. let dmem_mapper: &mut FalconAppifDmemmapperV3 = unsafe { - transmute_mut( - &mut dma_object, - (desc.imem_load_size + app.dmem_base) as usize, - ) + transmute_mut(&mut dma_object, (desc.imem_load_size + dmem_base) as usize) }?; + dmem_mapper.init_cmd = match cmd { + FwsecCommand::Frts { .. } => NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS, + FwsecCommand::Sb => NVFW_FALCON_APPIF_DMEMMAPPER_CMD_SB, + }; + let cmd_in_buffer_offset = dmem_mapper.cmd_in_buffer_offset; + // SAFETY: we have exclusive access to `dma_object`.
let frts_cmd: &mut FrtsCmd = unsafe { transmute_mut( &mut dma_object, - (desc.imem_load_size + dmem_mapper.cmd_in_buffer_offset) as usize, + (desc.imem_load_size + cmd_in_buffer_offset) as usize, ) }?; @@ -320,24 +316,19 @@ impl FirmwareDmaObject { size: 0, flags: 2, }; - - dmem_mapper.init_cmd = match cmd { - FwsecCommand::Frts { - frts_addr, - frts_size, - } => { - frts_cmd.frts_region = FrtsRegion { - ver: 1, - hdr: u32::try_from(size_of::())?, - addr: u32::try_from(frts_addr >> 12)?, - size: u32::try_from(frts_size >> 12)?, - ftype: NVFW_FRTS_CMD_REGION_TYPE_FB, - }; - - NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS - } - FwsecCommand::Sb => NVFW_FALCON_APPIF_DMEMMAPPER_CMD_SB, - }; + if let FwsecCommand::Frts { + frts_addr, + frts_size, + } = cmd + { + frts_cmd.frts_region = FrtsRegion { + ver: 1, + hdr: u32::try_from(size_of::())?, + addr: u32::try_from(frts_addr >> 12)?, + size: u32::try_from(frts_size >> 12)?, + ftype: NVFW_FRTS_CMD_REGION_TYPE_FB, + }; + } // Return early as we found and patched the DMEMMAPPER region. return Ok(Self(dma_object, PhantomData)); -- cgit From 571ce401a78ee9cb5992b74d36df1b6a8ffd4b16 Mon Sep 17 00:00:00 2001 From: Daniel del Castillo Date: Tue, 4 Nov 2025 20:37:49 +0100 Subject: gpu: nova-core: Fix capitalization of some comments Some comments that already existed didn't start with a capital letter, this patch fixes that. Signed-off-by: Daniel del Castillo [acourbot@nvidia.com: set prefix to "gpu: nova-core:".] 
Signed-off-by: Alexandre Courbot Message-ID: <20251104193756.57726-2-delcastillodelarosadaniel@gmail.com> --- drivers/gpu/nova-core/firmware/fwsec.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index e06ae931bf55..cb794e406395 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -59,7 +59,7 @@ struct FalconAppifHdrV1 { entry_size: u8, entry_count: u8, } -// SAFETY: any byte sequence is valid for this struct. +// SAFETY: Any byte sequence is valid for this struct. unsafe impl FromBytes for FalconAppifHdrV1 {} #[repr(C, packed)] @@ -68,7 +68,7 @@ struct FalconAppifV1 { id: u32, dmem_base: u32, } -// SAFETY: any byte sequence is valid for this struct. +// SAFETY: Any byte sequence is valid for this struct. unsafe impl FromBytes for FalconAppifV1 {} #[derive(Debug)] @@ -92,7 +92,7 @@ struct FalconAppifDmemmapperV3 { ucode_cmd_mask1: u32, multi_tgt_tbl: u32, } -// SAFETY: any byte sequence is valid for this struct. +// SAFETY: Any byte sequence is valid for this struct. unsafe impl FromBytes for FalconAppifDmemmapperV3 {} // SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. unsafe impl AsBytes for FalconAppifDmemmapperV3 {} @@ -106,7 +106,7 @@ struct ReadVbios { size: u32, flags: u32, } -// SAFETY: any byte sequence is valid for this struct. +// SAFETY: Any byte sequence is valid for this struct. unsafe impl FromBytes for ReadVbios {} // SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. unsafe impl AsBytes for ReadVbios {} @@ -120,7 +120,7 @@ struct FrtsRegion { size: u32, ftype: u32, } -// SAFETY: any byte sequence is valid for this struct. +// SAFETY: Any byte sequence is valid for this struct. 
unsafe impl FromBytes for FrtsRegion {} // SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. unsafe impl AsBytes for FrtsRegion {} @@ -132,7 +132,7 @@ struct FrtsCmd { read_vbios: ReadVbios, frts_region: FrtsRegion, } -// SAFETY: any byte sequence is valid for this struct. +// SAFETY: Any byte sequence is valid for this struct. unsafe impl FromBytes for FrtsCmd {} // SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. unsafe impl AsBytes for FrtsCmd {} -- cgit From 3577e265e4bdb09441941a52c1bf9dfd1e27d707 Mon Sep 17 00:00:00 2001 From: Daniel del Castillo Date: Tue, 4 Nov 2025 20:37:50 +0100 Subject: gpu: nova-core: Simplify `DmaObject::from_data` in nova-core/dma.rs This patch solves one of the existing mentions of COHA, a task in the Nova task list about improving the `CoherentAllocation` API. It uses the `write` method from `CoherentAllocation`. Signed-off-by: Daniel del Castillo [acourbot@nvidia.com: set prefix to "gpu: nova-core:".] Signed-off-by: Alexandre Courbot Message-ID: <20251104193756.57726-3-delcastillodelarosadaniel@gmail.com> --- drivers/gpu/nova-core/dma.rs | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/nova-core/dma.rs b/drivers/gpu/nova-core/dma.rs index 5b117aefdb15..7215398969da 100644 --- a/drivers/gpu/nova-core/dma.rs +++ b/drivers/gpu/nova-core/dma.rs @@ -30,20 +30,11 @@ impl DmaObject { } pub(crate) fn from_data(dev: &device::Device, data: &[u8]) -> Result { - Self::new(dev, data.len()).map(|mut dma_obj| { - // TODO[COHA]: replace with `CoherentAllocation::write()` once available. - // SAFETY: - // - `dma_obj`'s size is at least `data.len()`. - // - We have just created this object and there is no other user at this stage. 
- unsafe { - core::ptr::copy_nonoverlapping( - data.as_ptr(), - dma_obj.dma.start_ptr_mut(), - data.len(), - ); - } - - dma_obj + Self::new(dev, data.len()).and_then(|mut dma_obj| { + // SAFETY: We have just allocated the DMA memory, we are the only users and + // we haven't made the device aware of the handle yet. + unsafe { dma_obj.write(data, 0)? } + Ok(dma_obj) }) } } -- cgit From 453a73000c56d2ee21f327c0a2a3249aa359bcc9 Mon Sep 17 00:00:00 2001 From: Daniel del Castillo Date: Tue, 4 Nov 2025 20:37:51 +0100 Subject: Documentation: nova: Update the todo list This small patch updates the nova todo list to remove some tasks that have been solved lately: * COHA is solved in this patch series * TRSM was solved recently [1] [1] https://lore.kernel.org/rust-for-linux/DCEJ9SV4LBJL.11EUZVXX6EB9H@nvidia.com/ Signed-off-by: Daniel del Castillo [acourbot@nvidia.com: set prefix to "Documentation: nova:".] Signed-off-by: Alexandre Courbot Message-ID: <20251104193756.57726-4-delcastillodelarosadaniel@gmail.com> --- Documentation/gpu/nova/core/todo.rst | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/Documentation/gpu/nova/core/todo.rst b/Documentation/gpu/nova/core/todo.rst index c55c7bedbfdf..35cc7c31d423 100644 --- a/Documentation/gpu/nova/core/todo.rst +++ b/Documentation/gpu/nova/core/todo.rst @@ -44,25 +44,6 @@ automatically generates the corresponding mappings between a value and a number. | Complexity: Beginner | Link: https://docs.rs/num/latest/num/trait.FromPrimitive.html -Conversion from byte slices for types implementing FromBytes [TRSM] -------------------------------------------------------------------- - -We retrieve several structures from byte streams coming from the BIOS or loaded -firmware. At the moment converting the bytes slice into the proper type require -an inelegant `unsafe` operation; this will go away once `FromBytes` implements -a proper `from_bytes` method. 
- -| Complexity: Beginner - -CoherentAllocation improvements [COHA] --------------------------------------- - -`CoherentAllocation` needs a safe way to write into the allocation, and to -obtain slices within the allocation. - -| Complexity: Beginner -| Contact: Abdiel Janulgue - Generic register abstraction [REGA] ----------------------------------- -- cgit From 5525ac03ca7adec61d39f3fd3a143b5e294bdff7 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 16 Oct 2025 11:55:48 +0900 Subject: gpu: nova-core: add functions and traits for lossless integer conversions The core library's `From` implementations do not cover conversions that are not portable or future-proof. For instance, even though it is safe today, `From<usize>` is not implemented for `u64` because of the possibility to support larger-than-64bit architectures in the future. However, the kernel supports a narrower set of architectures, and in the case of Nova we only support 64-bit. This makes it helpful and desirable to provide more infallible conversions, lest we need to rely on the `as` keyword and carry the risk of silently losing data. Thus, introduce a new module `num` that provides safe const functions performing more conversions allowed by the build target, as well as `FromSafeCast` and `IntoSafeCast` traits that are just extensions of `From` and `Into` to conversions that are known to be lossless. Suggested-by: Danilo Krummrich Link: https://lore.kernel.org/rust-for-linux/DDK4KADWJHMG.1FUPL3SDR26XF@kernel.org/ Acked-by: Danilo Krummrich [acourbot@nvidia.com: fix merge conflicts after rebase.]
Signed-off-by: Alexandre Courbot Message-ID: <20251029-nova-as-v3-4-6a30c7333ad9@nvidia.com> --- drivers/gpu/nova-core/nova_core.rs | 1 + drivers/gpu/nova-core/num.rs | 167 +++++++++++++++++++++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 drivers/gpu/nova-core/num.rs diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index e130166c1086..9180ec9c27ef 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -13,6 +13,7 @@ mod firmware; mod gfw; mod gpu; mod gsp; +mod num; mod regs; mod vbios; diff --git a/drivers/gpu/nova-core/num.rs b/drivers/gpu/nova-core/num.rs new file mode 100644 index 000000000000..457a1303640f --- /dev/null +++ b/drivers/gpu/nova-core/num.rs @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Numerical helpers functions and traits. +//! +//! This is essentially a staging module for code to mature until it can be moved to the `kernel` +//! crate. + +use kernel::{ + macros::paste, + prelude::*, // +}; + +/// Implements safe `as` conversion functions from a given type into a series of target types. +/// +/// These functions can be used in place of `as`, with the guarantee that they will be lossless. +macro_rules! impl_safe_as { + ($from:ty as { $($into:ty),* }) => { + $( + paste! { + #[doc = ::core::concat!( + "Losslessly converts a [`", + ::core::stringify!($from), + "`] into a [`", + ::core::stringify!($into), + "`].")] + /// + /// This conversion is allowed as it is always lossless. Prefer this over the `as` + /// keyword to ensure no lossy casts are performed. + /// + /// This is for use from a `const` context. For non `const` use, prefer the + /// [`FromSafeCast`] and [`IntoSafeCast`] traits. 
+ /// + /// # Examples + /// + /// ``` + /// use crate::num; + /// + #[doc = ::core::concat!( + "assert_eq!(num::", + ::core::stringify!($from), + "_as_", + ::core::stringify!($into), + "(1", + ::core::stringify!($from), + "), 1", + ::core::stringify!($into), + ");")] + /// ``` + #[allow(unused)] + #[inline(always)] + pub(crate) const fn [<$from _as_ $into>](value: $from) -> $into { + kernel::static_assert!(size_of::<$into>() >= size_of::<$from>()); + + value as $into + } + } + )* + }; +} + +impl_safe_as!(u8 as { u16, u32, u64, usize }); +impl_safe_as!(u16 as { u32, u64, usize }); +impl_safe_as!(u32 as { u64, usize } ); +// `u64` and `usize` have the same size on 64-bit platforms. +#[cfg(CONFIG_64BIT)] +impl_safe_as!(u64 as { usize } ); + +// A `usize` fits into a `u64` on 32 and 64-bit platforms. +#[cfg(any(CONFIG_32BIT, CONFIG_64BIT))] +impl_safe_as!(usize as { u64 }); + +// A `usize` fits into a `u32` on 32-bit platforms. +#[cfg(CONFIG_32BIT)] +impl_safe_as!(usize as { u32 }); + +/// Extension trait providing guaranteed lossless cast to `Self` from `T`. +/// +/// The standard library's `From` implementations do not cover conversions that are not portable or +/// future-proof. For instance, even though it is safe today, `From<usize>` is not implemented for +/// [`u64`] because of the possibility to support larger-than-64bit architectures in the future. +/// +/// The workaround is to either deal with the error handling of [`TryFrom`] for an operation that +/// technically cannot fail, or to use the `as` keyword, which can silently strip data if the +/// destination type is smaller than the source. +/// +/// Both options are hardly acceptable for the kernel. It is also a much more architecture +/// dependent environment, supporting only 32 and 64 bit architectures, with some modules +/// explicitly depending on a specific bus width that could greatly benefit from infallible +/// conversion operations.
+/// +/// Thus this extension trait that provides, for the architecture the kernel is built for, safe +/// conversion between types for which such cast is lossless. +/// +/// In other words, this trait is implemented if, for the current build target and with `t: T`, the +/// `t as Self` operation is completely lossless. +/// +/// Prefer this over the `as` keyword to ensure no lossy casts are performed. +/// +/// If you need to perform a conversion in `const` context, use [`u64_as_usize`], [`u32_as_usize`], +/// [`usize_as_u64`], etc. +/// +/// # Examples +/// +/// ``` +/// use crate::num::FromSafeCast; +/// +/// assert_eq!(usize::from_safe_cast(0xf00u32), 0xf00u32 as usize); +/// ``` +#[expect(unused)] +pub(crate) trait FromSafeCast { + /// Create a `Self` from `value`. This operation is guaranteed to be lossless. + fn from_safe_cast(value: T) -> Self; +} + +impl FromSafeCast for u64 { + fn from_safe_cast(value: usize) -> Self { + usize_as_u64(value) + } +} + +#[cfg(CONFIG_32BIT)] +impl FromSafeCast for u32 { + fn from_safe_cast(value: usize) -> Self { + usize_as_u32(value) + } +} + +impl FromSafeCast for usize { + fn from_safe_cast(value: u32) -> Self { + u32_as_usize(value) + } +} + +#[cfg(CONFIG_64BIT)] +impl FromSafeCast for usize { + fn from_safe_cast(value: u64) -> Self { + u64_as_usize(value) + } +} + +/// Counterpart to the [`FromSafeCast`] trait, i.e. this trait is to [`FromSafeCast`] what [`Into`] +/// is to [`From`]. +/// +/// See the documentation of [`FromSafeCast`] for the motivation. +/// +/// # Examples +/// +/// ``` +/// use crate::num::IntoSafeCast; +/// +/// assert_eq!(0xf00u32.into_safe_cast(), 0xf00u32 as usize); +/// ``` +#[expect(unused)] +pub(crate) trait IntoSafeCast { + /// Convert `self` into a `T`. This operation is guaranteed to be lossless. + fn into_safe_cast(self) -> T; +} + +/// Reverse operation for types implementing [`FromSafeCast`]. 
+impl IntoSafeCast for S +where + T: FromSafeCast, +{ + fn into_safe_cast(self) -> T { + T::from_safe_cast(self) + } +} -- cgit From 84e2b401bcc551e7c2e1a995f90cce421bce5bfd Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 27 Oct 2025 23:12:31 +0900 Subject: gpu: nova-core: replace use of `as` with functions from `num` Use the newly-introduced `num` module to replace the use of `as` wherever it is safe to do. This ensures that a given conversion cannot lose data if its source or destination type ever changes. Acked-by: Danilo Krummrich [acourbot@nvidia.com: fix merge conflicts after rebase.] Signed-off-by: Alexandre Courbot Message-ID: <20251029-nova-as-v3-5-6a30c7333ad9@nvidia.com> --- drivers/gpu/nova-core/falcon.rs | 8 +++++-- drivers/gpu/nova-core/fb.rs | 7 +++--- drivers/gpu/nova-core/firmware.rs | 12 +++++++---- drivers/gpu/nova-core/firmware/booter.rs | 37 ++++++++++++++++++++------------ drivers/gpu/nova-core/firmware/fwsec.rs | 17 ++++++++++----- drivers/gpu/nova-core/firmware/gsp.rs | 6 ++++-- drivers/gpu/nova-core/firmware/riscv.rs | 9 ++++---- drivers/gpu/nova-core/num.rs | 2 -- drivers/gpu/nova-core/regs.rs | 5 +++-- drivers/gpu/nova-core/vbios.rs | 9 ++++---- 10 files changed, 70 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 8efc910f20af..0116cb918fc8 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -22,6 +22,10 @@ use crate::{ dma::DmaObject, driver::Bar0, gpu::Chipset, + num::{ + FromSafeCast, + IntoSafeCast, // + }, regs, regs::macros::RegisterBase, // }; @@ -450,7 +454,7 @@ impl Falcon { FalconMem::Imem => (load_offsets.src_start, fw.dma_handle()), FalconMem::Dmem => ( 0, - fw.dma_handle_with_offset(load_offsets.src_start as usize)?, + fw.dma_handle_with_offset(load_offsets.src_start.into_safe_cast())?, ), }; if dma_start % DmaAddress::from(DMA_LEN) > 0 { @@ -476,7 +480,7 @@ impl Falcon { dev_err!(self.dev, "DMA transfer length 
overflow"); return Err(EOVERFLOW); } - Some(upper_bound) if upper_bound as usize > fw.size() => { + Some(upper_bound) if usize::from_safe_cast(upper_bound) > fw.size() => { dev_err!(self.dev, "DMA transfer goes beyond range of DMA object"); return Err(EINVAL); } diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs index 989bbfd5bdee..a99223f73367 100644 --- a/drivers/gpu/nova-core/fb.rs +++ b/drivers/gpu/nova-core/fb.rs @@ -17,6 +17,7 @@ use crate::{ dma::DmaObject, driver::Bar0, gpu::Chipset, + num::usize_as_u64, regs, // }; @@ -112,14 +113,14 @@ impl FbLayout { let vga_workspace = { let vga_base = { - const NV_PRAMIN_SIZE: u64 = SZ_1M as u64; + const NV_PRAMIN_SIZE: u64 = usize_as_u64(SZ_1M); let base = fb.end - NV_PRAMIN_SIZE; if hal.supports_display(bar) { match regs::NV_PDISP_VGA_WORKSPACE_BASE::read(bar).vga_workspace_addr() { Some(addr) => { if addr < base { - const VBIOS_WORKSPACE_SIZE: u64 = SZ_128K as u64; + const VBIOS_WORKSPACE_SIZE: u64 = usize_as_u64(SZ_128K); // Point workspace address to end of framebuffer. 
fb.end - VBIOS_WORKSPACE_SIZE @@ -139,7 +140,7 @@ impl FbLayout { let frts = { const FRTS_DOWN_ALIGN: Alignment = Alignment::new::(); - const FRTS_SIZE: u64 = SZ_1M as u64; + const FRTS_SIZE: u64 = usize_as_u64(SZ_1M); let frts_base = vga_workspace.start.align_down(FRTS_DOWN_ALIGN) - FRTS_SIZE; frts_base..frts_base + FRTS_SIZE diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 163b746f03ef..2d2008b33fb4 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -16,7 +16,11 @@ use kernel::{ use crate::{ dma::DmaObject, falcon::FalconFirmware, - gpu, // + gpu, + num::{ + FromSafeCast, + IntoSafeCast, // + }, }; pub(crate) mod booter; @@ -78,7 +82,7 @@ impl FalconUCodeDescV3 { const HDR_SIZE_SHIFT: u32 = 16; const HDR_SIZE_MASK: u32 = 0xffff0000; - ((self.hdr & HDR_SIZE_MASK) >> HDR_SIZE_SHIFT) as usize + ((self.hdr & HDR_SIZE_MASK) >> HDR_SIZE_SHIFT).into_safe_cast() } } @@ -193,8 +197,8 @@ impl<'a> BinFirmware<'a> { /// Returns the data payload of the firmware, or `None` if the data range is out of bounds of /// the firmware image. fn data(&self) -> Option<&[u8]> { - let fw_start = self.hdr.data_offset as usize; - let fw_size = self.hdr.data_size as usize; + let fw_start = usize::from_safe_cast(self.hdr.data_offset); + let fw_size = usize::from_safe_cast(self.hdr.data_size); self.fw.get(fw_start..fw_start + fw_size) } diff --git a/drivers/gpu/nova-core/firmware/booter.rs b/drivers/gpu/nova-core/firmware/booter.rs index 1e8f6c99fa2e..f107f753214a 100644 --- a/drivers/gpu/nova-core/firmware/booter.rs +++ b/drivers/gpu/nova-core/firmware/booter.rs @@ -34,6 +34,10 @@ use crate::{ Unsigned, // }, gpu::Chipset, + num::{ + FromSafeCast, + IntoSafeCast, // + }, }; /// Local convenience function to return a copy of `S` by reinterpreting the bytes starting at @@ -91,7 +95,7 @@ impl<'a> HsFirmwareV2<'a> { /// /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image. 
fn new(bin_fw: &BinFirmware<'a>) -> Result { - frombytes_at::(bin_fw.fw, bin_fw.hdr.header_offset as usize) + frombytes_at::(bin_fw.fw, bin_fw.hdr.header_offset.into_safe_cast()) .map(|hdr| Self { hdr, fw: bin_fw.fw }) } @@ -100,7 +104,7 @@ impl<'a> HsFirmwareV2<'a> { /// Fails if the offset of the patch location is outside the bounds of the firmware /// image. fn patch_location(&self) -> Result { - frombytes_at::(self.fw, self.hdr.patch_loc_offset as usize) + frombytes_at::(self.fw, self.hdr.patch_loc_offset.into_safe_cast()) } /// Returns an iterator to the signatures of the firmware. The iterator can be empty if the @@ -108,19 +112,23 @@ impl<'a> HsFirmwareV2<'a> { /// /// Fails if the pointed signatures are outside the bounds of the firmware image. fn signatures_iter(&'a self) -> Result>> { - let num_sig = frombytes_at::(self.fw, self.hdr.num_sig_offset as usize)?; + let num_sig = frombytes_at::(self.fw, self.hdr.num_sig_offset.into_safe_cast())?; let iter = match self.hdr.sig_prod_size.checked_div(num_sig) { // If there are no signatures, return an iterator that will yield zero elements. None => (&[] as &[u8]).chunks_exact(1), Some(sig_size) => { - let patch_sig = frombytes_at::(self.fw, self.hdr.patch_sig_offset as usize)?; - let signatures_start = (self.hdr.sig_prod_offset + patch_sig) as usize; + let patch_sig = + frombytes_at::(self.fw, self.hdr.patch_sig_offset.into_safe_cast())?; + let signatures_start = usize::from_safe_cast(self.hdr.sig_prod_offset + patch_sig); self.fw // Get signatures range. - .get(signatures_start..signatures_start + self.hdr.sig_prod_size as usize) + .get( + signatures_start + ..signatures_start + usize::from_safe_cast(self.hdr.sig_prod_size), + ) .ok_or(EINVAL)? 
- .chunks_exact(sig_size as usize) + .chunks_exact(sig_size.into_safe_cast()) } }; @@ -149,9 +157,9 @@ impl HsSignatureParams { /// Fails if the meta data parameter of `hs_fw` is outside the bounds of the firmware image, or /// if its size doesn't match that of [`HsSignatureParams`]. fn new(hs_fw: &HsFirmwareV2<'_>) -> Result { - let start = hs_fw.hdr.meta_data_offset as usize; + let start = usize::from_safe_cast(hs_fw.hdr.meta_data_offset); let end = start - .checked_add(hs_fw.hdr.meta_data_size as usize) + .checked_add(hs_fw.hdr.meta_data_size.into_safe_cast()) .ok_or(EINVAL)?; hs_fw @@ -186,7 +194,7 @@ impl HsLoadHeaderV2 { /// /// Fails if the header pointed at by `hs_fw` is not within the bounds of the firmware image. fn new(hs_fw: &HsFirmwareV2<'_>) -> Result { - frombytes_at::(hs_fw.fw, hs_fw.hdr.header_offset as usize) + frombytes_at::(hs_fw.fw, hs_fw.hdr.header_offset.into_safe_cast()) } } @@ -215,12 +223,13 @@ impl HsLoadHeaderV2App { } else { frombytes_at::( hs_fw.fw, - (hs_fw.hdr.header_offset as usize) + usize::from_safe_cast(hs_fw.hdr.header_offset) // Skip the load header... .checked_add(size_of::()) // ... and jump to app header `idx`. .and_then(|offset| { - offset.checked_add((idx as usize).checked_mul(size_of::())?) + offset + .checked_add(usize::from_safe_cast(idx).checked_mul(size_of::())?) }) .ok_or(EINVAL)?, ) @@ -335,12 +344,12 @@ impl BooterFirmware { dev_err!(dev, "invalid fuse version for Booter firmware\n"); return Err(EINVAL); }; - signatures.nth(idx as usize) + signatures.nth(idx.into_safe_cast()) } } .ok_or(EINVAL)?; - ucode.patch_signature(&signature, patch_loc as usize)? + ucode.patch_signature(&signature, patch_loc.into_safe_cast())? 
} }; diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index cb794e406395..b28e34d279f4 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -46,6 +46,10 @@ use crate::{ Signed, Unsigned, // }, + num::{ + FromSafeCast, + IntoSafeCast, // + }, vbios::Vbios, }; @@ -267,7 +271,7 @@ impl FirmwareDmaObject { let ucode = bios.fwsec_image().ucode(desc)?; let mut dma_object = DmaObject::from_data(dev, ucode)?; - let hdr_offset = (desc.imem_load_size + desc.interface_offset) as usize; + let hdr_offset = usize::from_safe_cast(desc.imem_load_size + desc.interface_offset); // SAFETY: we have exclusive access to `dma_object`. let hdr: &FalconAppifHdrV1 = unsafe { transmute(&dma_object, hdr_offset) }?; @@ -292,7 +296,10 @@ impl FirmwareDmaObject { // SAFETY: we have exclusive access to `dma_object`. let dmem_mapper: &mut FalconAppifDmemmapperV3 = unsafe { - transmute_mut(&mut dma_object, (desc.imem_load_size + dmem_base) as usize) + transmute_mut( + &mut dma_object, + (desc.imem_load_size + dmem_base).into_safe_cast(), + ) }?; dmem_mapper.init_cmd = match cmd { @@ -305,7 +312,7 @@ impl FirmwareDmaObject { let frts_cmd: &mut FrtsCmd = unsafe { transmute_mut( &mut dma_object, - (desc.imem_load_size + cmd_in_buffer_offset) as usize, + (desc.imem_load_size + cmd_in_buffer_offset).into_safe_cast(), ) }?; @@ -353,7 +360,7 @@ impl FwsecFirmware { // Patch signature if needed. let desc = bios.fwsec_image().header()?; let ucode_signed = if desc.signature_count != 0 { - let sig_base_img = (desc.imem_load_size + desc.pkc_data_offset) as usize; + let sig_base_img = usize::from_safe_cast(desc.imem_load_size + desc.pkc_data_offset); let desc_sig_versions = u32::from(desc.signature_versions); let reg_fuse_version = falcon.signature_reg_fuse_version(bar, desc.engine_id_mask, desc.ucode_id)?; @@ -384,7 +391,7 @@ impl FwsecFirmware { // Mask of the bits of `desc_sig_versions` to preserve. 
let reg_fuse_version_mask = reg_fuse_version_bit.wrapping_sub(1); - (desc_sig_versions & reg_fuse_version_mask).count_ones() as usize + usize::from_safe_cast((desc_sig_versions & reg_fuse_version_mask).count_ones()) }; dev_dbg!(dev, "patching signature with index {}\n", signature_idx); diff --git a/drivers/gpu/nova-core/firmware/gsp.rs b/drivers/gpu/nova-core/firmware/gsp.rs index 939e036896bf..72766feae36e 100644 --- a/drivers/gpu/nova-core/firmware/gsp.rs +++ b/drivers/gpu/nova-core/firmware/gsp.rs @@ -24,6 +24,7 @@ use crate::{ Chipset, // }, gsp::GSP_PAGE_SIZE, + num::FromSafeCast, }; /// Ad-hoc and temporary module to extract sections from ELF images. @@ -245,10 +246,11 @@ impl GspFirmware { fn map_into_lvl(sg_table: &SGTable>>, mut dst: VVec) -> Result> { for sg_entry in sg_table.iter() { // Number of pages we need to map. - let num_pages = (sg_entry.dma_len() as usize).div_ceil(GSP_PAGE_SIZE); + let num_pages = usize::from_safe_cast(sg_entry.dma_len()).div_ceil(GSP_PAGE_SIZE); for i in 0..num_pages { - let entry = sg_entry.dma_address() + (i as u64 * GSP_PAGE_SIZE as u64); + let entry = sg_entry.dma_address() + + (u64::from_safe_cast(i) * u64::from_safe_cast(GSP_PAGE_SIZE)); dst.extend_from_slice(&entry.to_le_bytes(), GFP_KERNEL)?; } } diff --git a/drivers/gpu/nova-core/firmware/riscv.rs b/drivers/gpu/nova-core/firmware/riscv.rs index 196dedb96aeb..270b2c7dc219 100644 --- a/drivers/gpu/nova-core/firmware/riscv.rs +++ b/drivers/gpu/nova-core/firmware/riscv.rs @@ -14,7 +14,8 @@ use kernel::{ use crate::{ dma::DmaObject, - firmware::BinFirmware, // + firmware::BinFirmware, + num::FromSafeCast, // }; /// Descriptor for microcode running on a RISC-V core. @@ -45,7 +46,7 @@ impl RmRiscvUCodeDesc { /// /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image. 
fn new(bin_fw: &BinFirmware<'_>) -> Result { - let offset = bin_fw.hdr.header_offset as usize; + let offset = usize::from_safe_cast(bin_fw.hdr.header_offset); bin_fw .fw @@ -78,8 +79,8 @@ impl RiscvFirmware { let riscv_desc = RmRiscvUCodeDesc::new(&bin_fw)?; let ucode = { - let start = bin_fw.hdr.data_offset as usize; - let len = bin_fw.hdr.data_size as usize; + let start = usize::from_safe_cast(bin_fw.hdr.data_offset); + let len = usize::from_safe_cast(bin_fw.hdr.data_size); DmaObject::from_data(dev, fw.data().get(start..start + len).ok_or(EINVAL)?)? }; diff --git a/drivers/gpu/nova-core/num.rs b/drivers/gpu/nova-core/num.rs index 457a1303640f..92a91b9e30de 100644 --- a/drivers/gpu/nova-core/num.rs +++ b/drivers/gpu/nova-core/num.rs @@ -106,7 +106,6 @@ impl_safe_as!(usize as { u32 }); /// /// assert_eq!(usize::from_safe_cast(0xf00u32), 0xf00u32 as usize); /// ``` -#[expect(unused)] pub(crate) trait FromSafeCast { /// Create a `Self` from `value`. This operation is guaranteed to be lossless. fn from_safe_cast(value: T) -> Self; @@ -150,7 +149,6 @@ impl FromSafeCast for usize { /// /// assert_eq!(0xf00u32.into_safe_cast(), 0xf00u32 as usize); /// ``` -#[expect(unused)] pub(crate) trait IntoSafeCast { /// Convert `self` into a `T`. This operation is guaranteed to be lossless. fn into_safe_cast(self) -> T; diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 7cd2e8a4d4c6..934003cab8a8 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -26,6 +26,7 @@ use crate::{ Architecture, Chipset, // }, + num::FromSafeCast, }; // PMC @@ -89,7 +90,7 @@ impl NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE { /// Returns the usable framebuffer size, in bytes. 
pub(crate) fn usable_fb_size(self) -> u64 { let size = (u64::from(self.lower_mag()) << u64::from(self.lower_scale())) - * kernel::sizes::SZ_1M as u64; + * u64::from_safe_cast(kernel::sizes::SZ_1M); if self.ecc_mode_enabled() { // Remove the amount of memory reserved for ECC (one per 16 units). @@ -172,7 +173,7 @@ register!( impl NV_USABLE_FB_SIZE_IN_MB { /// Returns the usable framebuffer size, in bytes. pub(crate) fn usable_fb_size(self) -> u64 { - u64::from(self.value()) * kernel::sizes::SZ_1M as u64 + u64::from(self.value()) * u64::from_safe_cast(kernel::sizes::SZ_1M) } } diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index 9c5b93adeb96..abf423560ff4 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -21,6 +21,7 @@ use crate::{ fwsec::Bcrt30Rsa3kSignature, FalconUCodeDescV3, // }, + num::FromSafeCast, }; /// The offset of the VBIOS ROM in the BAR0 space. @@ -795,7 +796,7 @@ impl PciAtBiosImage { let data_ptr = u32::from_le_bytes(bytes); - if (data_ptr as usize) < self.base.data.len() { + if (usize::from_safe_cast(data_ptr)) < self.base.data.len() { dev_err!(self.base.dev, "Falcon data pointer out of bounds\n"); return Err(EINVAL); } @@ -922,7 +923,7 @@ impl FwSecBiosBuilder { pci_at_image: &PciAtBiosImage, first_fwsec: &FwSecBiosBuilder, ) -> Result { - let mut offset = pci_at_image.falcon_data_ptr()? 
as usize; + let mut offset = usize::from_safe_cast(pci_at_image.falcon_data_ptr()?); let mut pmu_in_first_fwsec = false; // The falcon data pointer assumes that the PciAt and FWSEC images @@ -963,7 +964,7 @@ impl FwSecBiosBuilder { .find_entry_by_type(FALCON_UCODE_ENTRY_APPID_FWSEC_PROD) { Ok(entry) => { - let mut ucode_offset = entry.data as usize; + let mut ucode_offset = usize::from_safe_cast(entry.data); ucode_offset -= pci_at_image.base.data.len(); if ucode_offset < first_fwsec.base.data.len() { dev_err!(self.base.dev, "Falcon Ucode offset not in second Fwsec.\n"); @@ -1049,7 +1050,7 @@ impl FwSecBiosImage { // The ucode data follows the descriptor. let ucode_data_offset = falcon_ucode_offset + desc.size(); - let size = (desc.imem_load_size + desc.dmem_load_size) as usize; + let size = usize::from_safe_cast(desc.imem_load_size + desc.dmem_load_size); // Get the data slice, checking bounds in a single operation. self.base -- cgit From 80b3dc0a5a2e51fb2b8f3406f5ee20ad4a652316 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sun, 26 Oct 2025 22:06:54 +0900 Subject: gpu: nova-core: justify remaining uses of `as` There are a few remaining cases where we *do* want to use `as`, because we specifically want to strip the data that does not fit into the destination type. Comment these uses to clear confusion about the intent. Acked-by: Danilo Krummrich [acourbot@nvidia.com: fix merge conflicts after rebase.] Signed-off-by: Alexandre Courbot Message-ID: <20251029-nova-as-v3-6-6a30c7333ad9@nvidia.com> --- drivers/gpu/nova-core/falcon.rs | 4 ++++ drivers/gpu/nova-core/fb/hal/ga100.rs | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 0116cb918fc8..fe5b3256d972 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -490,9 +490,13 @@ impl Falcon { // Set up the base source DMA address. 
regs::NV_PFALCON_FALCON_DMATRFBASE::default() + // CAST: `as u32` is used on purpose since we do want to strip the upper bits, which + // will be written to `NV_PFALCON_FALCON_DMATRFBASE1`. .set_base((dma_start >> 8) as u32) .write(bar, &E::ID); regs::NV_PFALCON_FALCON_DMATRFBASE1::default() + // CAST: `as u16` is used on purpose since the remaining bits are guaranteed to fit + // within a `u16`. .set_base((dma_start >> 40) as u16) .write(bar, &E::ID); diff --git a/drivers/gpu/nova-core/fb/hal/ga100.rs b/drivers/gpu/nova-core/fb/hal/ga100.rs index dae392c38a1b..e0acc41aa7cd 100644 --- a/drivers/gpu/nova-core/fb/hal/ga100.rs +++ b/drivers/gpu/nova-core/fb/hal/ga100.rs @@ -20,9 +20,13 @@ pub(super) fn read_sysmem_flush_page_ga100(bar: &Bar0) -> u64 { pub(super) fn write_sysmem_flush_page_ga100(bar: &Bar0, addr: u64) { regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::default() + // CAST: `as u32` is used on purpose since the remaining bits are guaranteed to fit within + // a `u32`. .set_adr_63_40((addr >> FLUSH_SYSMEM_ADDR_SHIFT_HI) as u32) .write(bar); regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::default() + // CAST: `as u32` is used on purpose since we want to strip the upper bits that have been + // written to `NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI`. .set_adr_39_08((addr >> FLUSH_SYSMEM_ADDR_SHIFT) as u32) .write(bar); } -- cgit From e54ad0cd3673c93cdafda58505eaa81610fe3aef Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Fri, 7 Nov 2025 15:25:56 -0500 Subject: rust/drm/gem: Fix missing header in `Object` rustdoc Invariants should be prefixed with a # to turn it into a header. There are no functional changes in this patch. 
Cc: stable@vger.kernel.org Fixes: c284d3e42338 ("rust: drm: gem: Add GEM object abstraction") Signed-off-by: Lyude Paul Link: https://patch.msgid.link/20251107202603.465932-1-lyude@redhat.com Signed-off-by: Alice Ryhl --- rust/kernel/drm/gem/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs index eb5f3feac890..a7f682e95c01 100644 --- a/rust/kernel/drm/gem/mod.rs +++ b/rust/kernel/drm/gem/mod.rs @@ -164,7 +164,7 @@ impl BaseObject for T {} /// A base GEM object. /// -/// Invariants +/// # Invariants /// /// - `self.obj` is a valid instance of a `struct drm_gem_object`. #[repr(C)] -- cgit From 7c01dc25f5c828401a5807307c4f7dda6555469f Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 10 Nov 2025 22:34:09 +0900 Subject: gpu: nova-core: compute layout of more framebuffer regions required for GSP Compute more of the required FB layout information to boot the GSP firmware. This information is dependent on the firmware itself, so first we need to import and abstract the required firmware bindings in the `nvfw` module. Then, a new FB HAL method is introduced in `fb::hal` that uses these bindings and hardware information to compute the correct layout information. This information is then used in `fb` and the result made visible in `FbLayout`. These 3 things are grouped into the same patch to avoid lots of unused warnings that would be tedious to work around. As they happen in different files, they should not be too difficult to track separately. 
Acked-by: Danilo Krummrich Signed-off-by: Alexandre Courbot Message-ID: <20251110-gsp_boot-v9-1-8ae4058e3c0e@nvidia.com> --- drivers/gpu/nova-core/fb.rs | 71 +++++++++++- drivers/gpu/nova-core/firmware/gsp.rs | 4 +- drivers/gpu/nova-core/firmware/riscv.rs | 2 +- drivers/gpu/nova-core/gsp.rs | 5 + drivers/gpu/nova-core/gsp/boot.rs | 4 +- drivers/gpu/nova-core/gsp/fw.rs | 113 ++++++++++++++++++- drivers/gpu/nova-core/gsp/fw/r570_144.rs | 1 - drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs | 125 ++++++++++++++++++++++ 8 files changed, 314 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs index a99223f73367..156d9bf1f191 100644 --- a/drivers/gpu/nova-core/fb.rs +++ b/drivers/gpu/nova-core/fb.rs @@ -16,9 +16,14 @@ use kernel::{ use crate::{ dma::DmaObject, driver::Bar0, + firmware::gsp::GspFirmware, gpu::Chipset, - num::usize_as_u64, - regs, // + gsp, + num::{ + usize_as_u64, + FromSafeCast, // + }, + regs, }; mod hal; @@ -95,14 +100,27 @@ impl SysmemFlush { #[derive(Debug)] #[expect(dead_code)] pub(crate) struct FbLayout { + /// Range of the framebuffer. Starts at `0`. pub(crate) fb: Range, + /// VGA workspace, small area of reserved memory at the end of the framebuffer. pub(crate) vga_workspace: Range, + /// FRTS range. pub(crate) frts: Range, + /// Memory area containing the GSP bootloader image. + pub(crate) boot: Range, + /// Memory area containing the GSP firmware image. + pub(crate) elf: Range, + /// WPR2 heap. + pub(crate) wpr2_heap: Range, + /// WPR2 region range, starting with an instance of `GspFwWprMeta`. + pub(crate) wpr2: Range, + pub(crate) heap: Range, + pub(crate) vf_partition_count: u8, } impl FbLayout { - /// Computes the FB layout. - pub(crate) fn new(chipset: Chipset, bar: &Bar0) -> Result { + /// Computes the FB layout for `chipset` required to run the `gsp_fw` GSP firmware. 
+ pub(crate) fn new(chipset: Chipset, bar: &Bar0, gsp_fw: &GspFirmware) -> Result { let hal = hal::fb_hal(chipset); let fb = { @@ -146,10 +164,55 @@ impl FbLayout { frts_base..frts_base + FRTS_SIZE }; + let boot = { + const BOOTLOADER_DOWN_ALIGN: Alignment = Alignment::new::(); + let bootloader_size = u64::from_safe_cast(gsp_fw.bootloader.ucode.size()); + let bootloader_base = (frts.start - bootloader_size).align_down(BOOTLOADER_DOWN_ALIGN); + + bootloader_base..bootloader_base + bootloader_size + }; + + let elf = { + const ELF_DOWN_ALIGN: Alignment = Alignment::new::(); + let elf_size = u64::from_safe_cast(gsp_fw.size); + let elf_addr = (boot.start - elf_size).align_down(ELF_DOWN_ALIGN); + + elf_addr..elf_addr + elf_size + }; + + let wpr2_heap = { + const WPR2_HEAP_DOWN_ALIGN: Alignment = Alignment::new::(); + let wpr2_heap_size = + gsp::LibosParams::from_chipset(chipset).wpr_heap_size(chipset, fb.end); + let wpr2_heap_addr = (elf.start - wpr2_heap_size).align_down(WPR2_HEAP_DOWN_ALIGN); + + wpr2_heap_addr..(elf.start).align_down(WPR2_HEAP_DOWN_ALIGN) + }; + + let wpr2 = { + const WPR2_DOWN_ALIGN: Alignment = Alignment::new::(); + let wpr2_addr = (wpr2_heap.start - u64::from_safe_cast(size_of::())) + .align_down(WPR2_DOWN_ALIGN); + + wpr2_addr..frts.end + }; + + let heap = { + const HEAP_SIZE: u64 = usize_as_u64(SZ_1M); + + wpr2.start - HEAP_SIZE..wpr2.start + }; + Ok(Self { fb, vga_workspace, frts, + boot, + elf, + wpr2_heap, + wpr2, + heap, + vf_partition_count: 0, }) } } diff --git a/drivers/gpu/nova-core/firmware/gsp.rs b/drivers/gpu/nova-core/firmware/gsp.rs index 72766feae36e..471ace238f62 100644 --- a/drivers/gpu/nova-core/firmware/gsp.rs +++ b/drivers/gpu/nova-core/firmware/gsp.rs @@ -143,11 +143,11 @@ pub(crate) struct GspFirmware { /// Level 0 page table (single 4KB page) with one entry: DMA address of first level 1 page. level0: DmaObject, /// Size in bytes of the firmware contained in [`Self::fw`]. 
- size: usize, + pub(crate) size: usize, /// Device-mapped GSP signatures matching the GPU's [`Chipset`]. signatures: DmaObject, /// GSP bootloader, verifies the GSP firmware before loading and running it. - bootloader: RiscvFirmware, + pub(crate) bootloader: RiscvFirmware, } impl GspFirmware { diff --git a/drivers/gpu/nova-core/firmware/riscv.rs b/drivers/gpu/nova-core/firmware/riscv.rs index 270b2c7dc219..3838fab8f1c0 100644 --- a/drivers/gpu/nova-core/firmware/riscv.rs +++ b/drivers/gpu/nova-core/firmware/riscv.rs @@ -68,7 +68,7 @@ pub(crate) struct RiscvFirmware { /// Application version. app_version: u32, /// Device-mapped firmware image. - ucode: DmaObject, + pub(crate) ucode: DmaObject, } impl RiscvFirmware { diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 64e472e7a9d3..55a1ad90a373 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -6,6 +6,11 @@ use kernel::prelude::*; mod fw; +pub(crate) use fw::{ + GspFwWprMeta, + LibosParams, // +}; + pub(crate) const GSP_PAGE_SHIFT: usize = 12; pub(crate) const GSP_PAGE_SIZE: usize = 1 << GSP_PAGE_SHIFT; diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index 19dddff929da..979d3391e58c 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -127,12 +127,12 @@ impl super::Gsp { let bios = Vbios::new(dev, bar)?; - let _gsp_fw = KBox::pin_init( + let gsp_fw = KBox::pin_init( GspFirmware::new(dev, chipset, FIRMWARE_VERSION)?, GFP_KERNEL, )?; - let fb_layout = FbLayout::new(chipset, bar)?; + let fb_layout = FbLayout::new(chipset, bar, &gsp_fw)?; dev_dbg!(dev, "{:#x?}\n", fb_layout); Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?; diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs index 34226dd00982..436c00d07b16 100644 --- a/drivers/gpu/nova-core/gsp/fw.rs +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -3,5 +3,116 @@ mod r570_144; // Alias to avoid repeating the 
version number with every use. -#[expect(unused)] use r570_144 as bindings; + +use core::ops::Range; + +use kernel::{ + ptr::{ + Alignable, + Alignment, // + }, + sizes::SZ_1M, +}; + +use crate::{ + gpu::Chipset, + num::{ + self, + FromSafeCast, // + }, +}; + +/// Empty type to group methods related to heap parameters for running the GSP firmware. +enum GspFwHeapParams {} + +/// Minimum required alignment for the GSP heap. +const GSP_HEAP_ALIGNMENT: Alignment = Alignment::new::<{ 1 << 20 }>(); + +impl GspFwHeapParams { + /// Returns the amount of GSP-RM heap memory used during GSP-RM boot and initialization (up to + /// and including the first client subdevice allocation). + fn base_rm_size(_chipset: Chipset) -> u64 { + // TODO: this needs to be updated to return the correct value for Hopper+ once support for + // them is added: + // u64::from(bindings::GSP_FW_HEAP_PARAM_BASE_RM_SIZE_GH100) + u64::from(bindings::GSP_FW_HEAP_PARAM_BASE_RM_SIZE_TU10X) + } + + /// Returns the amount of heap memory required to support a single channel allocation. + fn client_alloc_size() -> u64 { + u64::from(bindings::GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE) + .align_up(GSP_HEAP_ALIGNMENT) + .unwrap_or(u64::MAX) + } + + /// Returns the amount of memory to reserve for management purposes for a framebuffer of size + /// `fb_size`. + fn management_overhead(fb_size: u64) -> u64 { + let fb_size_gb = fb_size.div_ceil(u64::from_safe_cast(kernel::sizes::SZ_1G)); + + u64::from(bindings::GSP_FW_HEAP_PARAM_SIZE_PER_GB_FB) + .saturating_mul(fb_size_gb) + .align_up(GSP_HEAP_ALIGNMENT) + .unwrap_or(u64::MAX) + } +} + +/// Heap memory requirements and constraints for a given version of the GSP LIBOS. +pub(crate) struct LibosParams { + /// The base amount of heap required by the GSP operating system, in bytes. + carveout_size: u64, + /// The minimum and maximum sizes allowed for the GSP FW heap, in bytes. 
+ allowed_heap_size: Range, +} + +impl LibosParams { + /// Version 2 of the GSP LIBOS (Turing and GA100) + const LIBOS2: LibosParams = LibosParams { + carveout_size: num::u32_as_u64(bindings::GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS2), + allowed_heap_size: num::u32_as_u64(bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MIN_MB) + * num::usize_as_u64(SZ_1M) + ..num::u32_as_u64(bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MAX_MB) + * num::usize_as_u64(SZ_1M), + }; + + /// Version 3 of the GSP LIBOS (GA102+) + const LIBOS3: LibosParams = LibosParams { + carveout_size: num::u32_as_u64(bindings::GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS3_BAREMETAL), + allowed_heap_size: num::u32_as_u64( + bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MIN_MB, + ) * num::usize_as_u64(SZ_1M) + ..num::u32_as_u64(bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MAX_MB) + * num::usize_as_u64(SZ_1M), + }; + + /// Returns the libos parameters corresponding to `chipset`. + pub(crate) fn from_chipset(chipset: Chipset) -> &'static LibosParams { + if chipset < Chipset::GA102 { + &Self::LIBOS2 + } else { + &Self::LIBOS3 + } + } + + /// Returns the amount of memory (in bytes) to allocate for the WPR heap for a framebuffer size + /// of `fb_size` (in bytes) for `chipset`. + pub(crate) fn wpr_heap_size(&self, chipset: Chipset, fb_size: u64) -> u64 { + // The WPR heap will contain the following: + // LIBOS carveout, + self.carveout_size + // RM boot working memory, + .saturating_add(GspFwHeapParams::base_rm_size(chipset)) + // One RM client, + .saturating_add(GspFwHeapParams::client_alloc_size()) + // Overhead for memory management. + .saturating_add(GspFwHeapParams::management_overhead(fb_size)) + // Clamp to the supported heap sizes. + .clamp(self.allowed_heap_size.start, self.allowed_heap_size.end - 1) + } +} + +/// Structure passed to the GSP bootloader, containing the framebuffer layout as well as the DMA +/// addresses of the GSP bootloader and firmware. 
+#[repr(transparent)] +pub(crate) struct GspFwWprMeta(bindings::GspFwWprMeta); diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144.rs b/drivers/gpu/nova-core/gsp/fw/r570_144.rs index 35cb0370a7c9..82a973cd99c3 100644 --- a/drivers/gpu/nova-core/gsp/fw/r570_144.rs +++ b/drivers/gpu/nova-core/gsp/fw/r570_144.rs @@ -12,7 +12,6 @@ #![cfg_attr(test, allow(unsafe_op_in_unsafe_fn))] #![allow( dead_code, - unused_imports, clippy::all, clippy::undocumented_unsafe_blocks, clippy::ptr_as_ptr, diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs index cec594032515..0407000cca22 100644 --- a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -1 +1,126 @@ // SPDX-License-Identifier: GPL-2.0 + +pub const GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS2: u32 = 0; +pub const GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS3_BAREMETAL: u32 = 23068672; +pub const GSP_FW_HEAP_PARAM_BASE_RM_SIZE_TU10X: u32 = 8388608; +pub const GSP_FW_HEAP_PARAM_SIZE_PER_GB_FB: u32 = 98304; +pub const GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE: u32 = 100663296; +pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MIN_MB: u32 = 64; +pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MAX_MB: u32 = 256; +pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MIN_MB: u32 = 88; +pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MAX_MB: u32 = 280; +pub type __u8 = ffi::c_uchar; +pub type __u16 = ffi::c_ushort; +pub type __u32 = ffi::c_uint; +pub type __u64 = ffi::c_ulonglong; +pub type u8_ = __u8; +pub type u16_ = __u16; +pub type u32_ = __u32; +pub type u64_ = __u64; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct GspFwWprMeta { + pub magic: u64_, + pub revision: u64_, + pub sysmemAddrOfRadix3Elf: u64_, + pub sizeOfRadix3Elf: u64_, + pub sysmemAddrOfBootloader: u64_, + pub sizeOfBootloader: u64_, + pub bootloaderCodeOffset: u64_, + pub bootloaderDataOffset: u64_, + pub bootloaderManifestOffset: u64_, + pub __bindgen_anon_1: 
GspFwWprMeta__bindgen_ty_1, + pub gspFwRsvdStart: u64_, + pub nonWprHeapOffset: u64_, + pub nonWprHeapSize: u64_, + pub gspFwWprStart: u64_, + pub gspFwHeapOffset: u64_, + pub gspFwHeapSize: u64_, + pub gspFwOffset: u64_, + pub bootBinOffset: u64_, + pub frtsOffset: u64_, + pub frtsSize: u64_, + pub gspFwWprEnd: u64_, + pub fbSize: u64_, + pub vgaWorkspaceOffset: u64_, + pub vgaWorkspaceSize: u64_, + pub bootCount: u64_, + pub __bindgen_anon_2: GspFwWprMeta__bindgen_ty_2, + pub gspFwHeapVfPartitionCount: u8_, + pub flags: u8_, + pub padding: [u8_; 2usize], + pub pmuReservedSize: u32_, + pub verified: u64_, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union GspFwWprMeta__bindgen_ty_1 { + pub __bindgen_anon_1: GspFwWprMeta__bindgen_ty_1__bindgen_ty_1, + pub __bindgen_anon_2: GspFwWprMeta__bindgen_ty_1__bindgen_ty_2, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct GspFwWprMeta__bindgen_ty_1__bindgen_ty_1 { + pub sysmemAddrOfSignature: u64_, + pub sizeOfSignature: u64_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct GspFwWprMeta__bindgen_ty_1__bindgen_ty_2 { + pub gspFwHeapFreeListWprOffset: u32_, + pub unused0: u32_, + pub unused1: u64_, +} +impl Default for GspFwWprMeta__bindgen_ty_1 { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union GspFwWprMeta__bindgen_ty_2 { + pub __bindgen_anon_1: GspFwWprMeta__bindgen_ty_2__bindgen_ty_1, + pub __bindgen_anon_2: GspFwWprMeta__bindgen_ty_2__bindgen_ty_2, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct GspFwWprMeta__bindgen_ty_2__bindgen_ty_1 { + pub partitionRpcAddr: u64_, + pub partitionRpcRequestOffset: u16_, + pub partitionRpcReplyOffset: u16_, + pub elfCodeOffset: u32_, + pub elfDataOffset: u32_, + pub elfCodeSize: u32_, + pub elfDataSize: u32_, + pub lsUcodeVersion: u32_, +} +#[repr(C)] +#[derive(Debug, 
Default, Copy, Clone)] +pub struct GspFwWprMeta__bindgen_ty_2__bindgen_ty_2 { + pub partitionRpcPadding: [u32_; 4usize], + pub sysmemAddrOfCrashReportQueue: u64_, + pub sizeOfCrashReportQueue: u32_, + pub lsUcodeVersionPadding: [u32_; 1usize], +} +impl Default for GspFwWprMeta__bindgen_ty_2 { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +impl Default for GspFwWprMeta { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} -- cgit From 1101c442410cd57af848c30804e985aab9e0e569 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 10 Nov 2025 22:34:10 +0900 Subject: gpu: nova-core: Set correct DMA mask Set the correct DMA mask. Without this DMA will fail on some setups. Signed-off-by: Alistair Popple Signed-off-by: Alexandre Courbot Message-ID: <20251110-gsp_boot-v9-2-8ae4058e3c0e@nvidia.com> --- drivers/gpu/nova-core/driver.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs index 2509f75eccb9..d91bbc50cde7 100644 --- a/drivers/gpu/nova-core/driver.rs +++ b/drivers/gpu/nova-core/driver.rs @@ -4,6 +4,8 @@ use kernel::{ auxiliary, c_str, device::Core, + dma::Device, + dma::DmaMask, pci, pci::{ Class, @@ -25,6 +27,15 @@ pub(crate) struct NovaCore { } const BAR0_SIZE: usize = SZ_16M; + +// For now we only support Ampere which can use up to 47-bit DMA addresses. +// +// TODO: Add an abstraction for this to support newer GPUs which may support +// larger DMA addresses. Limiting these GPUs to smaller address widths won't +// have any adverse effects, unless installed on systems which require larger +// DMA addresses. These systems should be quite rare.
+const GPU_DMA_BITS: u32 = 47; + pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>; kernel::pci_device_table!( @@ -62,6 +73,11 @@ impl pci::Driver for NovaCore { pdev.enable_device_mem()?; pdev.set_master(); + // SAFETY: No concurrent DMA allocations or mappings can be made because + // the device is still being probed and therefore isn't being used by + // other threads of execution. + unsafe { pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? }; + let devres_bar = Arc::pin_init( pdev.iomap_region_sized::<BAR0_SIZE>(0, c_str!("nova-core/bar0")), GFP_KERNEL, -- cgit From 89605daa1ee0de634d7f2ee6370363cfaa8c9499 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 10 Nov 2025 22:34:11 +0900 Subject: gpu: nova-core: num: add functions to safely convert a const value to a smaller type There are times where we need to store a constant value defined as a larger type (e.g. through a binding) into a smaller type, knowing that the value will fit. Rust, unfortunately, only provides us with the `as` operator for that purpose, the use of which is discouraged as it silently strips data. Extend the `num` module with functions allowing to perform the conversion infallibly, at compile time. Example: const FOO_VALUE: u32 = 1; // `FOO_VALUE` fits into a `u8`, so the conversion is valid. let foo = num::u32_into_u8::<{ FOO_VALUE }>(); We are going to use this feature extensively in Nova. Reviewed-by: Mikko Perttunen [acourbot@nvidia.com: fix mistake in example pointed out by Mikko.]
Signed-off-by: Alexandre Courbot Message-ID: <20251110-gsp_boot-v9-3-8ae4058e3c0e@nvidia.com> --- drivers/gpu/nova-core/num.rs | 52 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/drivers/gpu/nova-core/num.rs b/drivers/gpu/nova-core/num.rs index 92a91b9e30de..c952a834e662 100644 --- a/drivers/gpu/nova-core/num.rs +++ b/drivers/gpu/nova-core/num.rs @@ -163,3 +163,55 @@ where T::from_safe_cast(self) } } + +/// Implements lossless conversion of a constant from a larger type into a smaller one. +macro_rules! impl_const_into { + ($from:ty => { $($into:ty),* }) => { + $( + paste! { + #[doc = ::core::concat!( + "Performs a build-time safe conversion of a [`", + ::core::stringify!($from), + "`] constant value into a [`", + ::core::stringify!($into), + "`].")] + /// + /// This checks at compile-time that the conversion is lossless, and triggers a build + /// error if it isn't. + /// + /// # Examples + /// + /// ``` + /// use crate::num; + /// + /// // Succeeds because the value of the source fits into the destination's type. + #[doc = ::core::concat!( + "assert_eq!(num::", + ::core::stringify!($from), + "_into_", + ::core::stringify!($into), + "::<1", + ::core::stringify!($from), + ">(), 1", + ::core::stringify!($into), + ");")] + /// ``` + #[allow(unused)] + pub(crate) const fn [<$from _into_ $into>]() -> $into { + // Make sure that the target type is smaller than the source one. + static_assert!($from::BITS >= $into::BITS); + // CAST: we statically enforced above that `$from` is larger than `$into`, so the + // `as` conversion will be lossless. 
+ build_assert!(N >= $into::MIN as $from && N <= $into::MAX as $from); + + N as $into + } + } + )* + }; +} + +impl_const_into!(usize => { u8, u16, u32 }); +impl_const_into!(u64 => { u8, u16, u32 }); +impl_const_into!(u32 => { u8, u16 }); +impl_const_into!(u16 => { u8 }); -- cgit From f38b4f105cfc19598bdb512c08f5d27be774f0de Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 10 Nov 2025 22:34:12 +0900 Subject: gpu: nova-core: Create initial Gsp The GSP requires several areas of memory to operate. Each of these have their own simple embedded page tables. Set these up and map them for DMA to/from GSP using CoherentAllocation's. Return the DMA handle describing where each of these regions are for future use when booting GSP. Signed-off-by: Alistair Popple Co-developed-by: Alexandre Courbot Signed-off-by: Alexandre Courbot Message-ID: <20251110-gsp_boot-v9-4-8ae4058e3c0e@nvidia.com> --- drivers/gpu/nova-core/gpu.rs | 2 +- drivers/gpu/nova-core/gsp.rs | 123 ++++++++++++++++++++-- drivers/gpu/nova-core/gsp/fw.rs | 60 +++++++++++ drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs | 19 ++++ 4 files changed, 197 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 802e71e4f97d..03dae437bc37 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -231,7 +231,7 @@ impl Gpu { sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset)?, - gsp <- Gsp::new(), + gsp <- Gsp::new(pdev)?, _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? 
}, diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 55a1ad90a373..ec053395694b 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -2,7 +2,17 @@ mod boot; -use kernel::prelude::*; +use kernel::{ + device, + dma::{ + CoherentAllocation, + DmaAddress, // + }, + dma_write, + pci, + prelude::*, + transmute::AsBytes, // +}; mod fw; @@ -11,17 +21,118 @@ pub(crate) use fw::{ LibosParams, // }; +use crate::{ + gsp::fw::LibosMemoryRegionInitArgument, + num, // +}; + pub(crate) const GSP_PAGE_SHIFT: usize = 12; pub(crate) const GSP_PAGE_SIZE: usize = 1 << GSP_PAGE_SHIFT; -/// GSP runtime data. +/// Number of GSP pages to use in a RM log buffer. +const RM_LOG_BUFFER_NUM_PAGES: usize = 0x10; + +/// Array of page table entries, as understood by the GSP bootloader. +#[repr(C)] +struct PteArray([u64; NUM_ENTRIES]); + +/// SAFETY: arrays of `u64` implement `AsBytes` and we are but a wrapper around one. +unsafe impl AsBytes for PteArray {} + +impl PteArray { + /// Creates a new page table array mapping `NUM_PAGES` GSP pages starting at address `start`. + fn new(start: DmaAddress) -> Result { + let mut ptes = [0u64; NUM_PAGES]; + for (i, pte) in ptes.iter_mut().enumerate() { + *pte = start + .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) + .ok_or(EOVERFLOW)?; + } + + Ok(Self(ptes)) + } +} + +/// The logging buffers are byte queues that contain encoded printf-like +/// messages from GSP-RM. They need to be decoded by a special application +/// that can parse the buffers. +/// +/// The 'loginit' buffer contains logs from early GSP-RM init and +/// exception dumps. The 'logrm' buffer contains the subsequent logs. Both are +/// written to directly by GSP-RM and can be any multiple of GSP_PAGE_SIZE. /// -/// This is an empty pinned placeholder for now. +/// The physical address map for the log buffer is stored in the buffer +/// itself, starting with offset 1. Offset 0 contains the "put" pointer (pp). 
+/// Initially, pp is equal to 0. If the buffer has valid logging data in it, +/// then pp points to index into the buffer where the next logging entry will +/// be written. Therefore, the logging data is valid if: +/// 1 <= pp < sizeof(buffer)/sizeof(u64) +struct LogBuffer(CoherentAllocation); + +impl LogBuffer { + /// Creates a new `LogBuffer` mapped on `dev`. + fn new(dev: &device::Device) -> Result { + const NUM_PAGES: usize = RM_LOG_BUFFER_NUM_PAGES; + + let mut obj = Self(CoherentAllocation::::alloc_coherent( + dev, + NUM_PAGES * GSP_PAGE_SIZE, + GFP_KERNEL | __GFP_ZERO, + )?); + let ptes = PteArray::::new(obj.0.dma_handle())?; + + // SAFETY: `obj` has just been created and we are its sole user. + unsafe { + // Copy the self-mapping PTE at the expected location. + obj.0 + .as_slice_mut(size_of::(), size_of_val(&ptes))? + .copy_from_slice(ptes.as_bytes()) + }; + + Ok(obj) + } +} + +/// GSP runtime data. #[pin_data] -pub(crate) struct Gsp {} +pub(crate) struct Gsp { + /// Libos arguments. + pub(crate) libos: CoherentAllocation, + /// Init log buffer. + loginit: LogBuffer, + /// Interrupts log buffer. + logintr: LogBuffer, + /// RM log buffer. + logrm: LogBuffer, +} impl Gsp { - pub(crate) fn new() -> impl PinInit { - pin_init!(Self {}) + // Creates an in-place initializer for a `Gsp` manager for `pdev`. + pub(crate) fn new(pdev: &pci::Device) -> Result> { + let dev = pdev.as_ref(); + let libos = CoherentAllocation::::alloc_coherent( + dev, + GSP_PAGE_SIZE / size_of::(), + GFP_KERNEL | __GFP_ZERO, + )?; + + // Initialise the logging structures. 
The OpenRM equivalents are in: + // _kgspInitLibosLoggingStructures (allocates memory for buffers) + // kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array) + let loginit = LogBuffer::new(dev)?; + dma_write!(libos[0] = LibosMemoryRegionInitArgument::new("LOGINIT", &loginit.0))?; + + let logintr = LogBuffer::new(dev)?; + dma_write!(libos[1] = LibosMemoryRegionInitArgument::new("LOGINTR", &logintr.0))?; + + let logrm = LogBuffer::new(dev)?; + dma_write!(libos[2] = LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0))?; + + Ok(try_pin_init!(Self { + libos, + loginit, + logintr, + logrm, + })) } } diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs index 436c00d07b16..458b5610061f 100644 --- a/drivers/gpu/nova-core/gsp/fw.rs +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -8,11 +8,16 @@ use r570_144 as bindings; use core::ops::Range; use kernel::{ + dma::CoherentAllocation, ptr::{ Alignable, Alignment, // }, sizes::SZ_1M, + transmute::{ + AsBytes, + FromBytes, // + }, }; use crate::{ @@ -116,3 +121,58 @@ impl LibosParams { /// addresses of the GSP bootloader and firmware. #[repr(transparent)] pub(crate) struct GspFwWprMeta(bindings::GspFwWprMeta); + +/// Struct containing the arguments required to pass a memory buffer to the GSP +/// for use during initialisation. +/// +/// The GSP only understands 4K pages (GSP_PAGE_SIZE), so even if the kernel is +/// configured for a larger page size (e.g. 64K pages), we need to give +/// the GSP an array of 4K pages. Since we only create physically contiguous +/// buffers the math to calculate the addresses is simple. +/// +/// The buffers must be a multiple of GSP_PAGE_SIZE. GSP-RM also currently +/// ignores the @kind field for LOGINIT, LOGINTR, and LOGRM, but expects the +/// buffers to be physically contiguous anyway. +/// +/// The memory allocated for the arguments must remain until the GSP sends the +/// init_done RPC. 
+#[repr(transparent)] +pub(crate) struct LibosMemoryRegionInitArgument(bindings::LibosMemoryRegionInitArgument); + +// SAFETY: Padding is explicit and does not contain uninitialized data. +unsafe impl AsBytes for LibosMemoryRegionInitArgument {} + +// SAFETY: This struct only contains integer types for which all bit patterns +// are valid. +unsafe impl FromBytes for LibosMemoryRegionInitArgument {} + +impl LibosMemoryRegionInitArgument { + pub(crate) fn new( + name: &'static str, + obj: &CoherentAllocation, + ) -> Self { + /// Generates the `ID8` identifier required for some GSP objects. + fn id8(name: &str) -> u64 { + let mut bytes = [0u8; core::mem::size_of::()]; + + for (c, b) in name.bytes().rev().zip(&mut bytes) { + *b = c; + } + + u64::from_ne_bytes(bytes) + } + + Self(bindings::LibosMemoryRegionInitArgument { + id8: id8(name), + pa: obj.dma_handle(), + size: num::usize_as_u64(obj.size()), + kind: num::u32_into_u8::< + { bindings::LibosMemoryRegionKind_LIBOS_MEMORY_REGION_CONTIGUOUS }, + >(), + loc: num::u32_into_u8::< + { bindings::LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_SYSMEM }, + >(), + ..Default::default() + }) + } +} diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs index 0407000cca22..6a14cc324391 100644 --- a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -124,3 +124,22 @@ impl Default for GspFwWprMeta { } } } +pub type LibosAddress = u64_; +pub const LibosMemoryRegionKind_LIBOS_MEMORY_REGION_NONE: LibosMemoryRegionKind = 0; +pub const LibosMemoryRegionKind_LIBOS_MEMORY_REGION_CONTIGUOUS: LibosMemoryRegionKind = 1; +pub const LibosMemoryRegionKind_LIBOS_MEMORY_REGION_RADIX3: LibosMemoryRegionKind = 2; +pub type LibosMemoryRegionKind = ffi::c_uint; +pub const LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_NONE: LibosMemoryRegionLoc = 0; +pub const LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_SYSMEM: LibosMemoryRegionLoc = 1; 
+pub const LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_FB: LibosMemoryRegionLoc = 2; +pub type LibosMemoryRegionLoc = ffi::c_uint; +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct LibosMemoryRegionInitArgument { + pub id8: LibosAddress, + pub pa: LibosAddress, + pub size: LibosAddress, + pub kind: u8_, + pub loc: u8_, + pub __bindgen_padding_0: [u8; 6usize], +} -- cgit From 41235c40eda024f8d2a1e2456ab7a82c9db05e78 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 10 Nov 2025 22:34:13 +0900 Subject: gpu: nova-core: gsp: Create wpr metadata The GSP requires some pieces of metadata to boot. These are passed in a struct which the GSP transfers via DMA. Create this struct and get a handle to it for future use when booting the GSP. Signed-off-by: Alistair Popple Co-developed-by: Alexandre Courbot Signed-off-by: Alexandre Courbot Message-ID: <20251110-gsp_boot-v9-5-8ae4058e3c0e@nvidia.com> --- drivers/gpu/nova-core/fb.rs | 1 - drivers/gpu/nova-core/firmware/gsp.rs | 3 +- drivers/gpu/nova-core/firmware/riscv.rs | 6 +-- drivers/gpu/nova-core/gsp/boot.rs | 7 +++ drivers/gpu/nova-core/gsp/fw.rs | 61 ++++++++++++++++++++++- drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs | 2 + 6 files changed, 73 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs index 156d9bf1f191..3c9cf151786c 100644 --- a/drivers/gpu/nova-core/fb.rs +++ b/drivers/gpu/nova-core/fb.rs @@ -98,7 +98,6 @@ impl SysmemFlush { /// /// Contains ranges of GPU memory reserved for a given purpose during the GSP boot process. #[derive(Debug)] -#[expect(dead_code)] pub(crate) struct FbLayout { /// Range of the framebuffer. Starts at `0`. 
pub(crate) fb: Range, diff --git a/drivers/gpu/nova-core/firmware/gsp.rs b/drivers/gpu/nova-core/firmware/gsp.rs index 471ace238f62..0549805282ab 100644 --- a/drivers/gpu/nova-core/firmware/gsp.rs +++ b/drivers/gpu/nova-core/firmware/gsp.rs @@ -145,7 +145,7 @@ pub(crate) struct GspFirmware { /// Size in bytes of the firmware contained in [`Self::fw`]. pub(crate) size: usize, /// Device-mapped GSP signatures matching the GPU's [`Chipset`]. - signatures: DmaObject, + pub(crate) signatures: DmaObject, /// GSP bootloader, verifies the GSP firmware before loading and running it. pub(crate) bootloader: RiscvFirmware, } @@ -231,7 +231,6 @@ impl GspFirmware { })) } - #[expect(unused)] /// Returns the DMA handle of the radix3 level 0 page table. pub(crate) fn radix3_dma_handle(&self) -> DmaAddress { self.level0.dma_handle() diff --git a/drivers/gpu/nova-core/firmware/riscv.rs b/drivers/gpu/nova-core/firmware/riscv.rs index 3838fab8f1c0..7d82fb9876e8 100644 --- a/drivers/gpu/nova-core/firmware/riscv.rs +++ b/drivers/gpu/nova-core/firmware/riscv.rs @@ -60,11 +60,11 @@ impl RmRiscvUCodeDesc { #[expect(unused)] pub(crate) struct RiscvFirmware { /// Offset at which the code starts in the firmware image. - code_offset: u32, + pub(crate) code_offset: u32, /// Offset at which the data starts in the firmware image. - data_offset: u32, + pub(crate) data_offset: u32, /// Offset at which the manifest starts in the firmware image. - manifest_offset: u32, + pub(crate) manifest_offset: u32, /// Application version. app_version: u32, /// Device-mapped firmware image. 
diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index 979d3391e58c..5ea53250bf37 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -2,6 +2,8 @@ use kernel::{ device, + dma::CoherentAllocation, + dma_write, pci, prelude::*, // }; @@ -27,6 +29,7 @@ use crate::{ FIRMWARE_VERSION, // }, gpu::Chipset, + gsp::GspFwWprMeta, regs, vbios::Vbios, }; @@ -146,6 +149,10 @@ impl super::Gsp { bar, )?; + let wpr_meta = + CoherentAllocation::::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?; + dma_write!(wpr_meta[0] = GspFwWprMeta::new(&gsp_fw, &fb_layout))?; + Ok(()) } } diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs index 458b5610061f..a6ee52475bdb 100644 --- a/drivers/gpu/nova-core/gsp/fw.rs +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -13,7 +13,10 @@ use kernel::{ Alignable, Alignment, // }, - sizes::SZ_1M, + sizes::{ + SZ_128K, + SZ_1M, // + }, transmute::{ AsBytes, FromBytes, // @@ -21,6 +24,8 @@ use kernel::{ }; use crate::{ + fb::FbLayout, + firmware::gsp::GspFirmware, gpu::Chipset, num::{ self, @@ -122,6 +127,60 @@ impl LibosParams { #[repr(transparent)] pub(crate) struct GspFwWprMeta(bindings::GspFwWprMeta); +// SAFETY: Padding is explicit and does not contain uninitialized data. +unsafe impl AsBytes for GspFwWprMeta {} + +// SAFETY: This struct only contains integer types for which all bit patterns +// are valid. +unsafe impl FromBytes for GspFwWprMeta {} + +type GspFwWprMetaBootResumeInfo = r570_144::GspFwWprMeta__bindgen_ty_1; +type GspFwWprMetaBootInfo = r570_144::GspFwWprMeta__bindgen_ty_1__bindgen_ty_1; + +impl GspFwWprMeta { + /// Fill in and return a `GspFwWprMeta` suitable for booting `gsp_firmware` using the + /// `fb_layout` layout. + pub(crate) fn new(gsp_firmware: &GspFirmware, fb_layout: &FbLayout) -> Self { + Self(bindings::GspFwWprMeta { + // CAST: we want to store the bits of `GSP_FW_WPR_META_MAGIC` unmodified. 
+ magic: r570_144::GSP_FW_WPR_META_MAGIC as u64, + revision: u64::from(r570_144::GSP_FW_WPR_META_REVISION), + sysmemAddrOfRadix3Elf: gsp_firmware.radix3_dma_handle(), + sizeOfRadix3Elf: u64::from_safe_cast(gsp_firmware.size), + sysmemAddrOfBootloader: gsp_firmware.bootloader.ucode.dma_handle(), + sizeOfBootloader: u64::from_safe_cast(gsp_firmware.bootloader.ucode.size()), + bootloaderCodeOffset: u64::from(gsp_firmware.bootloader.code_offset), + bootloaderDataOffset: u64::from(gsp_firmware.bootloader.data_offset), + bootloaderManifestOffset: u64::from(gsp_firmware.bootloader.manifest_offset), + __bindgen_anon_1: GspFwWprMetaBootResumeInfo { + __bindgen_anon_1: GspFwWprMetaBootInfo { + sysmemAddrOfSignature: gsp_firmware.signatures.dma_handle(), + sizeOfSignature: u64::from_safe_cast(gsp_firmware.signatures.size()), + }, + }, + gspFwRsvdStart: fb_layout.heap.start, + nonWprHeapOffset: fb_layout.heap.start, + nonWprHeapSize: fb_layout.heap.end - fb_layout.heap.start, + gspFwWprStart: fb_layout.wpr2.start, + gspFwHeapOffset: fb_layout.wpr2_heap.start, + gspFwHeapSize: fb_layout.wpr2_heap.end - fb_layout.wpr2_heap.start, + gspFwOffset: fb_layout.elf.start, + bootBinOffset: fb_layout.boot.start, + frtsOffset: fb_layout.frts.start, + frtsSize: fb_layout.frts.end - fb_layout.frts.start, + gspFwWprEnd: fb_layout + .vga_workspace + .start + .align_down(Alignment::new::()), + gspFwHeapVfPartitionCount: fb_layout.vf_partition_count, + fbSize: fb_layout.fb.end - fb_layout.fb.start, + vgaWorkspaceOffset: fb_layout.vga_workspace.start, + vgaWorkspaceSize: fb_layout.vga_workspace.end - fb_layout.vga_workspace.start, + ..Default::default() + }) + } +} + /// Struct containing the arguments required to pass a memory buffer to the GSP /// for use during initialisation. 
/// diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs index 6a14cc324391..392b25dc6991 100644 --- a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -9,6 +9,8 @@ pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MIN_MB: u32 = 64; pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MAX_MB: u32 = 256; pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MIN_MB: u32 = 88; pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MAX_MB: u32 = 280; +pub const GSP_FW_WPR_META_REVISION: u32 = 1; +pub const GSP_FW_WPR_META_MAGIC: i64 = -2577556379034558285; pub type __u8 = ffi::c_uchar; pub type __u16 = ffi::c_ushort; pub type __u32 = ffi::c_uint; -- cgit From d416035fb6fb4367e40388552ff8079a97c7155f Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 10 Nov 2025 22:34:14 +0900 Subject: gpu: nova-core: Add a slice-buffer (sbuffer) datastructure A data structure that can be used to write across multiple slices which may be out of order in memory. This lets SBuffer user correctly and safely write out of memory order, without error-prone tracking of pointers/offsets. 
let mut buf1 = [0u8; 3]; let mut buf2 = [0u8; 5]; let mut sbuffer = SBuffer::new([&mut buf1[..], &mut buf2[..]]); let data = b"hello"; let result = sbuffer.write(data); Reviewed-by: Lyude Paul Co-developed-by: Alistair Popple Signed-off-by: Alistair Popple Signed-off-by: Joel Fernandes Co-developed-by: Alexandre Courbot Signed-off-by: Alexandre Courbot Message-ID: <20251110-gsp_boot-v9-6-8ae4058e3c0e@nvidia.com> --- drivers/gpu/nova-core/nova_core.rs | 1 + drivers/gpu/nova-core/sbuffer.rs | 232 +++++++++++++++++++++++++++++++++++++ 2 files changed, 233 insertions(+) create mode 100644 drivers/gpu/nova-core/sbuffer.rs diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index 9180ec9c27ef..c1121e7c64c5 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -15,6 +15,7 @@ mod gpu; mod gsp; mod num; mod regs; +mod sbuffer; mod vbios; pub(crate) const MODULE_NAME: &kernel::str::CStr = ::NAME; diff --git a/drivers/gpu/nova-core/sbuffer.rs b/drivers/gpu/nova-core/sbuffer.rs new file mode 100644 index 000000000000..f0cecbcb81be --- /dev/null +++ b/drivers/gpu/nova-core/sbuffer.rs @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::ops::Deref; + +use kernel::{ + alloc::KVec, + prelude::*, // +}; + +/// A buffer abstraction for discontiguous byte slices. +/// +/// This allows you to treat multiple non-contiguous `&mut [u8]` slices +/// of the same length as a single stream-like read/write buffer. +/// +/// # Examples +/// +/// ``` +/// let mut buf1 = [0u8; 5]; +/// let mut buf2 = [0u8; 5]; +/// let mut sbuffer = SBufferIter::new_writer([&mut buf1[..], &mut buf2[..]]); +/// +/// let data = b"hi world!"; +/// sbuffer.write_all(data)?; +/// drop(sbuffer); +/// +/// assert_eq!(buf1, *b"hi wo"); +/// assert_eq!(buf2, *b"rld!\0"); +/// +/// # Ok::<(), Error>(()) +/// ``` +pub(crate) struct SBufferIter { + // [`Some`] if we are not at the end of the data yet.
+ cur_slice: Option, + // All the slices remaining after `cur_slice`. + slices: I, +} + +impl<'a, I> SBufferIter +where + I: Iterator, +{ + /// Creates a reader buffer for a discontiguous set of byte slices. + /// + /// # Examples + /// + /// ``` + /// let buf1: [u8; 5] = [0, 1, 2, 3, 4]; + /// let buf2: [u8; 5] = [5, 6, 7, 8, 9]; + /// let sbuffer = SBufferIter::new_reader([&buf1[..], &buf2[..]]); + /// let sum: u8 = sbuffer.sum(); + /// assert_eq!(sum, 45); + /// ``` + #[expect(unused)] + pub(crate) fn new_reader(slices: impl IntoIterator) -> Self + where + I: Iterator, + { + Self::new(slices) + } + + /// Creates a writeable buffer for a discontiguous set of byte slices. + /// + /// # Examples + /// + /// ``` + /// let mut buf1 = [0u8; 5]; + /// let mut buf2 = [0u8; 5]; + /// let mut sbuffer = SBufferIter::new_writer([&mut buf1[..], &mut buf2[..]]); + /// sbuffer.write_all(&[0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9][..])?; + /// drop(sbuffer); + /// assert_eq!(buf1, [0, 1, 2, 3, 4]); + /// assert_eq!(buf2, [5, 6, 7, 8, 9]); + /// + /// ``` + #[expect(unused)] + pub(crate) fn new_writer(slices: impl IntoIterator) -> Self + where + I: Iterator, + { + Self::new(slices) + } + + fn new(slices: impl IntoIterator) -> Self + where + I::Item: Deref, + { + let mut slices = slices.into_iter(); + + Self { + // Skip empty slices. + cur_slice: slices.find(|s| !s.deref().is_empty()), + slices, + } + } + + /// Returns a slice of at most `len` bytes, or [`None`] if we are at the end of the data. + /// + /// If a slice shorter than `len` bytes has been returned, the caller can call this method + /// again until it returns [`None`] to try and obtain the remainder of the data. + /// + /// The closure `f` should split the slice received in its first parameter + /// at the position given in the second parameter.
+ fn get_slice_internal( + &mut self, + len: usize, + mut f: impl FnMut(I::Item, usize) -> (I::Item, I::Item), + ) -> Option + where + I::Item: Deref, + { + match self.cur_slice.take() { + None => None, + Some(cur_slice) => { + if len >= cur_slice.len() { + // Caller requested more data than is in the current slice, return it entirely + // and prepare the following slice for being used. Skip empty slices to avoid + // trouble. + self.cur_slice = self.slices.find(|s| !s.is_empty()); + + Some(cur_slice) + } else { + // The current slice can satisfy the request, split it and return a slice of + // the requested size. + let (ret, next) = f(cur_slice, len); + self.cur_slice = Some(next); + + Some(ret) + } + } + } + } + + /// Returns whether this buffer still has data available. + #[expect(unused)] + pub(crate) fn is_empty(&self) -> bool { + self.cur_slice.is_none() + } +} + +/// Provides a way to get non-mutable slices of data to read from. +impl<'a, I> SBufferIter +where + I: Iterator, +{ + /// Returns a slice of at most `len` bytes, or [`None`] if we are at the end of the data. + /// + /// If a slice shorter than `len` bytes has been returned, the caller can call this method + /// again until it returns [`None`] to try and obtain the remainder of the data. + fn get_slice(&mut self, len: usize) -> Option<&'a [u8]> { + self.get_slice_internal(len, |s, pos| s.split_at(pos)) + } + + /// Ideally we would implement `Read`, but it is not available in `core`. + /// So mimic `std::io::Read::read_exact`. + #[expect(unused)] + pub(crate) fn read_exact(&mut self, mut dst: &mut [u8]) -> Result { + while !dst.is_empty() { + match self.get_slice(dst.len()) { + None => return Err(EINVAL), + Some(src) => { + let dst_slice; + (dst_slice, dst) = dst.split_at_mut(src.len()); + dst_slice.copy_from_slice(src); + } + } + } + + Ok(()) + } + + /// Read all the remaining data into a [`KVec`]. + /// + /// `self` will be empty after this operation. 
+ #[expect(unused)] + pub(crate) fn flush_into_kvec(&mut self, flags: kernel::alloc::Flags) -> Result> { + let mut buf = KVec::::new(); + + if let Some(slice) = core::mem::take(&mut self.cur_slice) { + buf.extend_from_slice(slice, flags)?; + } + for slice in &mut self.slices { + buf.extend_from_slice(slice, flags)?; + } + + Ok(buf) + } +} + +/// Provides a way to get mutable slices of data to write into. +impl<'a, I> SBufferIter +where + I: Iterator, +{ + /// Returns a mutable slice of at most `len` bytes, or [`None`] if we are at the end of the + /// data. + /// + /// If a slice shorter than `len` bytes has been returned, the caller can call this method + /// again until it returns `None` to try and obtain the remainder of the data. + fn get_slice_mut(&mut self, len: usize) -> Option<&'a mut [u8]> { + self.get_slice_internal(len, |s, pos| s.split_at_mut(pos)) + } + + /// Ideally we would implement [`Write`], but it is not available in `core`. + /// So mimic `std::io::Write::write_all`. + #[expect(unused)] + pub(crate) fn write_all(&mut self, mut src: &[u8]) -> Result { + while !src.is_empty() { + match self.get_slice_mut(src.len()) { + None => return Err(ETOOSMALL), + Some(dst) => { + let src_slice; + (src_slice, src) = src.split_at(dst.len()); + dst.copy_from_slice(src_slice); + } + } + } + + Ok(()) + } +} + +impl<'a, I> Iterator for SBufferIter +where + I: Iterator, +{ + type Item = u8; + + fn next(&mut self) -> Option { + // Returned slices are guaranteed to not be empty so we can safely index the first entry. + self.get_slice(1).map(|s| s[0]) + } +} -- cgit From 6b5a10dad555310e44ff5f53f97e18be76b395ea Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 10 Nov 2025 22:34:15 +0900 Subject: gpu: nova-core: Add zeroable trait to bindings Derive the Zeroable trait for existing bindgen generated bindings. This is safe because all bindgen generated types are simple integer types for which any bit pattern, including all zeros, is valid. 
Signed-off-by: Alistair Popple Signed-off-by: Alexandre Courbot Message-ID: <20251110-gsp_boot-v9-7-8ae4058e3c0e@nvidia.com> --- drivers/gpu/nova-core/gsp/fw/r570_144.rs | 5 ++++- drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs | 16 ++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144.rs b/drivers/gpu/nova-core/gsp/fw/r570_144.rs index 82a973cd99c3..048234d1a9d1 100644 --- a/drivers/gpu/nova-core/gsp/fw/r570_144.rs +++ b/drivers/gpu/nova-core/gsp/fw/r570_144.rs @@ -24,5 +24,8 @@ unreachable_pub, unsafe_op_in_unsafe_fn )] -use kernel::ffi; +use kernel::{ + ffi, + prelude::Zeroable, // +}; include!("r570_144/bindings.rs"); diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs index 392b25dc6991..f7b38978c5f8 100644 --- a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -20,7 +20,7 @@ pub type u16_ = __u16; pub type u32_ = __u32; pub type u64_ = __u64; #[repr(C)] -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Zeroable)] pub struct GspFwWprMeta { pub magic: u64_, pub revision: u64_, @@ -55,19 +55,19 @@ pub struct GspFwWprMeta { pub verified: u64_, } #[repr(C)] -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Zeroable)] pub union GspFwWprMeta__bindgen_ty_1 { pub __bindgen_anon_1: GspFwWprMeta__bindgen_ty_1__bindgen_ty_1, pub __bindgen_anon_2: GspFwWprMeta__bindgen_ty_1__bindgen_ty_2, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] pub struct GspFwWprMeta__bindgen_ty_1__bindgen_ty_1 { pub sysmemAddrOfSignature: u64_, pub sizeOfSignature: u64_, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] pub struct GspFwWprMeta__bindgen_ty_1__bindgen_ty_2 { pub gspFwHeapFreeListWprOffset: u32_, pub unused0: u32_, @@ -83,13 +83,13 @@ impl Default for GspFwWprMeta__bindgen_ty_1 { } } #[repr(C)] 
-#[derive(Copy, Clone)] +#[derive(Copy, Clone, Zeroable)] pub union GspFwWprMeta__bindgen_ty_2 { pub __bindgen_anon_1: GspFwWprMeta__bindgen_ty_2__bindgen_ty_1, pub __bindgen_anon_2: GspFwWprMeta__bindgen_ty_2__bindgen_ty_2, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] pub struct GspFwWprMeta__bindgen_ty_2__bindgen_ty_1 { pub partitionRpcAddr: u64_, pub partitionRpcRequestOffset: u16_, @@ -101,7 +101,7 @@ pub struct GspFwWprMeta__bindgen_ty_2__bindgen_ty_1 { pub lsUcodeVersion: u32_, } #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] pub struct GspFwWprMeta__bindgen_ty_2__bindgen_ty_2 { pub partitionRpcPadding: [u32_; 4usize], pub sysmemAddrOfCrashReportQueue: u64_, @@ -136,7 +136,7 @@ pub const LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_SYSMEM: LibosMemoryRegion pub const LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_FB: LibosMemoryRegionLoc = 2; pub type LibosMemoryRegionLoc = ffi::c_uint; #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] pub struct LibosMemoryRegionInitArgument { pub id8: LibosAddress, pub pa: LibosAddress, -- cgit From 88622323dde3d9d6efd6f2efcfaa0bced5af94c3 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 10 Nov 2025 22:34:16 +0900 Subject: rust: enable slice_flatten feature and provide it through an extension trait In Rust 1.80, the previously unstable `slice::flatten` family of methods have been stabilized and renamed to `slice::as_flattened`. This creates an issue as we want to use `as_flattened`, but need to support the MSRV (which at the moment is Rust 1.78) where it is named `flatten`. Solve this by enabling the `slice_flatten` feature, and providing an `as_flattened` implementation through an extension trait for compiler versions where it is not available. 
The trait is then exported from the prelude, making the `as_flattened` family of methods transparently available for all supported compiler versions. This extension trait can be removed once the MSRV passes 1.80. Suggested-by: Miguel Ojeda Link: https://lore.kernel.org/all/CANiq72kK4pG=O35NwxPNoTO17oRcg1yfGcvr3==Fi4edr+sfmw@mail.gmail.com/ Acked-by: Danilo Krummrich Acked-by: Miguel Ojeda Reviewed-by: Alice Ryhl Signed-off-by: Alexandre Courbot Message-ID: <20251110-gsp_boot-v9-8-8ae4058e3c0e@nvidia.com> Message-ID: <20251104-b4-as-flattened-v3-1-6cb9c26b45cd@nvidia.com> --- init/Kconfig | 3 +++ rust/kernel/lib.rs | 4 ++++ rust/kernel/prelude.rs | 3 +++ rust/kernel/slice.rs | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+) create mode 100644 rust/kernel/slice.rs diff --git a/init/Kconfig b/init/Kconfig index cab3ad28ca49..7da93c9cccc3 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -147,6 +147,9 @@ config LD_CAN_USE_KEEP_IN_OVERLAY # https://github.com/llvm/llvm-project/pull/130661 def_bool LD_IS_BFD || LLD_VERSION >= 210000 +config RUSTC_HAS_SLICE_AS_FLATTENED + def_bool RUSTC_VERSION >= 108000 + config RUSTC_HAS_COERCE_POINTEE def_bool RUSTC_VERSION >= 108400 diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 3dd7bebe7888..2581a356d114 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -21,6 +21,9 @@ #![feature(inline_const)] #![feature(pointer_is_aligned)] // +// Stable since Rust 1.80.0. +#![feature(slice_flatten)] +// // Stable since Rust 1.81.0. 
#![feature(lint_reasons)] // @@ -128,6 +131,7 @@ pub mod scatterlist; pub mod security; pub mod seq_file; pub mod sizes; +pub mod slice; mod static_assert; #[doc(hidden)] pub mod std_vendor; diff --git a/rust/kernel/prelude.rs b/rust/kernel/prelude.rs index 198d09a31449..9ee8acc563de 100644 --- a/rust/kernel/prelude.rs +++ b/rust/kernel/prelude.rs @@ -51,3 +51,6 @@ pub use super::init::InPlaceInit; pub use super::current; pub use super::uaccess::UserPtr; + +#[cfg(not(CONFIG_RUSTC_HAS_SLICE_AS_FLATTENED))] +pub use super::slice::AsFlattened; diff --git a/rust/kernel/slice.rs b/rust/kernel/slice.rs new file mode 100644 index 000000000000..6ca91a4fd1f2 --- /dev/null +++ b/rust/kernel/slice.rs @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Additional (and temporary) slice helpers. + +/// Extension trait providing a portable version of [`as_flattened`] and +/// [`as_flattened_mut`]. +/// +/// In Rust 1.80, the previously unstable `slice::flatten` family of methods +/// have been stabilized and renamed from `flatten` to `as_flattened`. +/// +/// This creates an issue for as long as the MSRV is < 1.80, as the same functionality is provided +/// by different methods depending on the compiler version. +/// +/// This extension trait solves this by abstracting `as_flattened` and calling the correct method +/// depending on the Rust version. +/// +/// This trait can be removed once the MSRV passes 1.80. +/// +/// [`as_flattened`]: slice::as_flattened +/// [`as_flattened_mut`]: slice::as_flattened_mut +#[cfg(not(CONFIG_RUSTC_HAS_SLICE_AS_FLATTENED))] +pub trait AsFlattened { + /// Takes a `&[[T; N]]` and flattens it to a `&[T]`. + /// + /// This is a portable layer on top of [`as_flattened`]; see its documentation for details. + /// + /// [`as_flattened`]: slice::as_flattened + fn as_flattened(&self) -> &[T]; + + /// Takes a `&mut [[T; N]]` and flattens it to a `&mut [T]`.
+ /// + /// This is a portable layer on top of [`as_flattened_mut`]; see its documentation for details. + /// + /// [`as_flattened_mut`]: slice::as_flattened_mut + fn as_flattened_mut(&mut self) -> &mut [T]; +} + +#[cfg(not(CONFIG_RUSTC_HAS_SLICE_AS_FLATTENED))] +impl AsFlattened for [[T; N]] { + #[allow(clippy::incompatible_msrv)] + fn as_flattened(&self) -> &[T] { + self.flatten() + } + + #[allow(clippy::incompatible_msrv)] + fn as_flattened_mut(&mut self) -> &mut [T] { + self.flatten_mut() + } +} -- cgit From 75f6b1de8133ea337b72901464989dc811d3305d Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 10 Nov 2025 22:34:17 +0900 Subject: gpu: nova-core: gsp: Add GSP command queue bindings and handling This commit introduces core infrastructure for handling GSP command and message queues in the nova-core driver. The command queue system enables bidirectional communication between the host driver and GSP firmware through a remote message passing interface. The interface is based on passing serialised data structures over a ring buffer with separate transmit and receive queues. Commands are sent by writing to the CPU transmit queue and waiting for completion via the receive queue. To ensure safety, mutable or immutable (depending on whether it is a send or receive operation) references are taken on the command queue when allocating the message to write/read to. This ensures message memory remains valid and the command queue can't be mutated whilst an operation is in progress. Currently this is only used by the probe() routine and therefore can only be used by a single thread of execution. Locking to enable safe access from multiple threads will be introduced in a future series when that becomes necessary.
Signed-off-by: Alistair Popple Co-developed-by: Alexandre Courbot Signed-off-by: Alexandre Courbot Message-ID: <20251110-gsp_boot-v9-9-8ae4058e3c0e@nvidia.com> --- drivers/gpu/nova-core/gsp.rs | 7 + drivers/gpu/nova-core/gsp/cmdq.rs | 656 ++++++++++++++++++++++ drivers/gpu/nova-core/gsp/fw.rs | 335 ++++++++++- drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs | 409 ++++++++++++++ drivers/gpu/nova-core/regs.rs | 4 + drivers/gpu/nova-core/sbuffer.rs | 3 - 6 files changed, 1410 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/nova-core/gsp/cmdq.rs diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index ec053395694b..f9819a04bb40 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -14,6 +14,7 @@ use kernel::{ transmute::AsBytes, // }; +pub(crate) mod cmdq; mod fw; pub(crate) use fw::{ @@ -22,6 +23,7 @@ pub(crate) use fw::{ }; use crate::{ + gsp::cmdq::Cmdq, gsp::fw::LibosMemoryRegionInitArgument, num, // }; @@ -104,6 +106,8 @@ pub(crate) struct Gsp { logintr: LogBuffer, /// RM log buffer. logrm: LogBuffer, + /// Command queue. 
+ pub(crate) cmdq: Cmdq, } impl Gsp { @@ -128,11 +132,14 @@ impl Gsp { let logrm = LogBuffer::new(dev)?; dma_write!(libos[2] = LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0))?; + let cmdq = Cmdq::new(dev)?; + Ok(try_pin_init!(Self { libos, loginit, logintr, logrm, + cmdq, })) } } diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs new file mode 100644 index 000000000000..c00d9fa9b79b --- /dev/null +++ b/drivers/gpu/nova-core/gsp/cmdq.rs @@ -0,0 +1,656 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::{ + cmp, + mem, + sync::atomic::{ + fence, + Ordering, // + }, // +}; + +use kernel::{ + device, + dma::CoherentAllocation, + dma_write, + io::poll::read_poll_timeout, + prelude::*, + sync::aref::ARef, + time::Delta, + transmute::{ + AsBytes, + FromBytes, // + }, +}; + +use crate::{ + driver::Bar0, + gsp::{ + fw::{ + GspMsgElement, + MsgFunction, + MsgqRxHeader, + MsgqTxHeader, // + }, + PteArray, + GSP_PAGE_SIZE, // + }, + num, + regs, + sbuffer::SBufferIter, // +}; + +/// Trait implemented by types representing a command to send to the GSP. +/// +/// The main purpose of this trait is to provide [`Cmdq::send_command`] with the information it +/// needs to send a given command. +/// +/// [`CommandToGsp::init`] in particular is responsible for initializing the command directly +/// into the space reserved for it in the command queue buffer. +/// +/// Some commands may be followed by a variable-length payload. For these, the +/// [`CommandToGsp::variable_payload_len`] and [`CommandToGsp::init_variable_payload`] need to be +/// defined as well. +pub(crate) trait CommandToGsp { + /// Function identifying this command to the GSP. + const FUNCTION: MsgFunction; + + /// Type generated by [`CommandToGsp::init`], to be written into the command queue buffer. + type Command: FromBytes + AsBytes; + + /// Error type returned by [`CommandToGsp::init`]. 
+ type InitError; + + /// In-place command initializer responsible for filling the command in the command queue + /// buffer. + fn init(&self) -> impl Init; + + /// Size of the variable-length payload following the command structure generated by + /// [`CommandToGsp::init`]. + /// + /// Most commands don't have a variable-length payload, so this is zero by default. + fn variable_payload_len(&self) -> usize { + 0 + } + + /// Method initializing the variable-length payload. + /// + /// The command buffer is circular, which means that we may need to jump back to its beginning + /// while in the middle of a command. For this reason, the variable-length payload is + /// initialized using a [`SBufferIter`]. + /// + /// This method will receive a buffer of the length returned by + /// [`CommandToGsp::variable_payload_len`], and must write every single byte of it. Leaving + /// unwritten space will lead to an error. + /// + /// Most commands don't have a variable-length payload, so this does nothing by default. + fn init_variable_payload( + &self, + _dst: &mut SBufferIter>, + ) -> Result { + Ok(()) + } +} + +/// Trait representing messages received from the GSP. +/// +/// This trait tells [`Cmdq::receive_msg`] how it can receive a given type of message. +pub(crate) trait MessageFromGsp: Sized { + /// Function identifying this message from the GSP. + const FUNCTION: MsgFunction; + + /// Error type returned by [`MessageFromGsp::read`]. + type InitError; + + /// Type containing the raw message to be read from the message queue. + type Message: FromBytes; + + /// Method reading the message from the message queue and returning it. + /// + /// From a `Self::Message` and a [`SBufferIter`], constructs an instance of `Self` and returns + /// it. + fn read( + msg: &Self::Message, + sbuffer: &mut SBufferIter>, + ) -> Result; +} + +/// Number of GSP pages making the [`Msgq`]. +pub(crate) const MSGQ_NUM_PAGES: u32 = 0x3f; + +/// Circular buffer of a [`Msgq`]. 
+/// +/// This area of memory is to be shared between the driver and the GSP to exchange commands or +/// messages. +#[repr(C, align(0x1000))] +#[derive(Debug)] +struct MsgqData { + data: [[u8; GSP_PAGE_SIZE]; num::u32_as_usize(MSGQ_NUM_PAGES)], +} + +// Annoyingly we are forced to use a literal to specify the alignment of +// `MsgqData`, so check that it corresponds to the actual GSP page size here. +static_assert!(align_of::() == GSP_PAGE_SIZE); + +/// Unidirectional message queue. +/// +/// Contains the data for a message queue, that either the driver or GSP writes to. +/// +/// Note that while the write pointer of `tx` corresponds to the `msgq` of the same instance, the +/// read pointer of `rx` actually refers to the `Msgq` owned by the other side. +/// This design ensures that only the driver or GSP ever writes to a given instance of this struct. +#[repr(C)] +// There is no struct defined for this in the open-gpu-kernel-source headers. +// Instead it is defined by code in `GspMsgQueuesInit()`. +struct Msgq { + /// Header for sending messages, including the write pointer. + tx: MsgqTxHeader, + /// Header for receiving messages, including the read pointer. + rx: MsgqRxHeader, + /// The message queue proper. + msgq: MsgqData, +} + +/// Structure shared between the driver and the GSP and containing the command and message queues. +#[repr(C)] +struct GspMem { + /// Self-mapping page table entries. + ptes: PteArray<{ GSP_PAGE_SIZE / size_of::() }>, + /// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the + /// write and read pointers that the CPU updates. + /// + /// This member is read-only for the GSP. + cpuq: Msgq, + /// GSP queue: the GSP writes messages here, and the driver reads them. It also contains the + /// write and read pointers that the GSP updates. + /// + /// This member is read-only for the driver. 
+ gspq: Msgq, +} + +// SAFETY: These structs don't meet the no-padding requirements of AsBytes but +// that is not a problem because they are not used outside the kernel. +unsafe impl AsBytes for GspMem {} + +// SAFETY: These structs don't meet the no-padding requirements of FromBytes but +// that is not a problem because they are not used outside the kernel. +unsafe impl FromBytes for GspMem {} + +/// Wrapper around [`GspMem`] to share it with the GPU using a [`CoherentAllocation`]. +/// +/// This provides the low-level functionality to communicate with the GSP, including allocation of +/// queue space to write messages to and management of read/write pointers. +/// +/// This is shared with the GSP, with clear ownership rules regarding the command queues: +/// +/// * The driver owns (i.e. can write to) the part of the CPU message queue between the CPU write +/// pointer and the GSP read pointer. This region is returned by [`Self::driver_write_area`]. +/// * The driver owns (i.e. can read from) the part of the GSP message queue between the CPU read +/// pointer and the GSP write pointer. This region is returned by [`Self::driver_read_area`]. +struct DmaGspMem(CoherentAllocation); + +impl DmaGspMem { + /// Allocate a new instance and map it for `dev`. + fn new(dev: &device::Device) -> Result { + const MSGQ_SIZE: u32 = num::usize_into_u32::<{ size_of::() }>(); + const RX_HDR_OFF: u32 = num::usize_into_u32::<{ mem::offset_of!(Msgq, rx) }>(); + + let gsp_mem = + CoherentAllocation::::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?; + dma_write!(gsp_mem[0].ptes = PteArray::new(gsp_mem.dma_handle())?)?; + dma_write!(gsp_mem[0].cpuq.tx = MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES))?; + dma_write!(gsp_mem[0].cpuq.rx = MsgqRxHeader::new())?; + + Ok(Self(gsp_mem)) + } + + /// Returns the region of the CPU message queue that the driver is currently allowed to write + /// to. 
+ /// + /// As the message queue is a circular buffer, the region may be discontiguous in memory. In + /// that case the second slice will have a non-zero length. + fn driver_write_area(&mut self) -> (&mut [[u8; GSP_PAGE_SIZE]], &mut [[u8; GSP_PAGE_SIZE]]) { + let tx = self.cpu_write_ptr() as usize; + let rx = self.gsp_read_ptr() as usize; + + // SAFETY: + // - The `CoherentAllocation` contains exactly one object. + // - We will only access the driver-owned part of the shared memory. + // - Per the safety statement of the function, no concurrent access will be performed. + let gsp_mem = &mut unsafe { self.0.as_slice_mut(0, 1) }.unwrap()[0]; + // PANIC: per the invariant of `cpu_write_ptr`, `tx` is `<= MSGQ_NUM_PAGES`. + let (before_tx, after_tx) = gsp_mem.cpuq.msgq.data.split_at_mut(tx); + + if rx <= tx { + // The area from `tx` up to the end of the ring, and from the beginning of the ring up + // to `rx`, minus one unit, belongs to the driver. + if rx == 0 { + let last = after_tx.len() - 1; + (&mut after_tx[..last], &mut before_tx[0..0]) + } else { + (after_tx, &mut before_tx[..rx]) + } + } else { + // The area from `tx` to `rx`, minus one unit, belongs to the driver. + // + // PANIC: per the invariants of `cpu_write_ptr` and `gsp_read_ptr`, `rx` and `tx` are + // `<= MSGQ_NUM_PAGES`, and the test above ensured that `rx > tx`. + (after_tx.split_at_mut(rx - tx).0, &mut before_tx[0..0]) + } + } + + /// Returns the region of the GSP message queue that the driver is currently allowed to read + /// from. + /// + /// As the message queue is a circular buffer, the region may be discontiguous in memory. In + /// that case the second slice will have a non-zero length. + fn driver_read_area(&self) -> (&[[u8; GSP_PAGE_SIZE]], &[[u8; GSP_PAGE_SIZE]]) { + let tx = self.gsp_write_ptr() as usize; + let rx = self.cpu_read_ptr() as usize; + + // SAFETY: + // - The `CoherentAllocation` contains exactly one object. 
+        // - We will only access the driver-owned part of the shared memory.
+        // - Per the safety statement of the function, no concurrent access will be performed.
+        let gsp_mem = &unsafe { self.0.as_slice(0, 1) }.unwrap()[0];
+        // PANIC: per the invariant of `cpu_read_ptr`, `rx` is `<= MSGQ_NUM_PAGES`.
+        let (before_rx, after_rx) = gsp_mem.gspq.msgq.data.split_at(rx);
+
+        match tx.cmp(&rx) {
+            // Read and write pointers coincide: nothing to read.
+            cmp::Ordering::Equal => (&after_rx[0..0], &after_rx[0..0]),
+            // The readable pages are `[rx, tx)`. `after_rx` already starts at `rx`, so the region
+            // is its first `tx - rx` elements (not its first `tx` elements).
+            //
+            // PANIC: this arm guarantees `tx > rx`, so the subtraction cannot underflow, and per
+            // the invariant of `gsp_write_ptr`, `tx` is `<= MSGQ_NUM_PAGES`.
+            cmp::Ordering::Greater => (&after_rx[..tx - rx], &before_rx[0..0]),
+            // The readable pages wrap around: `[rx, end)` followed by `[0, tx)`.
+            //
+            // PANIC: this arm guarantees `tx < rx`, and `before_rx` has exactly `rx` elements.
+            cmp::Ordering::Less => (after_rx, &before_rx[..tx]),
+        }
+    }
+
+    /// Allocates a region on the command queue that is large enough to send a command of `size`
+    /// bytes.
+    ///
+    /// This returns a [`GspCommand`] ready to be written to by the caller.
+    ///
+    /// # Errors
+    ///
+    /// - `EAGAIN` if the driver area is too small to hold the requested command.
+    /// - `EIO` if the command header is not properly aligned.
+    fn allocate_command(&mut self, size: usize) -> Result> {
+        // Get the current writable area as an array of bytes.
+        let (slice_1, slice_2) = {
+            let (slice_1, slice_2) = self.driver_write_area();
+
+            #[allow(clippy::incompatible_msrv)]
+            (slice_1.as_flattened_mut(), slice_2.as_flattened_mut())
+        };
+
+        // If the GSP is still processing previous messages the shared region
+        // may be full in which case we will have to retry once the GSP has
+        // processed the existing commands.
+        if size_of::() + size > slice_1.len() + slice_2.len() {
+            return Err(EAGAIN);
+        }
+
+        // Extract area for the `GspMsgElement`.
+        let (header, slice_1) = GspMsgElement::from_bytes_mut_prefix(slice_1).ok_or(EIO)?;
+
+        // Create the contents area.
+        let (slice_1, slice_2) = if slice_1.len() > size {
+            // Contents fits entirely in `slice_1`.
+            (&mut slice_1[..size], &mut slice_2[0..0])
+        } else {
+            // Need all of `slice_1` and some of `slice_2`.
+ let slice_2_len = size - slice_1.len(); + (slice_1, &mut slice_2[..slice_2_len]) + }; + + Ok(GspCommand { + header, + contents: (slice_1, slice_2), + }) + } + + // Returns the index of the memory page the GSP will write the next message to. + // + // # Invariants + // + // - The returned value is between `0` and `MSGQ_NUM_PAGES`. + fn gsp_write_ptr(&self) -> u32 { + let gsp_mem = self.0.start_ptr(); + + // SAFETY: + // - The 'CoherentAllocation' contains at least one object. + // - By the invariants of `CoherentAllocation` the pointer is valid. + (unsafe { (*gsp_mem).gspq.tx.write_ptr() } % MSGQ_NUM_PAGES) + } + + // Returns the index of the memory page the GSP will read the next command from. + // + // # Invariants + // + // - The returned value is between `0` and `MSGQ_NUM_PAGES`. + fn gsp_read_ptr(&self) -> u32 { + let gsp_mem = self.0.start_ptr(); + + // SAFETY: + // - The 'CoherentAllocation' contains at least one object. + // - By the invariants of `CoherentAllocation` the pointer is valid. + (unsafe { (*gsp_mem).gspq.rx.read_ptr() } % MSGQ_NUM_PAGES) + } + + // Returns the index of the memory page the CPU can read the next message from. + // + // # Invariants + // + // - The returned value is between `0` and `MSGQ_NUM_PAGES`. + fn cpu_read_ptr(&self) -> u32 { + let gsp_mem = self.0.start_ptr(); + + // SAFETY: + // - The ['CoherentAllocation'] contains at least one object. + // - By the invariants of CoherentAllocation the pointer is valid. + (unsafe { (*gsp_mem).cpuq.rx.read_ptr() } % MSGQ_NUM_PAGES) + } + + // Informs the GSP that it can send `elem_count` new pages into the message queue. + fn advance_cpu_read_ptr(&mut self, elem_count: u32) { + let rptr = self.cpu_read_ptr().wrapping_add(elem_count) % MSGQ_NUM_PAGES; + + // Ensure read pointer is properly ordered. + fence(Ordering::SeqCst); + + let gsp_mem = self.0.start_ptr_mut(); + + // SAFETY: + // - The 'CoherentAllocation' contains at least one object. 
+        // - By the invariants of `CoherentAllocation` the pointer is valid.
+        unsafe { (*gsp_mem).cpuq.rx.set_read_ptr(rptr) };
+    }
+
+    // Returns the index of the memory page the CPU can write the next command to.
+    //
+    // # Invariants
+    //
+    // - The returned value is between `0` and `MSGQ_NUM_PAGES`.
+    fn cpu_write_ptr(&self) -> u32 {
+        let gsp_mem = self.0.start_ptr();
+
+        // SAFETY:
+        // - The 'CoherentAllocation' contains at least one object.
+        // - By the invariants of `CoherentAllocation` the pointer is valid.
+        (unsafe { (*gsp_mem).cpuq.tx.write_ptr() } % MSGQ_NUM_PAGES)
+    }
+
+    // Informs the GSP that it can process `elem_count` new pages from the command queue.
+    //
+    // The new index wraps with `% MSGQ_NUM_PAGES`, exactly like `advance_cpu_read_ptr` and the
+    // pointer accessors of this type. `MSGQ_NUM_PAGES` is a page count, not a bit mask, so a
+    // bitwise AND would not implement the wrap-around.
+    fn advance_cpu_write_ptr(&mut self, elem_count: u32) {
+        let wptr = self.cpu_write_ptr().wrapping_add(elem_count) % MSGQ_NUM_PAGES;
+        let gsp_mem = self.0.start_ptr_mut();
+
+        // SAFETY:
+        // - The 'CoherentAllocation' contains at least one object.
+        // - By the invariants of `CoherentAllocation` the pointer is valid.
+        unsafe { (*gsp_mem).cpuq.tx.set_write_ptr(wptr) };
+
+        // Ensure all command data is visible before triggering the GSP read.
+        fence(Ordering::SeqCst);
+    }
+}
+
+/// A command ready to be sent on the command queue.
+///
+/// This is the type returned by [`DmaGspMem::allocate_command`].
+struct GspCommand<'a> {
+    // Writable reference to the header of the command.
+    header: &'a mut GspMsgElement,
+    // Writable slices to the contents of the command. The second slice is zero unless the command
+    // loops over the command queue.
+    contents: (&'a mut [u8], &'a mut [u8]),
+}
+
+/// A message ready to be processed from the message queue.
+///
+/// This is the type returned by [`Cmdq::wait_for_msg`].
+struct GspMessage<'a> {
+    // Reference to the header of the message.
+    header: &'a GspMsgElement,
+    // Slices to the contents of the message. The second slice is zero unless the message loops
+    // over the message queue.
+    contents: (&'a [u8], &'a [u8]),
+}
+
+/// GSP command queue.
+/// +/// Provides the ability to send commands and receive messages from the GSP using a shared memory +/// area. +pub(crate) struct Cmdq { + /// Device this command queue belongs to. + dev: ARef, + /// Current command sequence number. + seq: u32, + /// Memory area shared with the GSP for communicating commands and messages. + gsp_mem: DmaGspMem, +} + +impl Cmdq { + /// Creates a new command queue for `dev`. + pub(crate) fn new(dev: &device::Device) -> Result { + let gsp_mem = DmaGspMem::new(dev)?; + + Ok(Cmdq { + dev: dev.into(), + seq: 0, + gsp_mem, + }) + } + + /// Computes the checksum for the message pointed to by `it`. + /// + /// A message is made of several parts, so `it` is an iterator over byte slices representing + /// these parts. + fn calculate_checksum>(it: T) -> u32 { + let sum64 = it + .enumerate() + .map(|(idx, byte)| (((idx % 8) * 8) as u32, byte)) + .fold(0, |acc, (rol, byte)| acc ^ u64::from(byte).rotate_left(rol)); + + ((sum64 >> 32) as u32) ^ (sum64 as u32) + } + + /// Notifies the GSP that we have updated the command queue pointers. + fn notify_gsp(bar: &Bar0) { + regs::NV_PGSP_QUEUE_HEAD::default() + .set_address(0) + .write(bar); + } + + /// Sends `command` to the GSP. + /// + /// # Errors + /// + /// - `EAGAIN` if there was not enough space in the command queue to send the command. + /// - `EIO` if the variable payload requested by the command has not been entirely + /// written to by its [`CommandToGsp::init_variable_payload`] method. + /// + /// Error codes returned by the command initializers are propagated as-is. + #[expect(unused)] + pub(crate) fn send_command(&mut self, bar: &Bar0, command: M) -> Result + where + M: CommandToGsp, + // This allows all error types, including `Infallible`, to be used for `M::InitError`. + Error: From, + { + let command_size = size_of::() + command.variable_payload_len(); + let dst = self.gsp_mem.allocate_command(command_size)?; + + // Extract area for the command itself. 
+ let (cmd, payload_1) = M::Command::from_bytes_mut_prefix(dst.contents.0).ok_or(EIO)?; + + // Fill the header and command in-place. + let msg_element = GspMsgElement::init(self.seq, command_size, M::FUNCTION); + // SAFETY: `msg_header` and `cmd` are valid references, and not touched if the initializer + // fails. + unsafe { + msg_element.__init(core::ptr::from_mut(dst.header))?; + command.init().__init(core::ptr::from_mut(cmd))?; + } + + // Fill the variable-length payload. + if command_size > size_of::() { + let mut sbuffer = + SBufferIter::new_writer([&mut payload_1[..], &mut dst.contents.1[..]]); + command.init_variable_payload(&mut sbuffer)?; + + if !sbuffer.is_empty() { + return Err(EIO); + } + } + + // Compute checksum now that the whole message is ready. + dst.header + .set_checksum(Cmdq::calculate_checksum(SBufferIter::new_reader([ + dst.header.as_bytes(), + dst.contents.0, + dst.contents.1, + ]))); + + dev_dbg!( + &self.dev, + "GSP RPC: send: seq# {}, function={}, length=0x{:x}\n", + self.seq, + M::FUNCTION, + dst.header.length(), + ); + + // All set - update the write pointer and inform the GSP of the new command. + let elem_count = dst.header.element_count(); + self.seq += 1; + self.gsp_mem.advance_cpu_write_ptr(elem_count); + Cmdq::notify_gsp(bar); + + Ok(()) + } + + /// Wait for a message to become available on the message queue. + /// + /// This works purely at the transport layer and does not interpret or validate the message + /// beyond the advertised length in its [`GspMsgElement`]. + /// + /// This method returns: + /// + /// - A reference to the [`GspMsgElement`] of the message, + /// - Two byte slices with the contents of the message. The second slice is empty unless the + /// message loops across the message queue. + /// + /// # Errors + /// + /// - `ETIMEDOUT` if `timeout` has elapsed before any message becomes available. + /// - `EIO` if there was some inconsistency (e.g. message shorter than advertised) on the + /// message queue. 
+ /// + /// Error codes returned by the message constructor are propagated as-is. + fn wait_for_msg(&self, timeout: Delta) -> Result> { + // Wait for a message to arrive from the GSP. + let (slice_1, slice_2) = read_poll_timeout( + || Ok(self.gsp_mem.driver_read_area()), + |driver_area| !driver_area.0.is_empty(), + Delta::from_millis(1), + timeout, + ) + .map(|(slice_1, slice_2)| { + #[allow(clippy::incompatible_msrv)] + (slice_1.as_flattened(), slice_2.as_flattened()) + })?; + + // Extract the `GspMsgElement`. + let (header, slice_1) = GspMsgElement::from_bytes_prefix(slice_1).ok_or(EIO)?; + + dev_dbg!( + self.dev, + "GSP RPC: receive: seq# {}, function={:?}, length=0x{:x}\n", + header.sequence(), + header.function(), + header.length(), + ); + + // Check that the driver read area is large enough for the message. + if slice_1.len() + slice_2.len() < header.length() { + return Err(EIO); + } + + // Cut the message slices down to the actual length of the message. + let (slice_1, slice_2) = if slice_1.len() > header.length() { + // PANIC: we checked above that `slice_1` is at least as long as `msg_header.length()`. + (slice_1.split_at(header.length()).0, &slice_2[0..0]) + } else { + ( + slice_1, + // PANIC: we checked above that `slice_1.len() + slice_2.len()` is at least as + // large as `msg_header.length()`. + slice_2.split_at(header.length() - slice_1.len()).0, + ) + }; + + // Validate checksum. + if Cmdq::calculate_checksum(SBufferIter::new_reader([ + header.as_bytes(), + slice_1, + slice_2, + ])) != 0 + { + dev_err!( + self.dev, + "GSP RPC: receive: Call {} - bad checksum", + header.sequence() + ); + return Err(EIO); + } + + Ok(GspMessage { + header, + contents: (slice_1, slice_2), + }) + } + + /// Receive a message from the GSP. + /// + /// `init` is a closure tasked with processing the message. It receives a reference to the + /// message in the message queue, and a [`SBufferIter`] pointing to its variable-length + /// payload, if any. 
+    ///
+    /// The expected message is specified using the `M` generic parameter. If the pending message
+    /// is different, `ERANGE` is returned and the unexpected message is dropped.
+    ///
+    /// This design is by no means final, but it is simple and will let us go through GSP
+    /// initialization.
+    ///
+    /// # Errors
+    ///
+    /// - `ETIMEDOUT` if `timeout` has elapsed before any message becomes available.
+    /// - `EIO` if there was some inconsistency (e.g. message shorter than advertised) on the
+    ///   message queue.
+    /// - `EINVAL` if the function of the message was unrecognized.
+    /// - `ERANGE` if the pending message is not the one expected by `M`.
+    #[expect(unused)]
+    pub(crate) fn receive_msg(&mut self, timeout: Delta) -> Result
+    where
+        // This allows all error types, including `Infallible`, to be used for `M::InitError`.
+        Error: From,
+    {
+        let message = self.wait_for_msg(timeout)?;
+        let function = message.header.function().map_err(|_| EINVAL)?;
+
+        // Extract the message. Store the result as we want to advance the read pointer even in
+        // case of failure. No `?` may be used while building `result`, otherwise a message that
+        // fails to decode would never be consumed from the queue.
+        let result = if function == M::FUNCTION {
+            M::Message::from_bytes_prefix(message.contents.0)
+                .ok_or(EIO)
+                .and_then(|(cmd, contents_1)| {
+                    let mut sbuffer = SBufferIter::new_reader([contents_1, message.contents.1]);
+
+                    M::read(cmd, &mut sbuffer).map_err(|e| e.into())
+                })
+        } else {
+            Err(ERANGE)
+        };
+
+        // Advance the read pointer past this message.
+        self.gsp_mem.advance_cpu_read_ptr(u32::try_from(
+            message.header.length().div_ceil(GSP_PAGE_SIZE),
+        )?);
+
+        result
+    }
+}
diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
index a6ee52475bdb..ceda61c99b92 100644
--- a/drivers/gpu/nova-core/gsp/fw.rs
+++ b/drivers/gpu/nova-core/gsp/fw.rs
@@ -5,10 +5,14 @@ mod r570_144;
 // Alias to avoid repeating the version number with every use.
use r570_144 as bindings; -use core::ops::Range; +use core::{ + fmt, + ops::Range, // +}; use kernel::{ dma::CoherentAllocation, + prelude::*, ptr::{ Alignable, Alignment, // @@ -27,6 +31,7 @@ use crate::{ fb::FbLayout, firmware::gsp::GspFirmware, gpu::Chipset, + gsp::GSP_PAGE_SIZE, num::{ self, FromSafeCast, // @@ -181,6 +186,128 @@ impl GspFwWprMeta { } } +#[derive(Copy, Clone, Debug, PartialEq)] +#[repr(u32)] +pub(crate) enum MsgFunction { + // Common function codes + Nop = bindings::NV_VGPU_MSG_FUNCTION_NOP, + SetGuestSystemInfo = bindings::NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO, + AllocRoot = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_ROOT, + AllocDevice = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_DEVICE, + AllocMemory = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY, + AllocCtxDma = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA, + AllocChannelDma = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA, + MapMemory = bindings::NV_VGPU_MSG_FUNCTION_MAP_MEMORY, + BindCtxDma = bindings::NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA, + AllocObject = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_OBJECT, + Free = bindings::NV_VGPU_MSG_FUNCTION_FREE, + Log = bindings::NV_VGPU_MSG_FUNCTION_LOG, + GetGspStaticInfo = bindings::NV_VGPU_MSG_FUNCTION_GET_GSP_STATIC_INFO, + SetRegistry = bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY, + GspSetSystemInfo = bindings::NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO, + GspInitPostObjGpu = bindings::NV_VGPU_MSG_FUNCTION_GSP_INIT_POST_OBJGPU, + GspRmControl = bindings::NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL, + GetStaticInfo = bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO, + + // Event codes + GspInitDone = bindings::NV_VGPU_MSG_EVENT_GSP_INIT_DONE, + GspRunCpuSequencer = bindings::NV_VGPU_MSG_EVENT_GSP_RUN_CPU_SEQUENCER, + PostEvent = bindings::NV_VGPU_MSG_EVENT_POST_EVENT, + RcTriggered = bindings::NV_VGPU_MSG_EVENT_RC_TRIGGERED, + MmuFaultQueued = bindings::NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED, + OsErrorLog = bindings::NV_VGPU_MSG_EVENT_OS_ERROR_LOG, + GspPostNoCat = 
bindings::NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD, + GspLockdownNotice = bindings::NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE, + UcodeLibOsPrint = bindings::NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT, +} + +impl fmt::Display for MsgFunction { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + // Common function codes + MsgFunction::Nop => write!(f, "NOP"), + MsgFunction::SetGuestSystemInfo => write!(f, "SET_GUEST_SYSTEM_INFO"), + MsgFunction::AllocRoot => write!(f, "ALLOC_ROOT"), + MsgFunction::AllocDevice => write!(f, "ALLOC_DEVICE"), + MsgFunction::AllocMemory => write!(f, "ALLOC_MEMORY"), + MsgFunction::AllocCtxDma => write!(f, "ALLOC_CTX_DMA"), + MsgFunction::AllocChannelDma => write!(f, "ALLOC_CHANNEL_DMA"), + MsgFunction::MapMemory => write!(f, "MAP_MEMORY"), + MsgFunction::BindCtxDma => write!(f, "BIND_CTX_DMA"), + MsgFunction::AllocObject => write!(f, "ALLOC_OBJECT"), + MsgFunction::Free => write!(f, "FREE"), + MsgFunction::Log => write!(f, "LOG"), + MsgFunction::GetGspStaticInfo => write!(f, "GET_GSP_STATIC_INFO"), + MsgFunction::SetRegistry => write!(f, "SET_REGISTRY"), + MsgFunction::GspSetSystemInfo => write!(f, "GSP_SET_SYSTEM_INFO"), + MsgFunction::GspInitPostObjGpu => write!(f, "GSP_INIT_POST_OBJGPU"), + MsgFunction::GspRmControl => write!(f, "GSP_RM_CONTROL"), + MsgFunction::GetStaticInfo => write!(f, "GET_STATIC_INFO"), + + // Event codes + MsgFunction::GspInitDone => write!(f, "INIT_DONE"), + MsgFunction::GspRunCpuSequencer => write!(f, "RUN_CPU_SEQUENCER"), + MsgFunction::PostEvent => write!(f, "POST_EVENT"), + MsgFunction::RcTriggered => write!(f, "RC_TRIGGERED"), + MsgFunction::MmuFaultQueued => write!(f, "MMU_FAULT_QUEUED"), + MsgFunction::OsErrorLog => write!(f, "OS_ERROR_LOG"), + MsgFunction::GspPostNoCat => write!(f, "NOCAT"), + MsgFunction::GspLockdownNotice => write!(f, "LOCKDOWN_NOTICE"), + MsgFunction::UcodeLibOsPrint => write!(f, "LIBOS_PRINT"), + } + } +} + +impl TryFrom for MsgFunction { + type Error = 
kernel::error::Error; + + fn try_from(value: u32) -> Result { + match value { + bindings::NV_VGPU_MSG_FUNCTION_NOP => Ok(MsgFunction::Nop), + bindings::NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO => { + Ok(MsgFunction::SetGuestSystemInfo) + } + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_ROOT => Ok(MsgFunction::AllocRoot), + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_DEVICE => Ok(MsgFunction::AllocDevice), + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY => Ok(MsgFunction::AllocMemory), + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA => Ok(MsgFunction::AllocCtxDma), + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA => Ok(MsgFunction::AllocChannelDma), + bindings::NV_VGPU_MSG_FUNCTION_MAP_MEMORY => Ok(MsgFunction::MapMemory), + bindings::NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA => Ok(MsgFunction::BindCtxDma), + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_OBJECT => Ok(MsgFunction::AllocObject), + bindings::NV_VGPU_MSG_FUNCTION_FREE => Ok(MsgFunction::Free), + bindings::NV_VGPU_MSG_FUNCTION_LOG => Ok(MsgFunction::Log), + bindings::NV_VGPU_MSG_FUNCTION_GET_GSP_STATIC_INFO => Ok(MsgFunction::GetGspStaticInfo), + bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY => Ok(MsgFunction::SetRegistry), + bindings::NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO => Ok(MsgFunction::GspSetSystemInfo), + bindings::NV_VGPU_MSG_FUNCTION_GSP_INIT_POST_OBJGPU => { + Ok(MsgFunction::GspInitPostObjGpu) + } + bindings::NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL => Ok(MsgFunction::GspRmControl), + bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO => Ok(MsgFunction::GetStaticInfo), + bindings::NV_VGPU_MSG_EVENT_GSP_INIT_DONE => Ok(MsgFunction::GspInitDone), + bindings::NV_VGPU_MSG_EVENT_GSP_RUN_CPU_SEQUENCER => { + Ok(MsgFunction::GspRunCpuSequencer) + } + bindings::NV_VGPU_MSG_EVENT_POST_EVENT => Ok(MsgFunction::PostEvent), + bindings::NV_VGPU_MSG_EVENT_RC_TRIGGERED => Ok(MsgFunction::RcTriggered), + bindings::NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED => Ok(MsgFunction::MmuFaultQueued), + bindings::NV_VGPU_MSG_EVENT_OS_ERROR_LOG => 
Ok(MsgFunction::OsErrorLog), + bindings::NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD => Ok(MsgFunction::GspPostNoCat), + bindings::NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE => Ok(MsgFunction::GspLockdownNotice), + bindings::NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT => Ok(MsgFunction::UcodeLibOsPrint), + _ => Err(EINVAL), + } + } +} + +impl From for u32 { + fn from(value: MsgFunction) -> Self { + // CAST: `MsgFunction` is `repr(u32)` and can thus be cast losslessly. + value as u32 + } +} + /// Struct containing the arguments required to pass a memory buffer to the GSP /// for use during initialisation. /// @@ -235,3 +362,209 @@ impl LibosMemoryRegionInitArgument { }) } } + +/// TX header for setting up a message queue with the GSP. +#[repr(transparent)] +pub(crate) struct MsgqTxHeader(bindings::msgqTxHeader); + +impl MsgqTxHeader { + /// Create a new TX queue header. + /// + /// # Arguments + /// + /// * `msgq_size` - Total size of the message queue structure, in bytes. + /// * `rx_hdr_offset` - Offset, in bytes, of the start of the RX header in the message queue + /// structure. + /// * `msg_count` - Number of messages that can be sent, i.e. the number of memory pages + /// allocated for the message queue in the message queue structure. + pub(crate) fn new(msgq_size: u32, rx_hdr_offset: u32, msg_count: u32) -> Self { + Self(bindings::msgqTxHeader { + version: 0, + size: msgq_size, + msgSize: num::usize_into_u32::(), + msgCount: msg_count, + writePtr: 0, + flags: 1, + rxHdrOff: rx_hdr_offset, + entryOff: num::usize_into_u32::(), + }) + } + + /// Returns the value of the write pointer for this queue. + pub(crate) fn write_ptr(&self) -> u32 { + let ptr = core::ptr::from_ref(&self.0.writePtr); + + // SAFETY: `ptr` is a valid pointer to a `u32`. + unsafe { ptr.read_volatile() } + } + + /// Sets the value of the write pointer for this queue. 
+ pub(crate) fn set_write_ptr(&mut self, val: u32) { + let ptr = core::ptr::from_mut(&mut self.0.writePtr); + + // SAFETY: `ptr` is a valid pointer to a `u32`. + unsafe { ptr.write_volatile(val) } + } +} + +// SAFETY: Padding is explicit and does not contain uninitialized data. +unsafe impl AsBytes for MsgqTxHeader {} + +/// RX header for setting up a message queue with the GSP. +#[repr(transparent)] +pub(crate) struct MsgqRxHeader(bindings::msgqRxHeader); + +/// Header for the message RX queue. +impl MsgqRxHeader { + /// Creates a new RX queue header. + pub(crate) fn new() -> Self { + Self(Default::default()) + } + + /// Returns the value of the read pointer for this queue. + pub(crate) fn read_ptr(&self) -> u32 { + let ptr = core::ptr::from_ref(&self.0.readPtr); + + // SAFETY: `ptr` is a valid pointer to a `u32`. + unsafe { ptr.read_volatile() } + } + + /// Sets the value of the read pointer for this queue. + pub(crate) fn set_read_ptr(&mut self, val: u32) { + let ptr = core::ptr::from_mut(&mut self.0.readPtr); + + // SAFETY: `ptr` is a valid pointer to a `u32`. + unsafe { ptr.write_volatile(val) } + } +} + +// SAFETY: Padding is explicit and does not contain uninitialized data. +unsafe impl AsBytes for MsgqRxHeader {} + +bitfield! 
{ + struct MsgHeaderVersion(u32) { + 31:24 major as u8; + 23:16 minor as u8; + } +} + +impl MsgHeaderVersion { + const MAJOR_TOT: u8 = 3; + const MINOR_TOT: u8 = 0; + + fn new() -> Self { + Self::default() + .set_major(Self::MAJOR_TOT) + .set_minor(Self::MINOR_TOT) + } +} + +impl bindings::rpc_message_header_v { + fn init(cmd_size: usize, function: MsgFunction) -> impl Init { + type RpcMessageHeader = bindings::rpc_message_header_v; + + try_init!(RpcMessageHeader { + header_version: MsgHeaderVersion::new().into(), + signature: bindings::NV_VGPU_MSG_SIGNATURE_VALID, + function: function.into(), + length: size_of::() + .checked_add(cmd_size) + .ok_or(EOVERFLOW) + .and_then(|v| v.try_into().map_err(|_| EINVAL))?, + rpc_result: 0xffffffff, + rpc_result_private: 0xffffffff, + ..Zeroable::init_zeroed() + }) + } +} + +// SAFETY: We can't derive the Zeroable trait for this binding because the +// procedural macro doesn't support the syntax used by bindgen to create the +// __IncompleteArrayField types. So instead we implement it here, which is safe +// because these are explicitly padded structures only containing types for +// which any bit pattern, including all zeros, is valid. +unsafe impl Zeroable for bindings::rpc_message_header_v {} + +/// GSP Message Element. +/// +/// This is essentially a message header expected to be followed by the message data. +#[repr(transparent)] +pub(crate) struct GspMsgElement { + inner: bindings::GSP_MSG_QUEUE_ELEMENT, +} + +impl GspMsgElement { + /// Creates a new message element. + /// + /// # Arguments + /// + /// * `sequence` - Sequence number of the message. + /// * `cmd_size` - Size of the command (not including the message element), in bytes. + /// * `function` - Function of the message. 
+ #[allow(non_snake_case)] + pub(crate) fn init( + sequence: u32, + cmd_size: usize, + function: MsgFunction, + ) -> impl Init { + type RpcMessageHeader = bindings::rpc_message_header_v; + type InnerGspMsgElement = bindings::GSP_MSG_QUEUE_ELEMENT; + let init_inner = try_init!(InnerGspMsgElement { + seqNum: sequence, + elemCount: size_of::() + .checked_add(cmd_size) + .ok_or(EOVERFLOW)? + .div_ceil(GSP_PAGE_SIZE) + .try_into() + .map_err(|_| EOVERFLOW)?, + rpc <- RpcMessageHeader::init(cmd_size, function), + ..Zeroable::init_zeroed() + }); + + try_init!(GspMsgElement { + inner <- init_inner, + }) + } + + /// Sets the checksum of this message. + /// + /// Since the header is also part of the checksum, this is usually called after the whole + /// message has been written to the shared memory area. + pub(crate) fn set_checksum(&mut self, checksum: u32) { + self.inner.checkSum = checksum; + } + + /// Returns the total length of the message. + pub(crate) fn length(&self) -> usize { + // `rpc.length` includes the length of the GspRpcHeader but not the message header. + size_of::() - size_of::() + + num::u32_as_usize(self.inner.rpc.length) + } + + // Returns the sequence number of the message. + pub(crate) fn sequence(&self) -> u32 { + self.inner.rpc.sequence + } + + // Returns the function of the message, if it is valid, or the invalid function number as an + // error. + pub(crate) fn function(&self) -> Result { + self.inner + .rpc + .function + .try_into() + .map_err(|_| self.inner.rpc.function) + } + + // Returns the number of elements (i.e. memory pages) used by this message. + pub(crate) fn element_count(&self) -> u32 { + self.inner.elemCount + } +} + +// SAFETY: Padding is explicit and does not contain uninitialized data. +unsafe impl AsBytes for GspMsgElement {} + +// SAFETY: This struct only contains integer types for which all bit patterns +// are valid. 
+unsafe impl FromBytes for GspMsgElement {} diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs index f7b38978c5f8..17fb2392ec3c 100644 --- a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -1,5 +1,36 @@ // SPDX-License-Identifier: GPL-2.0 +#[repr(C)] +#[derive(Default)] +pub struct __IncompleteArrayField(::core::marker::PhantomData, [T; 0]); +impl __IncompleteArrayField { + #[inline] + pub const fn new() -> Self { + __IncompleteArrayField(::core::marker::PhantomData, []) + } + #[inline] + pub fn as_ptr(&self) -> *const T { + self as *const _ as *const T + } + #[inline] + pub fn as_mut_ptr(&mut self) -> *mut T { + self as *mut _ as *mut T + } + #[inline] + pub unsafe fn as_slice(&self, len: usize) -> &[T] { + ::core::slice::from_raw_parts(self.as_ptr(), len) + } + #[inline] + pub unsafe fn as_mut_slice(&mut self, len: usize) -> &mut [T] { + ::core::slice::from_raw_parts_mut(self.as_mut_ptr(), len) + } +} +impl ::core::fmt::Debug for __IncompleteArrayField { + fn fmt(&self, fmt: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { + fmt.write_str("__IncompleteArrayField") + } +} +pub const NV_VGPU_MSG_SIGNATURE_VALID: u32 = 1129337430; pub const GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS2: u32 = 0; pub const GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS3_BAREMETAL: u32 = 23068672; pub const GSP_FW_HEAP_PARAM_BASE_RM_SIZE_TU10X: u32 = 8388608; @@ -11,6 +42,7 @@ pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MIN_MB: u32 = 88; pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MAX_MB: u32 = 280; pub const GSP_FW_WPR_META_REVISION: u32 = 1; pub const GSP_FW_WPR_META_MAGIC: i64 = -2577556379034558285; +pub const REGISTRY_TABLE_ENTRY_TYPE_DWORD: u32 = 1; pub type __u8 = ffi::c_uchar; pub type __u16 = ffi::c_ushort; pub type __u32 = ffi::c_uint; @@ -19,6 +51,345 @@ pub type u8_ = __u8; pub type u16_ = __u16; pub type u32_ = __u32; pub type u64_ = __u64; +pub 
const NV_VGPU_MSG_FUNCTION_NOP: _bindgen_ty_2 = 0; +pub const NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO: _bindgen_ty_2 = 1; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_ROOT: _bindgen_ty_2 = 2; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_DEVICE: _bindgen_ty_2 = 3; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY: _bindgen_ty_2 = 4; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA: _bindgen_ty_2 = 5; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA: _bindgen_ty_2 = 6; +pub const NV_VGPU_MSG_FUNCTION_MAP_MEMORY: _bindgen_ty_2 = 7; +pub const NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA: _bindgen_ty_2 = 8; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_OBJECT: _bindgen_ty_2 = 9; +pub const NV_VGPU_MSG_FUNCTION_FREE: _bindgen_ty_2 = 10; +pub const NV_VGPU_MSG_FUNCTION_LOG: _bindgen_ty_2 = 11; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_VIDMEM: _bindgen_ty_2 = 12; +pub const NV_VGPU_MSG_FUNCTION_UNMAP_MEMORY: _bindgen_ty_2 = 13; +pub const NV_VGPU_MSG_FUNCTION_MAP_MEMORY_DMA: _bindgen_ty_2 = 14; +pub const NV_VGPU_MSG_FUNCTION_UNMAP_MEMORY_DMA: _bindgen_ty_2 = 15; +pub const NV_VGPU_MSG_FUNCTION_GET_EDID: _bindgen_ty_2 = 16; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_DISP_CHANNEL: _bindgen_ty_2 = 17; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_DISP_OBJECT: _bindgen_ty_2 = 18; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_SUBDEVICE: _bindgen_ty_2 = 19; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_DYNAMIC_MEMORY: _bindgen_ty_2 = 20; +pub const NV_VGPU_MSG_FUNCTION_DUP_OBJECT: _bindgen_ty_2 = 21; +pub const NV_VGPU_MSG_FUNCTION_IDLE_CHANNELS: _bindgen_ty_2 = 22; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_EVENT: _bindgen_ty_2 = 23; +pub const NV_VGPU_MSG_FUNCTION_SEND_EVENT: _bindgen_ty_2 = 24; +pub const NV_VGPU_MSG_FUNCTION_REMAPPER_CONTROL: _bindgen_ty_2 = 25; +pub const NV_VGPU_MSG_FUNCTION_DMA_CONTROL: _bindgen_ty_2 = 26; +pub const NV_VGPU_MSG_FUNCTION_DMA_FILL_PTE_MEM: _bindgen_ty_2 = 27; +pub const NV_VGPU_MSG_FUNCTION_MANAGE_HW_RESOURCE: _bindgen_ty_2 = 28; +pub const NV_VGPU_MSG_FUNCTION_BIND_ARBITRARY_CTX_DMA: _bindgen_ty_2 = 29; +pub 
const NV_VGPU_MSG_FUNCTION_CREATE_FB_SEGMENT: _bindgen_ty_2 = 30; +pub const NV_VGPU_MSG_FUNCTION_DESTROY_FB_SEGMENT: _bindgen_ty_2 = 31; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_SHARE_DEVICE: _bindgen_ty_2 = 32; +pub const NV_VGPU_MSG_FUNCTION_DEFERRED_API_CONTROL: _bindgen_ty_2 = 33; +pub const NV_VGPU_MSG_FUNCTION_REMOVE_DEFERRED_API: _bindgen_ty_2 = 34; +pub const NV_VGPU_MSG_FUNCTION_SIM_ESCAPE_READ: _bindgen_ty_2 = 35; +pub const NV_VGPU_MSG_FUNCTION_SIM_ESCAPE_WRITE: _bindgen_ty_2 = 36; +pub const NV_VGPU_MSG_FUNCTION_SIM_MANAGE_DISPLAY_CONTEXT_DMA: _bindgen_ty_2 = 37; +pub const NV_VGPU_MSG_FUNCTION_FREE_VIDMEM_VIRT: _bindgen_ty_2 = 38; +pub const NV_VGPU_MSG_FUNCTION_PERF_GET_PSTATE_INFO: _bindgen_ty_2 = 39; +pub const NV_VGPU_MSG_FUNCTION_PERF_GET_PERFMON_SAMPLE: _bindgen_ty_2 = 40; +pub const NV_VGPU_MSG_FUNCTION_PERF_GET_VIRTUAL_PSTATE_INFO: _bindgen_ty_2 = 41; +pub const NV_VGPU_MSG_FUNCTION_PERF_GET_LEVEL_INFO: _bindgen_ty_2 = 42; +pub const NV_VGPU_MSG_FUNCTION_MAP_SEMA_MEMORY: _bindgen_ty_2 = 43; +pub const NV_VGPU_MSG_FUNCTION_UNMAP_SEMA_MEMORY: _bindgen_ty_2 = 44; +pub const NV_VGPU_MSG_FUNCTION_SET_SURFACE_PROPERTIES: _bindgen_ty_2 = 45; +pub const NV_VGPU_MSG_FUNCTION_CLEANUP_SURFACE: _bindgen_ty_2 = 46; +pub const NV_VGPU_MSG_FUNCTION_UNLOADING_GUEST_DRIVER: _bindgen_ty_2 = 47; +pub const NV_VGPU_MSG_FUNCTION_TDR_SET_TIMEOUT_STATE: _bindgen_ty_2 = 48; +pub const NV_VGPU_MSG_FUNCTION_SWITCH_TO_VGA: _bindgen_ty_2 = 49; +pub const NV_VGPU_MSG_FUNCTION_GPU_EXEC_REG_OPS: _bindgen_ty_2 = 50; +pub const NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO: _bindgen_ty_2 = 51; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_VIRTMEM: _bindgen_ty_2 = 52; +pub const NV_VGPU_MSG_FUNCTION_UPDATE_PDE_2: _bindgen_ty_2 = 53; +pub const NV_VGPU_MSG_FUNCTION_SET_PAGE_DIRECTORY: _bindgen_ty_2 = 54; +pub const NV_VGPU_MSG_FUNCTION_GET_STATIC_PSTATE_INFO: _bindgen_ty_2 = 55; +pub const NV_VGPU_MSG_FUNCTION_TRANSLATE_GUEST_GPU_PTES: _bindgen_ty_2 = 56; +pub const 
NV_VGPU_MSG_FUNCTION_RESERVED_57: _bindgen_ty_2 = 57; +pub const NV_VGPU_MSG_FUNCTION_RESET_CURRENT_GR_CONTEXT: _bindgen_ty_2 = 58; +pub const NV_VGPU_MSG_FUNCTION_SET_SEMA_MEM_VALIDATION_STATE: _bindgen_ty_2 = 59; +pub const NV_VGPU_MSG_FUNCTION_GET_ENGINE_UTILIZATION: _bindgen_ty_2 = 60; +pub const NV_VGPU_MSG_FUNCTION_UPDATE_GPU_PDES: _bindgen_ty_2 = 61; +pub const NV_VGPU_MSG_FUNCTION_GET_ENCODER_CAPACITY: _bindgen_ty_2 = 62; +pub const NV_VGPU_MSG_FUNCTION_VGPU_PF_REG_READ32: _bindgen_ty_2 = 63; +pub const NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO_EXT: _bindgen_ty_2 = 64; +pub const NV_VGPU_MSG_FUNCTION_GET_GSP_STATIC_INFO: _bindgen_ty_2 = 65; +pub const NV_VGPU_MSG_FUNCTION_RMFS_INIT: _bindgen_ty_2 = 66; +pub const NV_VGPU_MSG_FUNCTION_RMFS_CLOSE_QUEUE: _bindgen_ty_2 = 67; +pub const NV_VGPU_MSG_FUNCTION_RMFS_CLEANUP: _bindgen_ty_2 = 68; +pub const NV_VGPU_MSG_FUNCTION_RMFS_TEST: _bindgen_ty_2 = 69; +pub const NV_VGPU_MSG_FUNCTION_UPDATE_BAR_PDE: _bindgen_ty_2 = 70; +pub const NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD: _bindgen_ty_2 = 71; +pub const NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO: _bindgen_ty_2 = 72; +pub const NV_VGPU_MSG_FUNCTION_SET_REGISTRY: _bindgen_ty_2 = 73; +pub const NV_VGPU_MSG_FUNCTION_GSP_INIT_POST_OBJGPU: _bindgen_ty_2 = 74; +pub const NV_VGPU_MSG_FUNCTION_SUBDEV_EVENT_SET_NOTIFICATION: _bindgen_ty_2 = 75; +pub const NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL: _bindgen_ty_2 = 76; +pub const NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO2: _bindgen_ty_2 = 77; +pub const NV_VGPU_MSG_FUNCTION_DUMP_PROTOBUF_COMPONENT: _bindgen_ty_2 = 78; +pub const NV_VGPU_MSG_FUNCTION_UNSET_PAGE_DIRECTORY: _bindgen_ty_2 = 79; +pub const NV_VGPU_MSG_FUNCTION_GET_CONSOLIDATED_STATIC_INFO: _bindgen_ty_2 = 80; +pub const NV_VGPU_MSG_FUNCTION_GMMU_REGISTER_FAULT_BUFFER: _bindgen_ty_2 = 81; +pub const NV_VGPU_MSG_FUNCTION_GMMU_UNREGISTER_FAULT_BUFFER: _bindgen_ty_2 = 82; +pub const NV_VGPU_MSG_FUNCTION_GMMU_REGISTER_CLIENT_SHADOW_FAULT_BUFFER: _bindgen_ty_2 = 83; +pub const 
NV_VGPU_MSG_FUNCTION_GMMU_UNREGISTER_CLIENT_SHADOW_FAULT_BUFFER: _bindgen_ty_2 = 84; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_VGPU_FB_USAGE: _bindgen_ty_2 = 85; +pub const NV_VGPU_MSG_FUNCTION_CTRL_NVFBC_SW_SESSION_UPDATE_INFO: _bindgen_ty_2 = 86; +pub const NV_VGPU_MSG_FUNCTION_CTRL_NVENC_SW_SESSION_UPDATE_INFO: _bindgen_ty_2 = 87; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RESET_CHANNEL: _bindgen_ty_2 = 88; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RESET_ISOLATED_CHANNEL: _bindgen_ty_2 = 89; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_HANDLE_VF_PRI_FAULT: _bindgen_ty_2 = 90; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CLK_GET_EXTENDED_INFO: _bindgen_ty_2 = 91; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PERF_BOOST: _bindgen_ty_2 = 92; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PERF_VPSTATES_GET_CONTROL: _bindgen_ty_2 = 93; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_ZBC_CLEAR_TABLE: _bindgen_ty_2 = 94; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_ZBC_COLOR_CLEAR: _bindgen_ty_2 = 95; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_ZBC_DEPTH_CLEAR: _bindgen_ty_2 = 96; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPFIFO_SCHEDULE: _bindgen_ty_2 = 97; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_TIMESLICE: _bindgen_ty_2 = 98; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PREEMPT: _bindgen_ty_2 = 99; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FIFO_DISABLE_CHANNELS: _bindgen_ty_2 = 100; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_TSG_INTERLEAVE_LEVEL: _bindgen_ty_2 = 101; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_CHANNEL_INTERLEAVE_LEVEL: _bindgen_ty_2 = 102; +pub const NV_VGPU_MSG_FUNCTION_GSP_RM_ALLOC: _bindgen_ty_2 = 103; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_P2P_CAPS_V2: _bindgen_ty_2 = 104; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CIPHER_AES_ENCRYPT: _bindgen_ty_2 = 105; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CIPHER_SESSION_KEY: _bindgen_ty_2 = 106; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CIPHER_SESSION_KEY_STATUS: _bindgen_ty_2 = 107; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_CLEAR_ALL_SM_ERROR_STATES: _bindgen_ty_2 = 108; +pub const 
NV_VGPU_MSG_FUNCTION_CTRL_DBG_READ_ALL_SM_ERROR_STATES: _bindgen_ty_2 = 109; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_EXCEPTION_MASK: _bindgen_ty_2 = 110; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_PROMOTE_CTX: _bindgen_ty_2 = 111; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_CTXSW_PREEMPTION_BIND: _bindgen_ty_2 = 112; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_SET_CTXSW_PREEMPTION_MODE: _bindgen_ty_2 = 113; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_CTXSW_ZCULL_BIND: _bindgen_ty_2 = 114; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_INITIALIZE_CTX: _bindgen_ty_2 = 115; +pub const NV_VGPU_MSG_FUNCTION_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES: _bindgen_ty_2 = 116; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FIFO_CLEAR_FAULTED_BIT: _bindgen_ty_2 = 117; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_LATEST_ECC_ADDRESSES: _bindgen_ty_2 = 118; +pub const NV_VGPU_MSG_FUNCTION_CTRL_MC_SERVICE_INTERRUPTS: _bindgen_ty_2 = 119; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DMA_SET_DEFAULT_VASPACE: _bindgen_ty_2 = 120; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_CE_PCE_MASK: _bindgen_ty_2 = 121; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_ZBC_CLEAR_TABLE_ENTRY: _bindgen_ty_2 = 122; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_NVLINK_PEER_ID_MASK: _bindgen_ty_2 = 123; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_NVLINK_STATUS: _bindgen_ty_2 = 124; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_P2P_CAPS: _bindgen_ty_2 = 125; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_P2P_CAPS_MATRIX: _bindgen_ty_2 = 126; +pub const NV_VGPU_MSG_FUNCTION_RESERVED_0: _bindgen_ty_2 = 127; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RESERVE_PM_AREA_SMPC: _bindgen_ty_2 = 128; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RESERVE_HWPM_LEGACY: _bindgen_ty_2 = 129; +pub const NV_VGPU_MSG_FUNCTION_CTRL_B0CC_EXEC_REG_OPS: _bindgen_ty_2 = 130; +pub const NV_VGPU_MSG_FUNCTION_CTRL_BIND_PM_RESOURCES: _bindgen_ty_2 = 131; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SUSPEND_CONTEXT: _bindgen_ty_2 = 132; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_RESUME_CONTEXT: _bindgen_ty_2 = 133; +pub 
const NV_VGPU_MSG_FUNCTION_CTRL_DBG_EXEC_REG_OPS: _bindgen_ty_2 = 134; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_MODE_MMU_DEBUG: _bindgen_ty_2 = 135; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_READ_SINGLE_SM_ERROR_STATE: _bindgen_ty_2 = 136; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_CLEAR_SINGLE_SM_ERROR_STATE: _bindgen_ty_2 = 137; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_MODE_ERRBAR_DEBUG: _bindgen_ty_2 = 138; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_NEXT_STOP_TRIGGER_TYPE: _bindgen_ty_2 = 139; +pub const NV_VGPU_MSG_FUNCTION_CTRL_ALLOC_PMA_STREAM: _bindgen_ty_2 = 140; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PMA_STREAM_UPDATE_GET_PUT: _bindgen_ty_2 = 141; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FB_GET_INFO_V2: _bindgen_ty_2 = 142; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FIFO_SET_CHANNEL_PROPERTIES: _bindgen_ty_2 = 143; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_GET_CTX_BUFFER_INFO: _bindgen_ty_2 = 144; +pub const NV_VGPU_MSG_FUNCTION_CTRL_KGR_GET_CTX_BUFFER_PTES: _bindgen_ty_2 = 145; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_EVICT_CTX: _bindgen_ty_2 = 146; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FB_GET_FS_INFO: _bindgen_ty_2 = 147; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GRMGR_GET_GR_FS_INFO: _bindgen_ty_2 = 148; +pub const NV_VGPU_MSG_FUNCTION_CTRL_STOP_CHANNEL: _bindgen_ty_2 = 149; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_PC_SAMPLING_MODE: _bindgen_ty_2 = 150; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PERF_RATED_TDP_GET_STATUS: _bindgen_ty_2 = 151; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PERF_RATED_TDP_SET_CONTROL: _bindgen_ty_2 = 152; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FREE_PMA_STREAM: _bindgen_ty_2 = 153; +pub const NV_VGPU_MSG_FUNCTION_CTRL_TIMER_SET_GR_TICK_FREQ: _bindgen_ty_2 = 154; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FIFO_SETUP_VF_ZOMBIE_SUBCTX_PDB: _bindgen_ty_2 = 155; +pub const NV_VGPU_MSG_FUNCTION_GET_CONSOLIDATED_GR_STATIC_INFO: _bindgen_ty_2 = 156; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_SINGLE_SM_SINGLE_STEP: _bindgen_ty_2 = 157; +pub const 
NV_VGPU_MSG_FUNCTION_CTRL_GR_GET_TPC_PARTITION_MODE: _bindgen_ty_2 = 158; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_SET_TPC_PARTITION_MODE: _bindgen_ty_2 = 159; +pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_ALLOCATE: _bindgen_ty_2 = 160; +pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_DESTROY: _bindgen_ty_2 = 161; +pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_MAP: _bindgen_ty_2 = 162; +pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_UNMAP: _bindgen_ty_2 = 163; +pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_PUSH_STREAM: _bindgen_ty_2 = 164; +pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_SET_HANDLES: _bindgen_ty_2 = 165; +pub const NV_VGPU_MSG_FUNCTION_UVM_METHOD_STREAM_GUEST_PAGES_OPERATION: _bindgen_ty_2 = 166; +pub const NV_VGPU_MSG_FUNCTION_CTRL_INTERNAL_QUIESCE_PMA_CHANNEL: _bindgen_ty_2 = 167; +pub const NV_VGPU_MSG_FUNCTION_DCE_RM_INIT: _bindgen_ty_2 = 168; +pub const NV_VGPU_MSG_FUNCTION_REGISTER_VIRTUAL_EVENT_BUFFER: _bindgen_ty_2 = 169; +pub const NV_VGPU_MSG_FUNCTION_CTRL_EVENT_BUFFER_UPDATE_GET: _bindgen_ty_2 = 170; +pub const NV_VGPU_MSG_FUNCTION_GET_PLCABLE_ADDRESS_KIND: _bindgen_ty_2 = 171; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PERF_LIMITS_SET_STATUS_V2: _bindgen_ty_2 = 172; +pub const NV_VGPU_MSG_FUNCTION_CTRL_INTERNAL_SRIOV_PROMOTE_PMA_STREAM: _bindgen_ty_2 = 173; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_MMU_DEBUG_MODE: _bindgen_ty_2 = 174; +pub const NV_VGPU_MSG_FUNCTION_CTRL_INTERNAL_PROMOTE_FAULT_METHOD_BUFFERS: _bindgen_ty_2 = 175; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FLCN_GET_CTX_BUFFER_SIZE: _bindgen_ty_2 = 176; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FLCN_GET_CTX_BUFFER_INFO: _bindgen_ty_2 = 177; +pub const NV_VGPU_MSG_FUNCTION_DISABLE_CHANNELS: _bindgen_ty_2 = 178; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FABRIC_MEMORY_DESCRIBE: _bindgen_ty_2 = 179; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FABRIC_MEM_STATS: _bindgen_ty_2 = 180; +pub const NV_VGPU_MSG_FUNCTION_SAVE_HIBERNATION_DATA: _bindgen_ty_2 = 181; +pub const 
NV_VGPU_MSG_FUNCTION_RESTORE_HIBERNATION_DATA: _bindgen_ty_2 = 182; +pub const NV_VGPU_MSG_FUNCTION_CTRL_INTERNAL_MEMSYS_SET_ZBC_REFERENCED: _bindgen_ty_2 = 183; +pub const NV_VGPU_MSG_FUNCTION_CTRL_EXEC_PARTITIONS_CREATE: _bindgen_ty_2 = 184; +pub const NV_VGPU_MSG_FUNCTION_CTRL_EXEC_PARTITIONS_DELETE: _bindgen_ty_2 = 185; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPFIFO_GET_WORK_SUBMIT_TOKEN: _bindgen_ty_2 = 186; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPFIFO_SET_WORK_SUBMIT_TOKEN_NOTIF_INDEX: _bindgen_ty_2 = 187; +pub const NV_VGPU_MSG_FUNCTION_PMA_SCRUBBER_SHARED_BUFFER_GUEST_PAGES_OPERATION: _bindgen_ty_2 = + 188; +pub const NV_VGPU_MSG_FUNCTION_CTRL_MASTER_GET_VIRTUAL_FUNCTION_ERROR_CONT_INTR_MASK: + _bindgen_ty_2 = 189; +pub const NV_VGPU_MSG_FUNCTION_SET_SYSMEM_DIRTY_PAGE_TRACKING_BUFFER: _bindgen_ty_2 = 190; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SUBDEVICE_GET_P2P_CAPS: _bindgen_ty_2 = 191; +pub const NV_VGPU_MSG_FUNCTION_CTRL_BUS_SET_P2P_MAPPING: _bindgen_ty_2 = 192; +pub const NV_VGPU_MSG_FUNCTION_CTRL_BUS_UNSET_P2P_MAPPING: _bindgen_ty_2 = 193; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FLA_SETUP_INSTANCE_MEM_BLOCK: _bindgen_ty_2 = 194; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_MIGRATABLE_OPS: _bindgen_ty_2 = 195; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_TOTAL_HS_CREDITS: _bindgen_ty_2 = 196; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_HS_CREDITS: _bindgen_ty_2 = 197; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_HS_CREDITS: _bindgen_ty_2 = 198; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PM_AREA_PC_SAMPLER: _bindgen_ty_2 = 199; +pub const NV_VGPU_MSG_FUNCTION_INVALIDATE_TLB: _bindgen_ty_2 = 200; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_QUERY_ECC_STATUS: _bindgen_ty_2 = 201; +pub const NV_VGPU_MSG_FUNCTION_ECC_NOTIFIER_WRITE_ACK: _bindgen_ty_2 = 202; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_GET_MODE_MMU_DEBUG: _bindgen_ty_2 = 203; +pub const NV_VGPU_MSG_FUNCTION_RM_API_CONTROL: _bindgen_ty_2 = 204; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_INTERNAL_GPU_START_FABRIC_PROBE: _bindgen_ty_2 
= 205; +pub const NV_VGPU_MSG_FUNCTION_CTRL_NVLINK_GET_INBAND_RECEIVED_DATA: _bindgen_ty_2 = 206; +pub const NV_VGPU_MSG_FUNCTION_GET_STATIC_DATA: _bindgen_ty_2 = 207; +pub const NV_VGPU_MSG_FUNCTION_RESERVED_208: _bindgen_ty_2 = 208; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_GET_INFO_V2: _bindgen_ty_2 = 209; +pub const NV_VGPU_MSG_FUNCTION_GET_BRAND_CAPS: _bindgen_ty_2 = 210; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_NVLINK_INBAND_SEND_DATA: _bindgen_ty_2 = 211; +pub const NV_VGPU_MSG_FUNCTION_UPDATE_GPM_GUEST_BUFFER_INFO: _bindgen_ty_2 = 212; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_INTERNAL_CONTROL_GSP_TRACE: _bindgen_ty_2 = 213; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_ZBC_STENCIL_CLEAR: _bindgen_ty_2 = 214; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SUBDEVICE_GET_VGPU_HEAP_STATS: _bindgen_ty_2 = 215; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SUBDEVICE_GET_LIBOS_HEAP_STATS: _bindgen_ty_2 = 216; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_MODE_MMU_GCC_DEBUG: _bindgen_ty_2 = 217; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_GET_MODE_MMU_GCC_DEBUG: _bindgen_ty_2 = 218; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RESERVE_HES: _bindgen_ty_2 = 219; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RELEASE_HES: _bindgen_ty_2 = 220; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RESERVE_CCU_PROF: _bindgen_ty_2 = 221; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RELEASE_CCU_PROF: _bindgen_ty_2 = 222; +pub const NV_VGPU_MSG_FUNCTION_RESERVED: _bindgen_ty_2 = 223; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_GET_CHIPLET_HS_CREDIT_POOL: _bindgen_ty_2 = 224; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_GET_HS_CREDITS_MAPPING: _bindgen_ty_2 = 225; +pub const NV_VGPU_MSG_FUNCTION_CTRL_EXEC_PARTITIONS_EXPORT: _bindgen_ty_2 = 226; +pub const NV_VGPU_MSG_FUNCTION_NUM_FUNCTIONS: _bindgen_ty_2 = 227; +pub type _bindgen_ty_2 = ffi::c_uint; +pub const NV_VGPU_MSG_EVENT_FIRST_EVENT: _bindgen_ty_3 = 4096; +pub const NV_VGPU_MSG_EVENT_GSP_INIT_DONE: _bindgen_ty_3 = 4097; +pub const NV_VGPU_MSG_EVENT_GSP_RUN_CPU_SEQUENCER: _bindgen_ty_3 = 4098; 
+pub const NV_VGPU_MSG_EVENT_POST_EVENT: _bindgen_ty_3 = 4099; +pub const NV_VGPU_MSG_EVENT_RC_TRIGGERED: _bindgen_ty_3 = 4100; +pub const NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED: _bindgen_ty_3 = 4101; +pub const NV_VGPU_MSG_EVENT_OS_ERROR_LOG: _bindgen_ty_3 = 4102; +pub const NV_VGPU_MSG_EVENT_RG_LINE_INTR: _bindgen_ty_3 = 4103; +pub const NV_VGPU_MSG_EVENT_GPUACCT_PERFMON_UTIL_SAMPLES: _bindgen_ty_3 = 4104; +pub const NV_VGPU_MSG_EVENT_SIM_READ: _bindgen_ty_3 = 4105; +pub const NV_VGPU_MSG_EVENT_SIM_WRITE: _bindgen_ty_3 = 4106; +pub const NV_VGPU_MSG_EVENT_SEMAPHORE_SCHEDULE_CALLBACK: _bindgen_ty_3 = 4107; +pub const NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT: _bindgen_ty_3 = 4108; +pub const NV_VGPU_MSG_EVENT_VGPU_GSP_PLUGIN_TRIGGERED: _bindgen_ty_3 = 4109; +pub const NV_VGPU_MSG_EVENT_PERF_GPU_BOOST_SYNC_LIMITS_CALLBACK: _bindgen_ty_3 = 4110; +pub const NV_VGPU_MSG_EVENT_PERF_BRIDGELESS_INFO_UPDATE: _bindgen_ty_3 = 4111; +pub const NV_VGPU_MSG_EVENT_VGPU_CONFIG: _bindgen_ty_3 = 4112; +pub const NV_VGPU_MSG_EVENT_DISPLAY_MODESET: _bindgen_ty_3 = 4113; +pub const NV_VGPU_MSG_EVENT_EXTDEV_INTR_SERVICE: _bindgen_ty_3 = 4114; +pub const NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_256: _bindgen_ty_3 = 4115; +pub const NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_512: _bindgen_ty_3 = 4116; +pub const NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_1024: _bindgen_ty_3 = 4117; +pub const NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_2048: _bindgen_ty_3 = 4118; +pub const NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_4096: _bindgen_ty_3 = 4119; +pub const NV_VGPU_MSG_EVENT_TIMED_SEMAPHORE_RELEASE: _bindgen_ty_3 = 4120; +pub const NV_VGPU_MSG_EVENT_NVLINK_IS_GPU_DEGRADED: _bindgen_ty_3 = 4121; +pub const NV_VGPU_MSG_EVENT_PFM_REQ_HNDLR_STATE_SYNC_CALLBACK: _bindgen_ty_3 = 4122; +pub const NV_VGPU_MSG_EVENT_NVLINK_FAULT_UP: _bindgen_ty_3 = 4123; +pub const NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE: _bindgen_ty_3 = 4124; +pub const NV_VGPU_MSG_EVENT_MIG_CI_CONFIG_UPDATE: _bindgen_ty_3 = 
4125; +pub const NV_VGPU_MSG_EVENT_UPDATE_GSP_TRACE: _bindgen_ty_3 = 4126; +pub const NV_VGPU_MSG_EVENT_NVLINK_FATAL_ERROR_RECOVERY: _bindgen_ty_3 = 4127; +pub const NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD: _bindgen_ty_3 = 4128; +pub const NV_VGPU_MSG_EVENT_FECS_ERROR: _bindgen_ty_3 = 4129; +pub const NV_VGPU_MSG_EVENT_RECOVERY_ACTION: _bindgen_ty_3 = 4130; +pub const NV_VGPU_MSG_EVENT_NUM_EVENTS: _bindgen_ty_3 = 4131; +pub type _bindgen_ty_3 = ffi::c_uint; +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct MESSAGE_QUEUE_INIT_ARGUMENTS { + pub sharedMemPhysAddr: u64_, + pub pageTableEntryCount: u32_, + pub __bindgen_padding_0: [u8; 4usize], + pub cmdQueueOffset: u64_, + pub statQueueOffset: u64_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct GSP_SR_INIT_ARGUMENTS { + pub oldLevel: u32_, + pub flags: u32_, + pub bInPMTransition: u8_, + pub __bindgen_padding_0: [u8; 3usize], +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct GSP_ARGUMENTS_CACHED { + pub messageQueueInitArguments: MESSAGE_QUEUE_INIT_ARGUMENTS, + pub srInitArguments: GSP_SR_INIT_ARGUMENTS, + pub gpuInstance: u32_, + pub bDmemStack: u8_, + pub __bindgen_padding_0: [u8; 7usize], + pub profilerArgs: GSP_ARGUMENTS_CACHED__bindgen_ty_1, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct GSP_ARGUMENTS_CACHED__bindgen_ty_1 { + pub pa: u64_, + pub size: u64_, +} +#[repr(C)] +#[derive(Copy, Clone, Zeroable)] +pub union rpc_message_rpc_union_field_v03_00 { + pub spare: u32_, + pub cpuRmGfid: u32_, +} +impl Default for rpc_message_rpc_union_field_v03_00 { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +pub type rpc_message_rpc_union_field_v = rpc_message_rpc_union_field_v03_00; +#[repr(C)] +pub struct rpc_message_header_v03_00 { + pub header_version: u32_, + pub signature: u32_, + pub 
length: u32_, + pub function: u32_, + pub rpc_result: u32_, + pub rpc_result_private: u32_, + pub sequence: u32_, + pub u: rpc_message_rpc_union_field_v, + pub rpc_message_data: __IncompleteArrayField, +} +impl Default for rpc_message_header_v03_00 { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +pub type rpc_message_header_v = rpc_message_header_v03_00; #[repr(C)] #[derive(Copy, Clone, Zeroable)] pub struct GspFwWprMeta { @@ -145,3 +516,41 @@ pub struct LibosMemoryRegionInitArgument { pub loc: u8_, pub __bindgen_padding_0: [u8; 6usize], } +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct msgqTxHeader { + pub version: u32_, + pub size: u32_, + pub msgSize: u32_, + pub msgCount: u32_, + pub writePtr: u32_, + pub flags: u32_, + pub rxHdrOff: u32_, + pub entryOff: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct msgqRxHeader { + pub readPtr: u32_, +} +#[repr(C)] +#[repr(align(8))] +#[derive(Zeroable)] +pub struct GSP_MSG_QUEUE_ELEMENT { + pub authTagBuffer: [u8_; 16usize], + pub aadBuffer: [u8_; 16usize], + pub checkSum: u32_, + pub seqNum: u32_, + pub elemCount: u32_, + pub __bindgen_padding_0: [u8; 4usize], + pub rpc: rpc_message_header_v, +} +impl Default for GSP_MSG_QUEUE_ELEMENT { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 934003cab8a8..41fdda8a0748 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -86,6 +86,10 @@ register!(NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE @ 0x00100ce0 { 30:30 ecc_mode_enabled as bool; }); +register!(NV_PGSP_QUEUE_HEAD @ 0x00110c00 { + 31:0 address as u32; +}); + impl NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE { /// Returns the usable framebuffer 
size, in bytes. pub(crate) fn usable_fb_size(self) -> u64 { diff --git a/drivers/gpu/nova-core/sbuffer.rs b/drivers/gpu/nova-core/sbuffer.rs index f0cecbcb81be..25e3ad665cac 100644 --- a/drivers/gpu/nova-core/sbuffer.rs +++ b/drivers/gpu/nova-core/sbuffer.rs @@ -50,7 +50,6 @@ where /// let sum: u8 = sbuffer.sum(); /// assert_eq!(sum, 45); /// ``` - #[expect(unused)] pub(crate) fn new_reader(slices: impl IntoIterator) -> Self where I: Iterator, @@ -72,7 +71,6 @@ where /// assert_eq!(buf2, [5, 6, 7, 8, 9]); /// /// ``` - #[expect(unused)] pub(crate) fn new_writer(slices: impl IntoIterator) -> Self where I: Iterator, @@ -131,7 +129,6 @@ where } /// Returns whether this buffer still has data available. - #[expect(unused)] pub(crate) fn is_empty(&self) -> bool { self.cur_slice.is_none() } -- cgit From 4fd4acd973ec6c734e928d19aaa649d4268303a1 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 10 Nov 2025 22:34:18 +0900 Subject: gpu: nova-core: gsp: Create rmargs Initialise the GSP resource manager arguments (rmargs) which provides initialisation parameters to the GSP firmware during boot. The rmargs structure contains arguments to configure the GSP message/command queue location. These are mapped for coherent DMA and added to the libos data structure for access when booting GSP. 
Signed-off-by: Alistair Popple Co-developed-by: Alexandre Courbot Signed-off-by: Alexandre Courbot Message-ID: <20251110-gsp_boot-v9-10-8ae4058e3c0e@nvidia.com> --- drivers/gpu/nova-core/gsp.rs | 18 ++++++++++++++-- drivers/gpu/nova-core/gsp/cmdq.rs | 27 +++++++++++++++++++++++- drivers/gpu/nova-core/gsp/fw.rs | 44 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 85 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index f9819a04bb40..609f8e5f2dcc 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -24,8 +24,11 @@ pub(crate) use fw::{ use crate::{ gsp::cmdq::Cmdq, - gsp::fw::LibosMemoryRegionInitArgument, - num, // + gsp::fw::{ + GspArgumentsCached, + LibosMemoryRegionInitArgument, // + }, + num, }; pub(crate) const GSP_PAGE_SHIFT: usize = 12; @@ -108,6 +111,8 @@ pub(crate) struct Gsp { logrm: LogBuffer, /// Command queue. pub(crate) cmdq: Cmdq, + /// RM arguments. + rmargs: CoherentAllocation, } impl Gsp { @@ -134,11 +139,20 @@ impl Gsp { let cmdq = Cmdq::new(dev)?; + let rmargs = CoherentAllocation::::alloc_coherent( + dev, + 1, + GFP_KERNEL | __GFP_ZERO, + )?; + dma_write!(rmargs[0] = fw::GspArgumentsCached::new(&cmdq))?; + dma_write!(libos[3] = LibosMemoryRegionInitArgument::new("RMARGS", &rmargs))?; + Ok(try_pin_init!(Self { libos, loginit, logintr, logrm, + rmargs, cmdq, })) } diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs index c00d9fa9b79b..295903c28922 100644 --- a/drivers/gpu/nova-core/gsp/cmdq.rs +++ b/drivers/gpu/nova-core/gsp/cmdq.rs @@ -11,7 +11,10 @@ use core::{ use kernel::{ device, - dma::CoherentAllocation, + dma::{ + CoherentAllocation, + DmaAddress, // + }, dma_write, io::poll::read_poll_timeout, prelude::*, @@ -33,6 +36,7 @@ use crate::{ MsgqTxHeader, // }, PteArray, + GSP_PAGE_SHIFT, GSP_PAGE_SIZE, // }, num, @@ -429,6 +433,22 @@ pub(crate) struct Cmdq { } impl Cmdq { + /// Offset of the data after the PTEs. 
+ const POST_PTE_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq); + + /// Offset of command queue ring buffer. + pub(crate) const CMDQ_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq) + + core::mem::offset_of!(Msgq, msgq) + - Self::POST_PTE_OFFSET; + + /// Offset of message queue ring buffer. + pub(crate) const STATQ_OFFSET: usize = core::mem::offset_of!(GspMem, gspq) + + core::mem::offset_of!(Msgq, msgq) + - Self::POST_PTE_OFFSET; + + /// Number of page table entries for the GSP shared region. + pub(crate) const NUM_PTES: usize = size_of::() >> GSP_PAGE_SHIFT; + /// Creates a new command queue for `dev`. pub(crate) fn new(dev: &device::Device) -> Result { let gsp_mem = DmaGspMem::new(dev)?; @@ -653,4 +673,9 @@ impl Cmdq { result } + + /// Returns the DMA handle of the command queue's shared memory region. + pub(crate) fn dma_handle(&self) -> DmaAddress { + self.gsp_mem.0.dma_handle() + } } diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs index ceda61c99b92..b083a6a5754c 100644 --- a/drivers/gpu/nova-core/gsp/fw.rs +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -31,7 +31,10 @@ use crate::{ fb::FbLayout, firmware::gsp::GspFirmware, gpu::Chipset, - gsp::GSP_PAGE_SIZE, + gsp::{ + cmdq::Cmdq, // + GSP_PAGE_SIZE, + }, num::{ self, FromSafeCast, // @@ -568,3 +571,42 @@ unsafe impl AsBytes for GspMsgElement {} // SAFETY: This struct only contains integer types for which all bit patterns // are valid. unsafe impl FromBytes for GspMsgElement {} + +/// Arguments for GSP startup. +#[repr(transparent)] +pub(crate) struct GspArgumentsCached(bindings::GSP_ARGUMENTS_CACHED); + +impl GspArgumentsCached { + /// Creates the arguments for starting the GSP up using `cmdq` as its command queue. 
+ pub(crate) fn new(cmdq: &Cmdq) -> Self { + Self(bindings::GSP_ARGUMENTS_CACHED { + messageQueueInitArguments: MessageQueueInitArguments::new(cmdq).0, + bDmemStack: 1, + ..Default::default() + }) + } +} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for GspArgumentsCached {} + +// SAFETY: This struct only contains integer types for which all bit patterns +// are valid. +unsafe impl FromBytes for GspArgumentsCached {} + +/// Init arguments for the message queue. +#[repr(transparent)] +struct MessageQueueInitArguments(bindings::MESSAGE_QUEUE_INIT_ARGUMENTS); + +impl MessageQueueInitArguments { + /// Creates a new init arguments structure for `cmdq`. + fn new(cmdq: &Cmdq) -> Self { + Self(bindings::MESSAGE_QUEUE_INIT_ARGUMENTS { + sharedMemPhysAddr: cmdq.dma_handle(), + pageTableEntryCount: num::usize_into_u32::<{ Cmdq::NUM_PTES }>(), + cmdQueueOffset: num::usize_as_u64(Cmdq::CMDQ_OFFSET), + statQueueOffset: num::usize_as_u64(Cmdq::STATQ_OFFSET), + ..Default::default() + }) + } +} -- cgit From edcb134264f7db95295a9f0feb7a8b3acde72a08 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 10 Nov 2025 22:34:19 +0900 Subject: gpu: nova-core: gsp: Add SetSystemInfo command Add support for sending the SetSystemInfo command, which provides required hardware information to the GSP and is critical to its initialization. 
Signed-off-by: Alistair Popple Signed-off-by: Alexandre Courbot Message-ID: <20251110-gsp_boot-v9-11-8ae4058e3c0e@nvidia.com> --- drivers/gpu/nova-core/gsp.rs | 1 + drivers/gpu/nova-core/gsp/boot.rs | 10 +- drivers/gpu/nova-core/gsp/cmdq.rs | 1 - drivers/gpu/nova-core/gsp/commands.rs | 37 ++++++ drivers/gpu/nova-core/gsp/fw.rs | 1 + drivers/gpu/nova-core/gsp/fw/commands.rs | 56 +++++++++ drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs | 132 ++++++++++++++++++++++ 7 files changed, 235 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/nova-core/gsp/commands.rs create mode 100644 drivers/gpu/nova-core/gsp/fw/commands.rs diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 609f8e5f2dcc..e40354c47608 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -15,6 +15,7 @@ use kernel::{ }; pub(crate) mod cmdq; +pub(crate) mod commands; mod fw; pub(crate) use fw::{ diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index 5ea53250bf37..56c9950b742c 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -29,7 +29,10 @@ use crate::{ FIRMWARE_VERSION, // }, gpu::Chipset, - gsp::GspFwWprMeta, + gsp::{ + commands, + GspFwWprMeta, // + }, regs, vbios::Vbios, }; @@ -119,7 +122,7 @@ impl super::Gsp { /// /// Upon return, the GSP is up and running, and its runtime object given as return value. 
pub(crate) fn boot( - self: Pin<&mut Self>, + mut self: Pin<&mut Self>, pdev: &pci::Device, bar: &Bar0, chipset: Chipset, @@ -153,6 +156,9 @@ impl super::Gsp { CoherentAllocation::::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?; dma_write!(wpr_meta[0] = GspFwWprMeta::new(&gsp_fw, &fb_layout))?; + self.cmdq + .send_command(bar, commands::SetSystemInfo::new(pdev))?; + Ok(()) } } diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs index 295903c28922..c0f3218f2980 100644 --- a/drivers/gpu/nova-core/gsp/cmdq.rs +++ b/drivers/gpu/nova-core/gsp/cmdq.rs @@ -489,7 +489,6 @@ impl Cmdq { /// written to by its [`CommandToGsp::init_variable_payload`] method. /// /// Error codes returned by the command initializers are propagated as-is. - #[expect(unused)] pub(crate) fn send_command(&mut self, bar: &Bar0, command: M) -> Result where M: CommandToGsp, diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/gsp/commands.rs new file mode 100644 index 000000000000..305045e25693 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/commands.rs @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::{ + device, + pci, + prelude::*, // +}; + +use crate::gsp::{ + cmdq::CommandToGsp, + fw::{ + commands::GspSetSystemInfo, + MsgFunction, // + }, +}; + +/// The `GspSetSystemInfo` command. +pub(crate) struct SetSystemInfo<'a> { + pdev: &'a pci::Device, +} + +impl<'a> SetSystemInfo<'a> { + /// Creates a new `GspSetSystemInfo` command using the parameters of `pdev`. 
+ pub(crate) fn new(pdev: &'a pci::Device) -> Self { + Self { pdev } + } +} + +impl<'a> CommandToGsp for SetSystemInfo<'a> { + const FUNCTION: MsgFunction = MsgFunction::GspSetSystemInfo; + type Command = GspSetSystemInfo; + type InitError = Error; + + fn init(&self) -> impl Init { + GspSetSystemInfo::init(self.pdev) + } +} diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs index b083a6a5754c..cacdfb2d4810 100644 --- a/drivers/gpu/nova-core/gsp/fw.rs +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +pub(crate) mod commands; mod r570_144; // Alias to avoid repeating the version number with every use. diff --git a/drivers/gpu/nova-core/gsp/fw/commands.rs b/drivers/gpu/nova-core/gsp/fw/commands.rs new file mode 100644 index 000000000000..0d3c46f793dd --- /dev/null +++ b/drivers/gpu/nova-core/gsp/fw/commands.rs @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::prelude::*; +use kernel::transmute::{AsBytes, FromBytes}; +use kernel::{device, pci}; + +use crate::gsp::GSP_PAGE_SIZE; + +use super::bindings; + +/// Payload of the `GspSetSystemInfo` command. +#[repr(transparent)] +pub(crate) struct GspSetSystemInfo { + inner: bindings::GspSystemInfo, +} +static_assert!(size_of::() < GSP_PAGE_SIZE); + +impl GspSetSystemInfo { + /// Returns an in-place initializer for the `GspSetSystemInfo` command. + #[allow(non_snake_case)] + pub(crate) fn init<'a>(dev: &'a pci::Device) -> impl Init + 'a { + type InnerGspSystemInfo = bindings::GspSystemInfo; + let init_inner = try_init!(InnerGspSystemInfo { + gpuPhysAddr: dev.resource_start(0)?, + gpuPhysFbAddr: dev.resource_start(1)?, + gpuPhysInstAddr: dev.resource_start(3)?, + nvDomainBusDeviceFunc: u64::from(dev.dev_id()), + + // Using TASK_SIZE in r535_gsp_rpc_set_system_info() seems wrong because + // TASK_SIZE is per-task. That's probably a design issue in GSP-RM though. 
+ maxUserVa: (1 << 47) - 4096, + pciConfigMirrorBase: 0x088000, + pciConfigMirrorSize: 0x001000, + + PCIDeviceID: (u32::from(dev.device_id()) << 16) | u32::from(dev.vendor_id().as_raw()), + PCISubDeviceID: (u32::from(dev.subsystem_device_id()) << 16) + | u32::from(dev.subsystem_vendor_id()), + PCIRevisionID: u32::from(dev.revision_id()), + bIsPrimary: 0, + bPreserveVideoMemoryAllocations: 0, + ..Zeroable::init_zeroed() + }); + + try_init!(GspSetSystemInfo { + inner <- init_inner, + }) + } +} + +// SAFETY: These structs don't meet the no-padding requirements of AsBytes but +// that is not a problem because they are not used outside the kernel. +unsafe impl AsBytes for GspSetSystemInfo {} + +// SAFETY: These structs don't meet the no-padding requirements of FromBytes but +// that is not a problem because they are not used outside the kernel. +unsafe impl FromBytes for GspSetSystemInfo {} diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs index 17fb2392ec3c..1251b0c313ce 100644 --- a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -321,6 +321,138 @@ pub const NV_VGPU_MSG_EVENT_NUM_EVENTS: _bindgen_ty_3 = 4131; pub type _bindgen_ty_3 = ffi::c_uint; #[repr(C)] #[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct DOD_METHOD_DATA { + pub status: u32_, + pub acpiIdListLen: u32_, + pub acpiIdList: [u32_; 16usize], +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct JT_METHOD_DATA { + pub status: u32_, + pub jtCaps: u32_, + pub jtRevId: u16_, + pub bSBIOSCaps: u8_, + pub __bindgen_padding_0: u8, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct MUX_METHOD_DATA_ELEMENT { + pub acpiId: u32_, + pub mode: u32_, + pub status: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct MUX_METHOD_DATA { + pub tableLen: u32_, + pub acpiIdMuxModeTable: [MUX_METHOD_DATA_ELEMENT; 
16usize], + pub acpiIdMuxPartTable: [MUX_METHOD_DATA_ELEMENT; 16usize], + pub acpiIdMuxStateTable: [MUX_METHOD_DATA_ELEMENT; 16usize], +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct CAPS_METHOD_DATA { + pub status: u32_, + pub optimusCaps: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct ACPI_METHOD_DATA { + pub bValid: u8_, + pub __bindgen_padding_0: [u8; 3usize], + pub dodMethodData: DOD_METHOD_DATA, + pub jtMethodData: JT_METHOD_DATA, + pub muxMethodData: MUX_METHOD_DATA, + pub capsMethodData: CAPS_METHOD_DATA, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct BUSINFO { + pub deviceID: u16_, + pub vendorID: u16_, + pub subdeviceID: u16_, + pub subvendorID: u16_, + pub revisionID: u8_, + pub __bindgen_padding_0: u8, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct GSP_VF_INFO { + pub totalVFs: u32_, + pub firstVFOffset: u32_, + pub FirstVFBar0Address: u64_, + pub FirstVFBar1Address: u64_, + pub FirstVFBar2Address: u64_, + pub b64bitBar0: u8_, + pub b64bitBar1: u8_, + pub b64bitBar2: u8_, + pub __bindgen_padding_0: [u8; 5usize], +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct GSP_PCIE_CONFIG_REG { + pub linkCap: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] +pub struct GspSystemInfo { + pub gpuPhysAddr: u64_, + pub gpuPhysFbAddr: u64_, + pub gpuPhysInstAddr: u64_, + pub gpuPhysIoAddr: u64_, + pub nvDomainBusDeviceFunc: u64_, + pub simAccessBufPhysAddr: u64_, + pub notifyOpSharedSurfacePhysAddr: u64_, + pub pcieAtomicsOpMask: u64_, + pub consoleMemSize: u64_, + pub maxUserVa: u64_, + pub pciConfigMirrorBase: u32_, + pub pciConfigMirrorSize: u32_, + pub PCIDeviceID: u32_, + pub PCISubDeviceID: u32_, + pub PCIRevisionID: u32_, + pub pcieAtomicsCplDeviceCapMask: u32_, + pub oorArch: u8_, + pub __bindgen_padding_0: [u8; 7usize], + pub clPdbProperties: u64_, + pub Chipset: u32_, + pub 
bGpuBehindBridge: u8_, + pub bFlrSupported: u8_, + pub b64bBar0Supported: u8_, + pub bMnocAvailable: u8_, + pub chipsetL1ssEnable: u32_, + pub bUpstreamL0sUnsupported: u8_, + pub bUpstreamL1Unsupported: u8_, + pub bUpstreamL1PorSupported: u8_, + pub bUpstreamL1PorMobileOnly: u8_, + pub bSystemHasMux: u8_, + pub upstreamAddressValid: u8_, + pub FHBBusInfo: BUSINFO, + pub chipsetIDInfo: BUSINFO, + pub __bindgen_padding_1: [u8; 2usize], + pub acpiMethodData: ACPI_METHOD_DATA, + pub hypervisorType: u32_, + pub bIsPassthru: u8_, + pub __bindgen_padding_2: [u8; 7usize], + pub sysTimerOffsetNs: u64_, + pub gspVFInfo: GSP_VF_INFO, + pub bIsPrimary: u8_, + pub isGridBuild: u8_, + pub __bindgen_padding_3: [u8; 2usize], + pub pcieConfigReg: GSP_PCIE_CONFIG_REG, + pub gridBuildCsp: u32_, + pub bPreserveVideoMemoryAllocations: u8_, + pub bTdrEventSupported: u8_, + pub bFeatureStretchVblankCapable: u8_, + pub bEnableDynamicGranularityPageArrays: u8_, + pub bClockBoostSupported: u8_, + pub bRouteDispIntrsToCPU: u8_, + pub __bindgen_padding_4: [u8; 6usize], + pub hostPageSize: u64_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Zeroable)] pub struct MESSAGE_QUEUE_INIT_ARGUMENTS { pub sharedMemPhysAddr: u64_, pub pageTableEntryCount: u32_, -- cgit From 19b0a6e7c2be35f9b11c51c0c21c9a02c23252ff Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 10 Nov 2025 22:34:20 +0900 Subject: gpu: nova-core: gsp: Add SetRegistry command Add support for sending the SetRegistry command, which is critical to GSP initialization. The RM registry is serialized into a packed format and sent via the command queue. For now only three parameters which are required to boot GSP are hardcoded. In the future a kernel module parameter will be added to enable other parameters to be added. Signed-off-by: Alistair Popple [acourbot@nvidia.com: split into its own patch.] 
Signed-off-by: Alexandre Courbot Message-ID: <20251110-gsp_boot-v9-12-8ae4058e3c0e@nvidia.com> --- drivers/gpu/nova-core/gsp/boot.rs | 1 + drivers/gpu/nova-core/gsp/commands.rs | 104 ++++++++++++++++++++-- drivers/gpu/nova-core/gsp/fw/commands.rs | 50 +++++++++++ drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs | 16 ++++ drivers/gpu/nova-core/sbuffer.rs | 1 - 5 files changed, 165 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index 56c9950b742c..770731c3eb89 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -158,6 +158,7 @@ impl super::Gsp { self.cmdq .send_command(bar, commands::SetSystemInfo::new(pdev))?; + self.cmdq.send_command(bar, commands::SetRegistry::new())?; Ok(()) } diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/gsp/commands.rs index 305045e25693..d5be3bf10684 100644 --- a/drivers/gpu/nova-core/gsp/commands.rs +++ b/drivers/gpu/nova-core/gsp/commands.rs @@ -1,17 +1,23 @@ // SPDX-License-Identifier: GPL-2.0 +use core::convert::Infallible; + use kernel::{ device, pci, - prelude::*, // + prelude::*, + transmute::AsBytes, // }; -use crate::gsp::{ - cmdq::CommandToGsp, - fw::{ - commands::GspSetSystemInfo, - MsgFunction, // +use crate::{ + gsp::{ + cmdq::CommandToGsp, + fw::{ + commands::*, + MsgFunction, // + }, }, + sbuffer::SBufferIter, }; /// The `GspSetSystemInfo` command. @@ -35,3 +41,89 @@ impl<'a> CommandToGsp for SetSystemInfo<'a> { GspSetSystemInfo::init(self.pdev) } } + +struct RegistryEntry { + key: &'static str, + value: u32, +} + +/// The `SetRegistry` command. +pub(crate) struct SetRegistry { + entries: [RegistryEntry; Self::NUM_ENTRIES], +} + +impl SetRegistry { + // For now we hard-code the registry entries. Future work will allow others to + // be added as module parameters. + const NUM_ENTRIES: usize = 3; + + /// Creates a new `SetRegistry` command, using a set of hardcoded entries. 
+ pub(crate) fn new() -> Self { + Self { + entries: [ + // RMSecBusResetEnable - enables PCI secondary bus reset + RegistryEntry { + key: "RMSecBusResetEnable", + value: 1, + }, + // RMForcePcieConfigSave - forces GSP-RM to preserve PCI configuration registers on + // any PCI reset. + RegistryEntry { + key: "RMForcePcieConfigSave", + value: 1, + }, + // RMDevidCheckIgnore - allows GSP-RM to boot even if the PCI dev ID is not found + // in the internal product name database. + RegistryEntry { + key: "RMDevidCheckIgnore", + value: 1, + }, + ], + } + } +} + +impl CommandToGsp for SetRegistry { + const FUNCTION: MsgFunction = MsgFunction::SetRegistry; + type Command = PackedRegistryTable; + type InitError = Infallible; + + fn init(&self) -> impl Init { + PackedRegistryTable::init(Self::NUM_ENTRIES as u32, self.variable_payload_len() as u32) + } + + fn variable_payload_len(&self) -> usize { + let mut key_size = 0; + for i in 0..Self::NUM_ENTRIES { + key_size += self.entries[i].key.len() + 1; // +1 for NULL terminator + } + Self::NUM_ENTRIES * size_of::() + key_size + } + + fn init_variable_payload( + &self, + dst: &mut SBufferIter>, + ) -> Result { + let string_data_start_offset = + size_of::() + Self::NUM_ENTRIES * size_of::(); + + // Array for string data. 
+ let mut string_data = KVec::new(); + + for entry in self.entries.iter().take(Self::NUM_ENTRIES) { + dst.write_all( + PackedRegistryEntry::new( + (string_data_start_offset + string_data.len()) as u32, + entry.value, + ) + .as_bytes(), + )?; + + let key_bytes = entry.key.as_bytes(); + string_data.extend_from_slice(key_bytes, GFP_KERNEL)?; + string_data.push(0, GFP_KERNEL)?; + } + + dst.write_all(string_data.as_slice()) + } +} diff --git a/drivers/gpu/nova-core/gsp/fw/commands.rs b/drivers/gpu/nova-core/gsp/fw/commands.rs index 0d3c46f793dd..e5aab4032175 100644 --- a/drivers/gpu/nova-core/gsp/fw/commands.rs +++ b/drivers/gpu/nova-core/gsp/fw/commands.rs @@ -54,3 +54,53 @@ unsafe impl AsBytes for GspSetSystemInfo {} // SAFETY: These structs don't meet the no-padding requirements of FromBytes but // that is not a problem because they are not used outside the kernel. unsafe impl FromBytes for GspSetSystemInfo {} + +#[repr(transparent)] +pub(crate) struct PackedRegistryEntry(bindings::PACKED_REGISTRY_ENTRY); + +impl PackedRegistryEntry { + pub(crate) fn new(offset: u32, value: u32) -> Self { + Self({ + bindings::PACKED_REGISTRY_ENTRY { + nameOffset: offset, + + // We only support DWORD types for now. Support for other types + // will come later if required. + type_: bindings::REGISTRY_TABLE_ENTRY_TYPE_DWORD as u8, + __bindgen_padding_0: Default::default(), + data: value, + length: 0, + } + }) + } +} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for PackedRegistryEntry {} + +/// Payload of the `SetRegistry` command. 
+#[repr(transparent)] +pub(crate) struct PackedRegistryTable { + inner: bindings::PACKED_REGISTRY_TABLE, +} + +impl PackedRegistryTable { + #[allow(non_snake_case)] + pub(crate) fn init(num_entries: u32, size: u32) -> impl Init { + type InnerPackedRegistryTable = bindings::PACKED_REGISTRY_TABLE; + let init_inner = init!(InnerPackedRegistryTable { + numEntries: num_entries, + size, + entries: Default::default() + }); + + init!(PackedRegistryTable { inner <- init_inner }) + } +} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for PackedRegistryTable {} + +// SAFETY: This struct only contains integer types for which all bit patterns +// are valid. +unsafe impl FromBytes for PackedRegistryTable {} diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs index 1251b0c313ce..32933874ff97 100644 --- a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -649,6 +649,22 @@ pub struct LibosMemoryRegionInitArgument { pub __bindgen_padding_0: [u8; 6usize], } #[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct PACKED_REGISTRY_ENTRY { + pub nameOffset: u32_, + pub type_: u8_, + pub __bindgen_padding_0: [u8; 3usize], + pub data: u32_, + pub length: u32_, +} +#[repr(C)] +#[derive(Debug, Default)] +pub struct PACKED_REGISTRY_TABLE { + pub size: u32_, + pub numEntries: u32_, + pub entries: __IncompleteArrayField, +} +#[repr(C)] #[derive(Debug, Default, Copy, Clone, Zeroable)] pub struct msgqTxHeader { pub version: u32_, diff --git a/drivers/gpu/nova-core/sbuffer.rs b/drivers/gpu/nova-core/sbuffer.rs index 25e3ad665cac..7a5947b8be19 100644 --- a/drivers/gpu/nova-core/sbuffer.rs +++ b/drivers/gpu/nova-core/sbuffer.rs @@ -199,7 +199,6 @@ where /// Ideally we would implement [`Write`], but it is not available in `core`. /// So mimic `std::io::Write::write_all`. 
- #[expect(unused)] pub(crate) fn write_all(&mut self, mut src: &[u8]) -> Result { while !src.is_empty() { match self.get_slice_mut(src.len()) { -- cgit From bb58d1aee6081dba5f64eacf48993c077e99dd4a Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 10 Nov 2025 22:34:21 +0900 Subject: gpu: nova-core: falcon: Add support to check if RISC-V is active Add definition for RISCV_CPUCTL register and use it in a new falcon API to check if the RISC-V core of a Falcon is active. It is required by the sequencer to know if the GSP's RISCV processor is active. Reviewed-by: Lyude Paul Signed-off-by: Joel Fernandes Signed-off-by: Alexandre Courbot Message-ID: <20251110-gsp_boot-v9-13-8ae4058e3c0e@nvidia.com> --- drivers/gpu/nova-core/falcon.rs | 9 +++++++++ drivers/gpu/nova-core/regs.rs | 7 ++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index fe5b3256d972..4c1f36073533 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -612,4 +612,13 @@ impl Falcon { self.hal .signature_reg_fuse_version(self, bar, engine_id_mask, ucode_id) } + + /// Check if the RISC-V core is active. + /// + /// Returns `true` if the RISC-V core is active, `false` otherwise. 
+ #[expect(unused)] + pub(crate) fn is_riscv_active(&self, bar: &Bar0) -> bool { + let cpuctl = regs::NV_PRISCV_RISCV_CPUCTL::read(bar, &E::ID); + cpuctl.active_stat() + } } diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 41fdda8a0748..5d5ba766cd61 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -339,7 +339,12 @@ register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ PFalcon2Base[0x00000210[1]] { // PRISCV -register!(NV_PRISCV_RISCV_BCR_CTRL @ PFalconBase[0x00001668] { +register!(NV_PRISCV_RISCV_CPUCTL @ PFalcon2Base[0x00000388] { + 0:0 halted as bool; + 7:7 active_stat as bool; +}); + +register!(NV_PRISCV_RISCV_BCR_CTRL @ PFalcon2Base[0x00000668] { 0:0 valid as bool; 4:4 core_select as bool => PeregrineCoreSelect; 8:8 br_fetch as bool; -- cgit From 945c1eee7dbeb0dcafc9d151eaa46a4273d386fa Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 10 Nov 2025 22:34:22 +0900 Subject: gpu: nova-core: falcon: Add support to write firmware version This will be needed by both the GSP boot code as well as GSP resume code in the sequencer. Reviewed-by: Lyude Paul Signed-off-by: Joel Fernandes Signed-off-by: Alexandre Courbot Message-ID: <20251110-gsp_boot-v9-14-8ae4058e3c0e@nvidia.com> --- drivers/gpu/nova-core/falcon.rs | 8 ++++++++ drivers/gpu/nova-core/regs.rs | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 4c1f36073533..31904e1d804b 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -621,4 +621,12 @@ impl Falcon { let cpuctl = regs::NV_PRISCV_RISCV_CPUCTL::read(bar, &E::ID); cpuctl.active_stat() } + + /// Write the application version to the OS register. 
+ #[expect(dead_code)] + pub(crate) fn write_os_version(&self, bar: &Bar0, app_version: u32) { + regs::NV_PFALCON_FALCON_OS::default() + .set_value(app_version) + .write(bar, &E::ID); + } } diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 5d5ba766cd61..274e53a1a44d 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -230,6 +230,12 @@ register!(NV_PFALCON_FALCON_MAILBOX1 @ PFalconBase[0x00000044] { 31:0 value as u32; }); +// Used to store version information about the firmware running +// on the Falcon processor. +register!(NV_PFALCON_FALCON_OS @ PFalconBase[0x00000080] { + 31:0 value as u32; +}); + register!(NV_PFALCON_FALCON_RM @ PFalconBase[0x00000084] { 31:0 value as u32; }); -- cgit From 5949d419c193ce8e285acfbaafad88efe87f9dfa Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 10 Nov 2025 22:34:23 +0900 Subject: gpu: nova-core: gsp: Boot GSP Boot the GSP to the RISC-V active state. Completing the boot requires running the CPU sequencer which will be added in a future commit. Reviewed-by: Lyude Paul Signed-off-by: Alistair Popple Signed-off-by: Alexandre Courbot Message-ID: <20251110-gsp_boot-v9-15-8ae4058e3c0e@nvidia.com> --- drivers/gpu/nova-core/falcon.rs | 2 - drivers/gpu/nova-core/firmware/riscv.rs | 3 +- drivers/gpu/nova-core/gsp/boot.rs | 65 ++++++++++++++++++++++++++++++++- 3 files changed, 64 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 31904e1d804b..05b124acbfc1 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -616,14 +616,12 @@ impl Falcon { /// Check if the RISC-V core is active. /// /// Returns `true` if the RISC-V core is active, `false` otherwise. - #[expect(unused)] pub(crate) fn is_riscv_active(&self, bar: &Bar0) -> bool { let cpuctl = regs::NV_PRISCV_RISCV_CPUCTL::read(bar, &E::ID); cpuctl.active_stat() } /// Write the application version to the OS register. 
- #[expect(dead_code)] pub(crate) fn write_os_version(&self, bar: &Bar0, app_version: u32) { regs::NV_PFALCON_FALCON_OS::default() .set_value(app_version) diff --git a/drivers/gpu/nova-core/firmware/riscv.rs b/drivers/gpu/nova-core/firmware/riscv.rs index 7d82fb9876e8..28dfef63657a 100644 --- a/drivers/gpu/nova-core/firmware/riscv.rs +++ b/drivers/gpu/nova-core/firmware/riscv.rs @@ -57,7 +57,6 @@ impl RmRiscvUCodeDesc { } /// A parsed firmware for a RISC-V core, ready to be loaded and run. -#[expect(unused)] pub(crate) struct RiscvFirmware { /// Offset at which the code starts in the firmware image. pub(crate) code_offset: u32, @@ -66,7 +65,7 @@ pub(crate) struct RiscvFirmware { /// Offset at which the manifest starts in the firmware image. pub(crate) manifest_offset: u32, /// Application version. - app_version: u32, + pub(crate) app_version: u32, /// Device-mapped firmware image. pub(crate) ucode: DmaObject, } diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index 770731c3eb89..eb0ee4f66f0c 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -4,8 +4,10 @@ use kernel::{ device, dma::CoherentAllocation, dma_write, + io::poll::read_poll_timeout, pci, - prelude::*, // + prelude::*, + time::Delta, // }; use crate::{ @@ -143,7 +145,7 @@ impl super::Gsp { Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?; - let _booter_loader = BooterFirmware::new( + let booter_loader = BooterFirmware::new( dev, BooterKind::Loader, chipset, @@ -160,6 +162,65 @@ impl super::Gsp { .send_command(bar, commands::SetSystemInfo::new(pdev))?; self.cmdq.send_command(bar, commands::SetRegistry::new())?; + gsp_falcon.reset(bar)?; + let libos_handle = self.libos.dma_handle(); + let (mbox0, mbox1) = gsp_falcon.boot( + bar, + Some(libos_handle as u32), + Some((libos_handle >> 32) as u32), + )?; + dev_dbg!( + pdev.as_ref(), + "GSP MBOX0: {:#x}, MBOX1: {:#x}\n", + mbox0, + mbox1 + ); + + dev_dbg!( + pdev.as_ref(), + 
"Using SEC2 to load and run the booter_load firmware...\n" + ); + + sec2_falcon.reset(bar)?; + sec2_falcon.dma_load(bar, &booter_loader)?; + let wpr_handle = wpr_meta.dma_handle(); + let (mbox0, mbox1) = sec2_falcon.boot( + bar, + Some(wpr_handle as u32), + Some((wpr_handle >> 32) as u32), + )?; + dev_dbg!( + pdev.as_ref(), + "SEC2 MBOX0: {:#x}, MBOX1{:#x}\n", + mbox0, + mbox1 + ); + + if mbox0 != 0 { + dev_err!( + pdev.as_ref(), + "Booter-load failed with error {:#x}\n", + mbox0 + ); + return Err(ENODEV); + } + + gsp_falcon.write_os_version(bar, gsp_fw.bootloader.app_version); + + // Poll for RISC-V to become active before running sequencer + read_poll_timeout( + || Ok(gsp_falcon.is_riscv_active(bar)), + |val: &bool| *val, + Delta::from_millis(10), + Delta::from_secs(5), + )?; + + dev_dbg!( + pdev.as_ref(), + "RISC-V active? {}\n", + gsp_falcon.is_riscv_active(bar), + ); + Ok(()) } } -- cgit From 38b7cc448a5b6772cad2b853a011f507ad95306a Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Thu, 13 Nov 2025 18:41:04 -0800 Subject: gpu: nova-core: implement Display for Spec Implement Display for Spec. 
This simplifies the dev_info!() code for printing banners such as: NVIDIA (Chipset: GA104, Architecture: Ampere, Revision: a.1) Cc: Alexandre Courbot Cc: Danilo Krummrich Cc: Timur Tabi Reviewed-by: Joel Fernandes Signed-off-by: John Hubbard Signed-off-by: Alexandre Courbot Message-ID: <20251114024109.465136-2-jhubbard@nvidia.com> --- drivers/gpu/nova-core/gpu.rs | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 03dae437bc37..dfeba9d5d8f6 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -180,6 +180,18 @@ impl Spec { } } +impl fmt::Display for Spec { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "Chipset: {}, Architecture: {:?}, Revision: {}", + self.chipset, + self.chipset.arch(), + self.revision + ) + } +} + /// Structure holding the resources required to operate the GPU. #[pin_data] pub(crate) struct Gpu { @@ -206,13 +218,7 @@ impl Gpu { ) -> impl PinInit + 'a { try_pin_init!(Self { spec: Spec::new(bar).inspect(|spec| { - dev_info!( - pdev.as_ref(), - "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n", - spec.chipset, - spec.chipset.arch(), - spec.revision - ); + dev_info!(pdev.as_ref(),"NVIDIA ({})\n", spec); })?, // We must wait for GFW_BOOT completion before doing any significant setup on the GPU. -- cgit From 2d981773e14c1d7fc8f8e19a19d306d8372db571 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 14 Nov 2025 14:55:40 -0500 Subject: gpu: nova-core: falcon: Move waiting until halted to a helper Move the "waiting until halted" functionality into a helper so that we can use it in the sequencer, which is a separate sequencer operation. 
Reviewed-by: Lyude Paul Signed-off-by: Joel Fernandes Signed-off-by: Alexandre Courbot Message-ID: <20251114195552.739371-2-joelagnelf@nvidia.com> --- drivers/gpu/nova-core/falcon.rs | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 05b124acbfc1..1e51b94d9585 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -551,6 +551,19 @@ impl Falcon { Ok(()) } + /// Wait until the falcon CPU is halted. + pub(crate) fn wait_till_halted(&self, bar: &Bar0) -> Result<()> { + // TIMEOUT: arbitrarily large value, firmwares should complete in less than 2 seconds. + read_poll_timeout( + || Ok(regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID)), + |r| r.halted(), + Delta::ZERO, + Delta::from_secs(2), + )?; + + Ok(()) + } + /// Runs the loaded firmware and waits for its completion. /// /// `mbox0` and `mbox1` are optional parameters to write into the `MBOX0` and `MBOX1` registers @@ -585,13 +598,7 @@ impl Falcon { .write(bar, &E::ID), } - // TIMEOUT: arbitrarily large value, firmwares should complete in less than 2 seconds. - read_poll_timeout( - || Ok(regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID)), - |r| r.halted(), - Delta::ZERO, - Delta::from_secs(2), - )?; + self.wait_till_halted(bar)?; let (mbox0, mbox1) = ( regs::NV_PFALCON_FALCON_MAILBOX0::read(bar, &E::ID).value(), -- cgit From c5c0cfa67aa429b171676c6162f123adecb3a5ec Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 14 Nov 2025 14:55:41 -0500 Subject: gpu: nova-core: falcon: Move start functionality into separate helper Move start functionality into a separate helper so we can use it from the sequencer. 
Reviewed-by: Lyude Paul Signed-off-by: Joel Fernandes Signed-off-by: Alexandre Courbot Message-ID: <20251114195552.739371-3-joelagnelf@nvidia.com> --- drivers/gpu/nova-core/falcon.rs | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 1e51b94d9585..30af7fc2814d 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -564,7 +564,21 @@ impl Falcon { Ok(()) } - /// Runs the loaded firmware and waits for its completion. + /// Start the falcon CPU. + pub(crate) fn start(&self, bar: &Bar0) -> Result<()> { + match regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID).alias_en() { + true => regs::NV_PFALCON_FALCON_CPUCTL_ALIAS::default() + .set_startcpu(true) + .write(bar, &E::ID), + false => regs::NV_PFALCON_FALCON_CPUCTL::default() + .set_startcpu(true) + .write(bar, &E::ID), + } + + Ok(()) + } + + /// Start running the loaded firmware. /// /// `mbox0` and `mbox1` are optional parameters to write into the `MBOX0` and `MBOX1` registers /// prior to running. @@ -589,15 +603,7 @@ impl Falcon { .write(bar, &E::ID); } - match regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID).alias_en() { - true => regs::NV_PFALCON_FALCON_CPUCTL_ALIAS::default() - .set_startcpu(true) - .write(bar, &E::ID), - false => regs::NV_PFALCON_FALCON_CPUCTL::default() - .set_startcpu(true) - .write(bar, &E::ID), - } - + self.start(bar)?; self.wait_till_halted(bar)?; let (mbox0, mbox1) = ( -- cgit From 4f7656f79901dc00481b102c821705c992f7b982 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 14 Nov 2025 14:55:42 -0500 Subject: gpu: nova-core: falcon: Move mbox functionalities into helper Move falcon reading/writing to mbox functionality into helper so we can use it from the sequencer resume flow. Reviewed-by: Lyude Paul Signed-off-by: Joel Fernandes [acourbot@nvidia.com: make write/read mailbox methods infallible.]
Signed-off-by: Alexandre Courbot Message-ID: <20251114195552.739371-4-joelagnelf@nvidia.com> --- drivers/gpu/nova-core/falcon.rs | 54 ++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 30af7fc2814d..fd5a09f39a4a 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -578,6 +578,39 @@ impl Falcon { Ok(()) } + /// Writes values to the mailbox registers if provided. + pub(crate) fn write_mailboxes(&self, bar: &Bar0, mbox0: Option, mbox1: Option) { + if let Some(mbox0) = mbox0 { + regs::NV_PFALCON_FALCON_MAILBOX0::default() + .set_value(mbox0) + .write(bar, &E::ID); + } + + if let Some(mbox1) = mbox1 { + regs::NV_PFALCON_FALCON_MAILBOX1::default() + .set_value(mbox1) + .write(bar, &E::ID); + } + } + + /// Reads the value from `mbox0` register. + pub(crate) fn read_mailbox0(&self, bar: &Bar0) -> u32 { + regs::NV_PFALCON_FALCON_MAILBOX0::read(bar, &E::ID).value() + } + + /// Reads the value from `mbox1` register. + pub(crate) fn read_mailbox1(&self, bar: &Bar0) -> u32 { + regs::NV_PFALCON_FALCON_MAILBOX1::read(bar, &E::ID).value() + } + + /// Reads values from both mailbox registers. + pub(crate) fn read_mailboxes(&self, bar: &Bar0) -> (u32, u32) { + let mbox0 = self.read_mailbox0(bar); + let mbox1 = self.read_mailbox1(bar); + + (mbox0, mbox1) + } + /// Start running the loaded firmware. 
/// /// `mbox0` and `mbox1` are optional parameters to write into the `MBOX0` and `MBOX1` registers @@ -591,27 +624,10 @@ impl Falcon { mbox0: Option, mbox1: Option, ) -> Result<(u32, u32)> { - if let Some(mbox0) = mbox0 { - regs::NV_PFALCON_FALCON_MAILBOX0::default() - .set_value(mbox0) - .write(bar, &E::ID); - } - - if let Some(mbox1) = mbox1 { - regs::NV_PFALCON_FALCON_MAILBOX1::default() - .set_value(mbox1) - .write(bar, &E::ID); - } - + self.write_mailboxes(bar, mbox0, mbox1); self.start(bar)?; self.wait_till_halted(bar)?; - - let (mbox0, mbox1) = ( - regs::NV_PFALCON_FALCON_MAILBOX0::read(bar, &E::ID).value(), - regs::NV_PFALCON_FALCON_MAILBOX1::read(bar, &E::ID).value(), - ); - - Ok((mbox0, mbox1)) + Ok(self.read_mailboxes(bar)) } /// Returns the fused version of the signature to use in order to run a HS firmware on this -- cgit From 0f2c8e278e9c6219c080d1e016cd72b07d50e444 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 14 Nov 2025 14:55:43 -0500 Subject: gpu: nova-core: falcon: Move dma_reset functionality into helper Move dma_reset so we can use it for the upcoming sequencer functionality. Reviewed-by: Lyude Paul Signed-off-by: Joel Fernandes Signed-off-by: Alexandre Courbot Message-ID: <20251114195552.739371-5-joelagnelf@nvidia.com> --- drivers/gpu/nova-core/falcon.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index fd5a09f39a4a..82c661aef594 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -382,6 +382,12 @@ impl Falcon { }) } + /// Resets DMA-related registers. + pub(crate) fn dma_reset(&self, bar: &Bar0) { + regs::NV_PFALCON_FBIF_CTL::update(bar, &E::ID, |v| v.set_allow_phys_no_ctx(true)); + regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID); + } + /// Wait for memory scrubbing to complete. 
fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result { // TIMEOUT: memory scrubbing should complete in less than 20ms. @@ -531,8 +537,7 @@ impl Falcon { /// Perform a DMA load into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. pub(crate) fn dma_load>(&self, bar: &Bar0, fw: &F) -> Result { - regs::NV_PFALCON_FBIF_CTL::update(bar, &E::ID, |v| v.set_allow_phys_no_ctx(true)); - regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID); + self.dma_reset(bar); regs::NV_PFALCON_FBIF_TRANSCFG::update(bar, &E::ID, 0, |v| { v.set_target(FalconFbifTarget::CoherentSysmem) .set_mem_type(FalconFbifMemType::Physical) -- cgit From f77be04d00d8ce403ecaf547f1515a844bbde060 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 14 Nov 2025 14:55:44 -0500 Subject: gpu: nova-core: gsp: Add support for checking if GSP reloaded During the sequencer process, we need to check if GSP was successfully reloaded. Add functionality to check for the same. Reviewed-by: Lyude Paul Signed-off-by: Joel Fernandes Signed-off-by: Alexandre Courbot Message-ID: <20251114195552.739371-6-joelagnelf@nvidia.com> --- drivers/gpu/nova-core/falcon/gsp.rs | 18 ++++++++++++++++++ drivers/gpu/nova-core/regs.rs | 6 ++++++ 2 files changed, 24 insertions(+) diff --git a/drivers/gpu/nova-core/falcon/gsp.rs b/drivers/gpu/nova-core/falcon/gsp.rs index 93d4eca65631..9ef1fbae141f 100644 --- a/drivers/gpu/nova-core/falcon/gsp.rs +++ b/drivers/gpu/nova-core/falcon/gsp.rs @@ -1,5 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 +use kernel::{ + io::poll::read_poll_timeout, + prelude::*, + time::Delta, // +}; + use crate::{ driver::Bar0, falcon::{ @@ -37,4 +43,16 @@ impl Falcon { .set_swgen0(true) .write(bar, &Gsp::ID); } + + /// Checks if GSP reload/resume has completed during the boot process. 
+ #[expect(dead_code)] + pub(crate) fn check_reload_completed(&self, bar: &Bar0, timeout: Delta) -> Result { + read_poll_timeout( + || Ok(regs::NV_PGC6_BSI_SECURE_SCRATCH_14::read(bar)), + |val| val.boot_stage_3_handoff(), + Delta::ZERO, + timeout, + ) + .map(|_| true) + } } diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 274e53a1a44d..b32c07092f93 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -138,6 +138,12 @@ impl NV_PFB_PRI_MMU_WPR2_ADDR_HI { // These scratch registers remain powered on even in a low-power state and have a designated group // number. +// Boot Sequence Interface (BSI) register used to determine +// if GSP reload/resume has completed during the boot process. +register!(NV_PGC6_BSI_SECURE_SCRATCH_14 @ 0x001180f8 { + 26:26 boot_stage_3_handoff as bool; +}); + // Privilege level mask register. It dictates whether the host CPU has privilege to access the // `PGC6_AON_SECURE_SCRATCH_GROUP_05` register (which it needs to read GFW_BOOT). register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK @ 0x00118128, -- cgit From eaf0989c77e434ce491b2ed70ef505d8cf1d1d4b Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 14 Nov 2025 14:55:45 -0500 Subject: gpu: nova-core: Add bindings required by GSP sequencer Add several firmware bindings required by GSP sequencer code. Co-developed-by: Alistair Popple Signed-off-by: Alistair Popple Reviewed-by: Lyude Paul Signed-off-by: Joel Fernandes [acourbot@nvidia.com: remove a couple stray lines/unwanted comment changes.] 
Signed-off-by: Alexandre Courbot Message-ID: <20251114195552.739371-7-joelagnelf@nvidia.com> --- drivers/gpu/nova-core/gsp/fw.rs | 323 ++++++++++++++++++++++ drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs | 84 ++++++ 2 files changed, 407 insertions(+) diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs index cacdfb2d4810..db3ef58b3ce7 100644 --- a/drivers/gpu/nova-core/gsp/fw.rs +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -312,6 +312,329 @@ impl From for u32 { } } +/// Sequencer buffer opcode for GSP sequencer commands. +#[derive(Copy, Clone, Debug, PartialEq)] +#[repr(u32)] +pub(crate) enum SeqBufOpcode { + // Core operation opcodes + CoreReset = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESET, + CoreResume = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESUME, + CoreStart = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_START, + CoreWaitForHalt = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT, + + // Delay opcode + DelayUs = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_DELAY_US, + + // Register operation opcodes + RegModify = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_MODIFY, + RegPoll = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_POLL, + RegStore = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_STORE, + RegWrite = r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_WRITE, +} + +impl fmt::Display for SeqBufOpcode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SeqBufOpcode::CoreReset => write!(f, "CORE_RESET"), + SeqBufOpcode::CoreResume => write!(f, "CORE_RESUME"), + SeqBufOpcode::CoreStart => write!(f, "CORE_START"), + SeqBufOpcode::CoreWaitForHalt => write!(f, "CORE_WAIT_FOR_HALT"), + SeqBufOpcode::DelayUs => write!(f, "DELAY_US"), + SeqBufOpcode::RegModify => write!(f, "REG_MODIFY"), + SeqBufOpcode::RegPoll => write!(f, "REG_POLL"), + SeqBufOpcode::RegStore => write!(f, "REG_STORE"), + SeqBufOpcode::RegWrite => write!(f, 
"REG_WRITE"), + } + } +} + +impl TryFrom for SeqBufOpcode { + type Error = kernel::error::Error; + + fn try_from(value: u32) -> Result { + match value { + r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESET => { + Ok(SeqBufOpcode::CoreReset) + } + r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESUME => { + Ok(SeqBufOpcode::CoreResume) + } + r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_START => { + Ok(SeqBufOpcode::CoreStart) + } + r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT => { + Ok(SeqBufOpcode::CoreWaitForHalt) + } + r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_DELAY_US => Ok(SeqBufOpcode::DelayUs), + r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_MODIFY => { + Ok(SeqBufOpcode::RegModify) + } + r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_POLL => Ok(SeqBufOpcode::RegPoll), + r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_STORE => Ok(SeqBufOpcode::RegStore), + r570_144::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_WRITE => Ok(SeqBufOpcode::RegWrite), + _ => Err(EINVAL), + } + } +} + +impl From for u32 { + fn from(value: SeqBufOpcode) -> Self { + // CAST: `SeqBufOpcode` is `repr(u32)` and can thus be cast losslessly. + value as u32 + } +} + +/// Wrapper for GSP sequencer register write payload. +#[repr(transparent)] +#[derive(Copy, Clone)] +pub(crate) struct RegWritePayload(r570_144::GSP_SEQ_BUF_PAYLOAD_REG_WRITE); + +#[expect(unused)] +impl RegWritePayload { + /// Returns the register address. + pub(crate) fn addr(&self) -> u32 { + self.0.addr + } + + /// Returns the value to write. + pub(crate) fn val(&self) -> u32 { + self.0.val + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for RegWritePayload {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for RegWritePayload {} + +/// Wrapper for GSP sequencer register modify payload. 
+#[repr(transparent)] +#[derive(Copy, Clone)] +pub(crate) struct RegModifyPayload(r570_144::GSP_SEQ_BUF_PAYLOAD_REG_MODIFY); + +#[expect(unused)] +impl RegModifyPayload { + /// Returns the register address. + pub(crate) fn addr(&self) -> u32 { + self.0.addr + } + + /// Returns the mask to apply. + pub(crate) fn mask(&self) -> u32 { + self.0.mask + } + + /// Returns the value to write. + pub(crate) fn val(&self) -> u32 { + self.0.val + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for RegModifyPayload {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for RegModifyPayload {} + +/// Wrapper for GSP sequencer register poll payload. +#[repr(transparent)] +#[derive(Copy, Clone)] +pub(crate) struct RegPollPayload(r570_144::GSP_SEQ_BUF_PAYLOAD_REG_POLL); + +#[expect(unused)] +impl RegPollPayload { + /// Returns the register address. + pub(crate) fn addr(&self) -> u32 { + self.0.addr + } + + /// Returns the mask to apply. + pub(crate) fn mask(&self) -> u32 { + self.0.mask + } + + /// Returns the expected value. + pub(crate) fn val(&self) -> u32 { + self.0.val + } + + /// Returns the timeout in microseconds. + pub(crate) fn timeout(&self) -> u32 { + self.0.timeout + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for RegPollPayload {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for RegPollPayload {} + +/// Wrapper for GSP sequencer delay payload. +#[repr(transparent)] +#[derive(Copy, Clone)] +pub(crate) struct DelayUsPayload(r570_144::GSP_SEQ_BUF_PAYLOAD_DELAY_US); + +#[expect(unused)] +impl DelayUsPayload { + /// Returns the delay value in microseconds. + pub(crate) fn val(&self) -> u32 { + self.0.val + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. 
+unsafe impl FromBytes for DelayUsPayload {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for DelayUsPayload {} + +/// Wrapper for GSP sequencer register store payload. +#[repr(transparent)] +#[derive(Copy, Clone)] +pub(crate) struct RegStorePayload(r570_144::GSP_SEQ_BUF_PAYLOAD_REG_STORE); + +#[expect(unused)] +impl RegStorePayload { + /// Returns the register address. + pub(crate) fn addr(&self) -> u32 { + self.0.addr + } + + /// Returns the storage index. + pub(crate) fn index(&self) -> u32 { + self.0.index + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for RegStorePayload {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for RegStorePayload {} + +/// Wrapper for GSP sequencer buffer command. +#[repr(transparent)] +pub(crate) struct SequencerBufferCmd(r570_144::GSP_SEQUENCER_BUFFER_CMD); + +#[expect(unused)] +impl SequencerBufferCmd { + /// Returns the opcode as a `SeqBufOpcode` enum, or error if invalid. + pub(crate) fn opcode(&self) -> Result { + self.0.opCode.try_into() + } + + /// Returns the register write payload by value. + /// + /// Returns an error if the opcode is not `SeqBufOpcode::RegWrite`. + pub(crate) fn reg_write_payload(&self) -> Result { + if self.opcode()? != SeqBufOpcode::RegWrite { + return Err(EINVAL); + } + // SAFETY: Opcode is verified to be `RegWrite`, so union contains valid `RegWritePayload`. + let payload_bytes = unsafe { + core::slice::from_raw_parts( + core::ptr::addr_of!(self.0.payload.regWrite).cast::(), + core::mem::size_of::(), + ) + }; + Ok(*RegWritePayload::from_bytes(payload_bytes).ok_or(EINVAL)?) + } + + /// Returns the register modify payload by value. + /// + /// Returns an error if the opcode is not `SeqBufOpcode::RegModify`. + pub(crate) fn reg_modify_payload(&self) -> Result { + if self.opcode()? 
!= SeqBufOpcode::RegModify { + return Err(EINVAL); + } + // SAFETY: Opcode is verified to be `RegModify`, so union contains valid `RegModifyPayload`. + let payload_bytes = unsafe { + core::slice::from_raw_parts( + core::ptr::addr_of!(self.0.payload.regModify).cast::(), + core::mem::size_of::(), + ) + }; + Ok(*RegModifyPayload::from_bytes(payload_bytes).ok_or(EINVAL)?) + } + + /// Returns the register poll payload by value. + /// + /// Returns an error if the opcode is not `SeqBufOpcode::RegPoll`. + pub(crate) fn reg_poll_payload(&self) -> Result { + if self.opcode()? != SeqBufOpcode::RegPoll { + return Err(EINVAL); + } + // SAFETY: Opcode is verified to be `RegPoll`, so union contains valid `RegPollPayload`. + let payload_bytes = unsafe { + core::slice::from_raw_parts( + core::ptr::addr_of!(self.0.payload.regPoll).cast::(), + core::mem::size_of::(), + ) + }; + Ok(*RegPollPayload::from_bytes(payload_bytes).ok_or(EINVAL)?) + } + + /// Returns the delay payload by value. + /// + /// Returns an error if the opcode is not `SeqBufOpcode::DelayUs`. + pub(crate) fn delay_us_payload(&self) -> Result { + if self.opcode()? != SeqBufOpcode::DelayUs { + return Err(EINVAL); + } + // SAFETY: Opcode is verified to be `DelayUs`, so union contains valid `DelayUsPayload`. + let payload_bytes = unsafe { + core::slice::from_raw_parts( + core::ptr::addr_of!(self.0.payload.delayUs).cast::(), + core::mem::size_of::(), + ) + }; + Ok(*DelayUsPayload::from_bytes(payload_bytes).ok_or(EINVAL)?) + } + + /// Returns the register store payload by value. + /// + /// Returns an error if the opcode is not `SeqBufOpcode::RegStore`. + pub(crate) fn reg_store_payload(&self) -> Result { + if self.opcode()? != SeqBufOpcode::RegStore { + return Err(EINVAL); + } + // SAFETY: Opcode is verified to be `RegStore`, so union contains valid `RegStorePayload`. 
+ let payload_bytes = unsafe { + core::slice::from_raw_parts( + core::ptr::addr_of!(self.0.payload.regStore).cast::(), + core::mem::size_of::(), + ) + }; + Ok(*RegStorePayload::from_bytes(payload_bytes).ok_or(EINVAL)?) + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for SequencerBufferCmd {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for SequencerBufferCmd {} + +/// Wrapper for GSP run CPU sequencer RPC. +#[repr(transparent)] +pub(crate) struct RunCpuSequencer(r570_144::rpc_run_cpu_sequencer_v17_00); + +#[expect(unused)] +impl RunCpuSequencer { + /// Returns the command index. + pub(crate) fn cmd_index(&self) -> u32 { + self.0.cmdIndex + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for RunCpuSequencer {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for RunCpuSequencer {} + /// Struct containing the arguments required to pass a memory buffer to the GSP /// for use during initialisation. 
/// diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs index 32933874ff97..61553fad5f7b 100644 --- a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -702,3 +702,87 @@ impl Default for GSP_MSG_QUEUE_ELEMENT { } } } +#[repr(C)] +#[derive(Debug, Default)] +pub struct rpc_run_cpu_sequencer_v17_00 { + pub bufferSizeDWord: u32_, + pub cmdIndex: u32_, + pub regSaveArea: [u32_; 8usize], + pub commandBuffer: __IncompleteArrayField, +} +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_WRITE: GSP_SEQ_BUF_OPCODE = 0; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_MODIFY: GSP_SEQ_BUF_OPCODE = 1; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_POLL: GSP_SEQ_BUF_OPCODE = 2; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_DELAY_US: GSP_SEQ_BUF_OPCODE = 3; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_STORE: GSP_SEQ_BUF_OPCODE = 4; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESET: GSP_SEQ_BUF_OPCODE = 5; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_START: GSP_SEQ_BUF_OPCODE = 6; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT: GSP_SEQ_BUF_OPCODE = 7; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESUME: GSP_SEQ_BUF_OPCODE = 8; +pub type GSP_SEQ_BUF_OPCODE = ffi::c_uint; +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct GSP_SEQ_BUF_PAYLOAD_REG_WRITE { + pub addr: u32_, + pub val: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct GSP_SEQ_BUF_PAYLOAD_REG_MODIFY { + pub addr: u32_, + pub mask: u32_, + pub val: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct GSP_SEQ_BUF_PAYLOAD_REG_POLL { + pub addr: u32_, + pub mask: u32_, + pub val: u32_, + pub timeout: u32_, + pub error: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct GSP_SEQ_BUF_PAYLOAD_DELAY_US { + pub val: u32_, +} +#[repr(C)] +#[derive(Debug, Default, 
Copy, Clone)] +pub struct GSP_SEQ_BUF_PAYLOAD_REG_STORE { + pub addr: u32_, + pub index: u32_, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct GSP_SEQUENCER_BUFFER_CMD { + pub opCode: GSP_SEQ_BUF_OPCODE, + pub payload: GSP_SEQUENCER_BUFFER_CMD__bindgen_ty_1, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub union GSP_SEQUENCER_BUFFER_CMD__bindgen_ty_1 { + pub regWrite: GSP_SEQ_BUF_PAYLOAD_REG_WRITE, + pub regModify: GSP_SEQ_BUF_PAYLOAD_REG_MODIFY, + pub regPoll: GSP_SEQ_BUF_PAYLOAD_REG_POLL, + pub delayUs: GSP_SEQ_BUF_PAYLOAD_DELAY_US, + pub regStore: GSP_SEQ_BUF_PAYLOAD_REG_STORE, +} +impl Default for GSP_SEQUENCER_BUFFER_CMD__bindgen_ty_1 { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +impl Default for GSP_SEQUENCER_BUFFER_CMD { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} -- cgit From 6ddfc892a529cb314e4708c5654beb0fa37e2071 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 14 Nov 2025 14:55:46 -0500 Subject: gpu: nova-core: Implement the GSP sequencer Implement the GSP sequencer which culminates in INIT_DONE message being received from the GSP indicating that the GSP has successfully booted. This is just initial sequencer support, the actual commands will be added in the next patches. Signed-off-by: Joel Fernandes [acourbot@nvidia.com: move GspSequencerInfo definition before its impl blocks and rename it to GspSequence, adapt imports in sequencer.rs to new formatting rules, remove `timeout` argument to harmonize with other commands.] 
Signed-off-by: Alexandre Courbot Message-ID: <20251114195552.739371-8-joelagnelf@nvidia.com> --- drivers/gpu/nova-core/gsp.rs | 1 + drivers/gpu/nova-core/gsp/boot.rs | 15 +++ drivers/gpu/nova-core/gsp/cmdq.rs | 1 - drivers/gpu/nova-core/gsp/fw.rs | 1 - drivers/gpu/nova-core/gsp/sequencer.rs | 236 +++++++++++++++++++++++++++++++++ drivers/gpu/nova-core/sbuffer.rs | 1 - 6 files changed, 252 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/nova-core/gsp/sequencer.rs diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index e40354c47608..fb6f74797178 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -17,6 +17,7 @@ use kernel::{ pub(crate) mod cmdq; pub(crate) mod commands; mod fw; +mod sequencer; pub(crate) use fw::{ GspFwWprMeta, diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index eb0ee4f66f0c..d62bab07e861 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -33,6 +33,10 @@ use crate::{ gpu::Chipset, gsp::{ commands, + sequencer::{ + GspSequencer, + GspSequencerParams, // + }, GspFwWprMeta, // }, regs, @@ -221,6 +225,17 @@ impl super::Gsp { gsp_falcon.is_riscv_active(bar), ); + // Create and run the GSP sequencer. + let seq_params = GspSequencerParams { + bootloader_app_version: gsp_fw.bootloader.app_version, + libos_dma_handle: libos_handle, + gsp_falcon, + sec2_falcon, + dev: pdev.as_ref().into(), + bar, + }; + GspSequencer::run(&mut self.cmdq, seq_params)?; + Ok(()) } } diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs index c0f3218f2980..6f946d14868a 100644 --- a/drivers/gpu/nova-core/gsp/cmdq.rs +++ b/drivers/gpu/nova-core/gsp/cmdq.rs @@ -645,7 +645,6 @@ impl Cmdq { /// - `EIO` if there was some inconsistency (e.g. message shorter than advertised) on the /// message queue. /// - `EINVAL` if the function of the message was unrecognized. 
- #[expect(unused)] pub(crate) fn receive_msg(&mut self, timeout: Delta) -> Result where // This allows all error types, including `Infallible`, to be used for `M::InitError`. diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs index db3ef58b3ce7..4b569fc4c0b1 100644 --- a/drivers/gpu/nova-core/gsp/fw.rs +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -621,7 +621,6 @@ unsafe impl AsBytes for SequencerBufferCmd {} #[repr(transparent)] pub(crate) struct RunCpuSequencer(r570_144::rpc_run_cpu_sequencer_v17_00); -#[expect(unused)] impl RunCpuSequencer { /// Returns the command index. pub(crate) fn cmd_index(&self) -> u32 { diff --git a/drivers/gpu/nova-core/gsp/sequencer.rs b/drivers/gpu/nova-core/gsp/sequencer.rs new file mode 100644 index 000000000000..1f4f0a0d999b --- /dev/null +++ b/drivers/gpu/nova-core/gsp/sequencer.rs @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! GSP Sequencer implementation for Pre-hopper GSP boot sequence. + +use core::{ + array, + mem::size_of, // +}; + +use kernel::{ + device, + prelude::*, + time::Delta, + transmute::FromBytes, + types::ARef, // +}; + +use crate::{ + driver::Bar0, + falcon::{ + gsp::Gsp, + sec2::Sec2, + Falcon, // + }, + gsp::{ + cmdq::{ + Cmdq, + MessageFromGsp, // + }, + fw, + }, + sbuffer::SBufferIter, +}; + +/// GSP Sequencer information containing the command sequence and data. +struct GspSequence { + /// Current command index for error reporting. + cmd_index: u32, + /// Command data buffer containing the sequence of commands. 
+ cmd_data: KVec, +} + +impl MessageFromGsp for GspSequence { + const FUNCTION: fw::MsgFunction = fw::MsgFunction::GspRunCpuSequencer; + type InitError = Error; + type Message = fw::RunCpuSequencer; + + fn read( + msg: &Self::Message, + sbuffer: &mut SBufferIter>, + ) -> Result { + let cmd_data = sbuffer.flush_into_kvec(GFP_KERNEL)?; + Ok(GspSequence { + cmd_index: msg.cmd_index(), + cmd_data, + }) + } +} + +const CMD_SIZE: usize = size_of::(); + +/// GSP Sequencer Command types with payload data. +/// Commands have an opcode and an opcode-dependent struct. +#[allow(dead_code)] +pub(crate) enum GspSeqCmd {} + +impl GspSeqCmd { + /// Creates a new `GspSeqCmd` from raw data returning the command and its size in bytes. + pub(crate) fn new(data: &[u8], _dev: &device::Device) -> Result<(Self, usize)> { + let _fw_cmd = fw::SequencerBufferCmd::from_bytes(data).ok_or(EINVAL)?; + let _opcode_size = core::mem::size_of::(); + + // NOTE: At this commit, NO opcodes exist yet, so just return error. + // Later commits will add match arms here. + Err(EINVAL) + } +} + +/// GSP Sequencer for executing firmware commands during boot. +#[expect(dead_code)] +pub(crate) struct GspSequencer<'a> { + /// Sequencer information with command data. + seq_info: GspSequence, + /// `Bar0` for register access. + bar: &'a Bar0, + /// SEC2 falcon for core operations. + sec2_falcon: &'a Falcon, + /// GSP falcon for core operations. + gsp_falcon: &'a Falcon, + /// LibOS DMA handle address. + libos_dma_handle: u64, + /// Bootloader application version. + bootloader_app_version: u32, + /// Device for logging. + dev: ARef, +} + +/// Trait for running sequencer commands. +pub(crate) trait GspSeqCmdRunner { + fn run(&self, sequencer: &GspSequencer<'_>) -> Result; +} + +impl GspSeqCmdRunner for GspSeqCmd { + fn run(&self, _seq: &GspSequencer<'_>) -> Result { + Ok(()) + } +} + +/// Iterator over GSP sequencer commands. +pub(crate) struct GspSeqIter<'a> { + /// Command data buffer. 
+ cmd_data: &'a [u8], + /// Current position in the buffer. + current_offset: usize, + /// Total number of commands to process. + total_cmds: u32, + /// Number of commands processed so far. + cmds_processed: u32, + /// Device for logging. + dev: ARef, +} + +impl<'a> Iterator for GspSeqIter<'a> { + type Item = Result; + + fn next(&mut self) -> Option { + // Stop if we've processed all commands or reached the end of data. + if self.cmds_processed >= self.total_cmds || self.current_offset >= self.cmd_data.len() { + return None; + } + + // Check if we have enough data for opcode. + if self.current_offset + core::mem::size_of::() > self.cmd_data.len() { + return Some(Err(EIO)); + } + + let offset = self.current_offset; + + // Handle command creation based on available data, + // zero-pad if necessary (since last command may not be full size). + let mut buffer = [0u8; CMD_SIZE]; + let copy_len = if offset + CMD_SIZE <= self.cmd_data.len() { + CMD_SIZE + } else { + self.cmd_data.len() - offset + }; + buffer[..copy_len].copy_from_slice(&self.cmd_data[offset..offset + copy_len]); + let cmd_result = GspSeqCmd::new(&buffer, &self.dev); + + cmd_result.map_or_else( + |_err| { + dev_err!(self.dev, "Error parsing command at offset {}", offset); + None + }, + |(cmd, size)| { + self.current_offset += size; + self.cmds_processed += 1; + Some(Ok(cmd)) + }, + ) + } +} + +impl<'a> GspSequencer<'a> { + fn iter(&self) -> GspSeqIter<'_> { + let cmd_data = &self.seq_info.cmd_data[..]; + + GspSeqIter { + cmd_data, + current_offset: 0, + total_cmds: self.seq_info.cmd_index, + cmds_processed: 0, + dev: self.dev.clone(), + } + } +} + +/// Parameters for running the GSP sequencer. +pub(crate) struct GspSequencerParams<'a> { + /// Bootloader application version. + pub(crate) bootloader_app_version: u32, + /// LibOS DMA handle address. + pub(crate) libos_dma_handle: u64, + /// GSP falcon for core operations. + pub(crate) gsp_falcon: &'a Falcon, + /// SEC2 falcon for core operations. 
+ pub(crate) sec2_falcon: &'a Falcon, + /// Device for logging. + pub(crate) dev: ARef, + /// BAR0 for register access. + pub(crate) bar: &'a Bar0, +} + +impl<'a> GspSequencer<'a> { + pub(crate) fn run(cmdq: &mut Cmdq, params: GspSequencerParams<'a>) -> Result { + let seq_info = loop { + match cmdq.receive_msg::(Delta::from_secs(10)) { + Ok(seq_info) => break seq_info, + Err(ERANGE) => continue, + Err(e) => return Err(e), + } + }; + + let sequencer = GspSequencer { + seq_info, + bar: params.bar, + sec2_falcon: params.sec2_falcon, + gsp_falcon: params.gsp_falcon, + libos_dma_handle: params.libos_dma_handle, + bootloader_app_version: params.bootloader_app_version, + dev: params.dev, + }; + + dev_dbg!(sequencer.dev, "Running CPU Sequencer commands"); + + for cmd_result in sequencer.iter() { + match cmd_result { + Ok(cmd) => cmd.run(&sequencer)?, + Err(e) => { + dev_err!( + sequencer.dev, + "Error running command at index {}", + sequencer.seq_info.cmd_index + ); + return Err(e); + } + } + } + + dev_dbg!( + sequencer.dev, + "CPU Sequencer commands completed successfully" + ); + Ok(()) + } +} diff --git a/drivers/gpu/nova-core/sbuffer.rs b/drivers/gpu/nova-core/sbuffer.rs index 7a5947b8be19..64758b7fae56 100644 --- a/drivers/gpu/nova-core/sbuffer.rs +++ b/drivers/gpu/nova-core/sbuffer.rs @@ -168,7 +168,6 @@ where /// Read all the remaining data into a [`KVec`]. /// /// `self` will be empty after this operation. - #[expect(unused)] pub(crate) fn flush_into_kvec(&mut self, flags: kernel::alloc::Flags) -> Result> { let mut buf = KVec::::new(); -- cgit From 2367ce2e9e5eae3bfe72ed79a7fbd86936158569 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 14 Nov 2025 14:55:47 -0500 Subject: gpu: nova-core: sequencer: Add register opcodes These opcodes are used for register write, modify, poll and store (save) sequencer operations. Signed-off-by: Joel Fernandes Signed-off-by: Alexandre Courbot [acourbot@nvidia.com: apply Lyude's suggested fixes.] 
Message-ID: <20251114195552.739371-9-joelagnelf@nvidia.com> --- drivers/gpu/nova-core/gsp/fw.rs | 5 +- drivers/gpu/nova-core/gsp/sequencer.rs | 117 +++++++++++++++++++++++++++++---- 2 files changed, 107 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs index 4b569fc4c0b1..99486f194b07 100644 --- a/drivers/gpu/nova-core/gsp/fw.rs +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -389,7 +389,6 @@ impl From for u32 { #[derive(Copy, Clone)] pub(crate) struct RegWritePayload(r570_144::GSP_SEQ_BUF_PAYLOAD_REG_WRITE); -#[expect(unused)] impl RegWritePayload { /// Returns the register address. pub(crate) fn addr(&self) -> u32 { @@ -413,7 +412,6 @@ unsafe impl AsBytes for RegWritePayload {} #[derive(Copy, Clone)] pub(crate) struct RegModifyPayload(r570_144::GSP_SEQ_BUF_PAYLOAD_REG_MODIFY); -#[expect(unused)] impl RegModifyPayload { /// Returns the register address. pub(crate) fn addr(&self) -> u32 { @@ -442,7 +440,6 @@ unsafe impl AsBytes for RegModifyPayload {} #[derive(Copy, Clone)] pub(crate) struct RegPollPayload(r570_144::GSP_SEQ_BUF_PAYLOAD_REG_POLL); -#[expect(unused)] impl RegPollPayload { /// Returns the register address. pub(crate) fn addr(&self) -> u32 { @@ -495,7 +492,6 @@ unsafe impl AsBytes for DelayUsPayload {} #[derive(Copy, Clone)] pub(crate) struct RegStorePayload(r570_144::GSP_SEQ_BUF_PAYLOAD_REG_STORE); -#[expect(unused)] impl RegStorePayload { /// Returns the register address. pub(crate) fn addr(&self) -> u32 { @@ -503,6 +499,7 @@ impl RegStorePayload { } /// Returns the storage index. 
+ #[allow(unused)] pub(crate) fn index(&self) -> u32 { self.0.index } diff --git a/drivers/gpu/nova-core/gsp/sequencer.rs b/drivers/gpu/nova-core/gsp/sequencer.rs index 1f4f0a0d999b..de5d6b4433a2 100644 --- a/drivers/gpu/nova-core/gsp/sequencer.rs +++ b/drivers/gpu/nova-core/gsp/sequencer.rs @@ -4,11 +4,15 @@ use core::{ array, - mem::size_of, // + mem::{ + size_of, + size_of_val, // + }, }; use kernel::{ device, + io::poll::read_poll_timeout, prelude::*, time::Delta, transmute::FromBytes, @@ -29,6 +33,7 @@ use crate::{ }, fw, }, + num::FromSafeCast, sbuffer::SBufferIter, }; @@ -61,18 +66,50 @@ const CMD_SIZE: usize = size_of::(); /// GSP Sequencer Command types with payload data. /// Commands have an opcode and an opcode-dependent struct. -#[allow(dead_code)] -pub(crate) enum GspSeqCmd {} +#[allow(clippy::enum_variant_names)] +pub(crate) enum GspSeqCmd { + RegWrite(fw::RegWritePayload), + RegModify(fw::RegModifyPayload), + RegPoll(fw::RegPollPayload), + RegStore(fw::RegStorePayload), +} impl GspSeqCmd { /// Creates a new `GspSeqCmd` from raw data returning the command and its size in bytes. - pub(crate) fn new(data: &[u8], _dev: &device::Device) -> Result<(Self, usize)> { - let _fw_cmd = fw::SequencerBufferCmd::from_bytes(data).ok_or(EINVAL)?; - let _opcode_size = core::mem::size_of::(); + pub(crate) fn new(data: &[u8], dev: &device::Device) -> Result<(Self, usize)> { + let fw_cmd = fw::SequencerBufferCmd::from_bytes(data).ok_or(EINVAL)?; + let opcode_size = core::mem::size_of::(); - // NOTE: At this commit, NO opcodes exist yet, so just return error. - // Later commits will add match arms here. - Err(EINVAL) + let (cmd, size) = match fw_cmd.opcode()? 
{ + fw::SeqBufOpcode::RegWrite => { + let payload = fw_cmd.reg_write_payload()?; + let size = opcode_size + size_of_val(&payload); + (GspSeqCmd::RegWrite(payload), size) + } + fw::SeqBufOpcode::RegModify => { + let payload = fw_cmd.reg_modify_payload()?; + let size = opcode_size + size_of_val(&payload); + (GspSeqCmd::RegModify(payload), size) + } + fw::SeqBufOpcode::RegPoll => { + let payload = fw_cmd.reg_poll_payload()?; + let size = opcode_size + size_of_val(&payload); + (GspSeqCmd::RegPoll(payload), size) + } + fw::SeqBufOpcode::RegStore => { + let payload = fw_cmd.reg_store_payload()?; + let size = opcode_size + size_of_val(&payload); + (GspSeqCmd::RegStore(payload), size) + } + _ => return Err(EINVAL), + }; + + if data.len() < size { + dev_err!(dev, "Data is not enough for command"); + return Err(EINVAL); + } + + Ok((cmd, size)) } } @@ -100,9 +137,67 @@ pub(crate) trait GspSeqCmdRunner { fn run(&self, sequencer: &GspSequencer<'_>) -> Result; } +impl GspSeqCmdRunner for fw::RegWritePayload { + fn run(&self, sequencer: &GspSequencer<'_>) -> Result { + let addr = usize::from_safe_cast(self.addr()); + + sequencer.bar.try_write32(self.val(), addr) + } +} + +impl GspSeqCmdRunner for fw::RegModifyPayload { + fn run(&self, sequencer: &GspSequencer<'_>) -> Result { + let addr = usize::from_safe_cast(self.addr()); + + sequencer.bar.try_read32(addr).and_then(|val| { + sequencer + .bar + .try_write32((val & !self.mask()) | self.val(), addr) + }) + } +} + +impl GspSeqCmdRunner for fw::RegPollPayload { + fn run(&self, sequencer: &GspSequencer<'_>) -> Result { + let addr = usize::from_safe_cast(self.addr()); + + // Default timeout to 4 seconds. + let timeout_us = if self.timeout() == 0 { + 4_000_000 + } else { + i64::from(self.timeout()) + }; + + // First read. + sequencer.bar.try_read32(addr)?; + + // Poll the requested register with requested timeout. 
+ read_poll_timeout( + || sequencer.bar.try_read32(addr), + |current| (current & self.mask()) == self.val(), + Delta::ZERO, + Delta::from_micros(timeout_us), + ) + .map(|_| ()) + } +} + +impl GspSeqCmdRunner for fw::RegStorePayload { + fn run(&self, sequencer: &GspSequencer<'_>) -> Result { + let addr = usize::from_safe_cast(self.addr()); + + sequencer.bar.try_read32(addr).map(|_| ()) + } +} + impl GspSeqCmdRunner for GspSeqCmd { - fn run(&self, _seq: &GspSequencer<'_>) -> Result { - Ok(()) + fn run(&self, seq: &GspSequencer<'_>) -> Result { + match self { + GspSeqCmd::RegWrite(cmd) => cmd.run(seq), + GspSeqCmd::RegModify(cmd) => cmd.run(seq), + GspSeqCmd::RegPoll(cmd) => cmd.run(seq), + GspSeqCmd::RegStore(cmd) => cmd.run(seq), + } } } -- cgit From e386680e8dbb37ff2973a2cbcf7124899e5359df Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 14 Nov 2025 14:55:48 -0500 Subject: gpu: nova-core: sequencer: Add delay opcode support Implement a sequencer opcode for delay operations. Signed-off-by: Joel Fernandes Reviewed-by: Lyude Paul Signed-off-by: Alexandre Courbot Message-ID: <20251114195552.739371-10-joelagnelf@nvidia.com> --- drivers/gpu/nova-core/gsp/fw.rs | 2 -- drivers/gpu/nova-core/gsp/sequencer.rs | 19 ++++++++++++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs index 99486f194b07..8deec5e0a1d4 100644 --- a/drivers/gpu/nova-core/gsp/fw.rs +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -473,7 +473,6 @@ unsafe impl AsBytes for RegPollPayload {} #[derive(Copy, Clone)] pub(crate) struct DelayUsPayload(r570_144::GSP_SEQ_BUF_PAYLOAD_DELAY_US); -#[expect(unused)] impl DelayUsPayload { /// Returns the delay value in microseconds. 
pub(crate) fn val(&self) -> u32 { @@ -515,7 +514,6 @@ unsafe impl AsBytes for RegStorePayload {} #[repr(transparent)] pub(crate) struct SequencerBufferCmd(r570_144::GSP_SEQUENCER_BUFFER_CMD); -#[expect(unused)] impl SequencerBufferCmd { /// Returns the opcode as a `SeqBufOpcode` enum, or error if invalid. pub(crate) fn opcode(&self) -> Result { diff --git a/drivers/gpu/nova-core/gsp/sequencer.rs b/drivers/gpu/nova-core/gsp/sequencer.rs index de5d6b4433a2..970c252f1a03 100644 --- a/drivers/gpu/nova-core/gsp/sequencer.rs +++ b/drivers/gpu/nova-core/gsp/sequencer.rs @@ -14,7 +14,10 @@ use kernel::{ device, io::poll::read_poll_timeout, prelude::*, - time::Delta, + time::{ + delay::fsleep, + Delta, // + }, transmute::FromBytes, types::ARef, // }; @@ -71,6 +74,7 @@ pub(crate) enum GspSeqCmd { RegWrite(fw::RegWritePayload), RegModify(fw::RegModifyPayload), RegPoll(fw::RegPollPayload), + DelayUs(fw::DelayUsPayload), RegStore(fw::RegStorePayload), } @@ -96,6 +100,11 @@ impl GspSeqCmd { let size = opcode_size + size_of_val(&payload); (GspSeqCmd::RegPoll(payload), size) } + fw::SeqBufOpcode::DelayUs => { + let payload = fw_cmd.delay_us_payload()?; + let size = opcode_size + size_of_val(&payload); + (GspSeqCmd::DelayUs(payload), size) + } fw::SeqBufOpcode::RegStore => { let payload = fw_cmd.reg_store_payload()?; let size = opcode_size + size_of_val(&payload); @@ -182,6 +191,13 @@ impl GspSeqCmdRunner for fw::RegPollPayload { } } +impl GspSeqCmdRunner for fw::DelayUsPayload { + fn run(&self, _sequencer: &GspSequencer<'_>) -> Result { + fsleep(Delta::from_micros(i64::from(self.val()))); + Ok(()) + } +} + impl GspSeqCmdRunner for fw::RegStorePayload { fn run(&self, sequencer: &GspSequencer<'_>) -> Result { let addr = usize::from_safe_cast(self.addr()); @@ -196,6 +212,7 @@ impl GspSeqCmdRunner for GspSeqCmd { GspSeqCmd::RegWrite(cmd) => cmd.run(seq), GspSeqCmd::RegModify(cmd) => cmd.run(seq), GspSeqCmd::RegPoll(cmd) => cmd.run(seq), + GspSeqCmd::DelayUs(cmd) => cmd.run(seq), 
GspSeqCmd::RegStore(cmd) => cmd.run(seq), } } -- cgit From 9641f052230041cdf853357c07e77632b729113f Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 14 Nov 2025 14:55:49 -0500 Subject: gpu: nova-core: sequencer: Implement basic core operations These opcodes implement various falcon-related boot operations: reset, start, wait-for-halt. Signed-off-by: Joel Fernandes Reviewed-by: Lyude Paul Signed-off-by: Alexandre Courbot Message-ID: <20251114195552.739371-11-joelagnelf@nvidia.com> --- drivers/gpu/nova-core/gsp/sequencer.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/nova-core/gsp/sequencer.rs b/drivers/gpu/nova-core/gsp/sequencer.rs index 970c252f1a03..081d1903e8f8 100644 --- a/drivers/gpu/nova-core/gsp/sequencer.rs +++ b/drivers/gpu/nova-core/gsp/sequencer.rs @@ -76,6 +76,9 @@ pub(crate) enum GspSeqCmd { RegPoll(fw::RegPollPayload), DelayUs(fw::DelayUsPayload), RegStore(fw::RegStorePayload), + CoreReset, + CoreStart, + CoreWaitForHalt, } impl GspSeqCmd { @@ -110,6 +113,9 @@ impl GspSeqCmd { let size = opcode_size + size_of_val(&payload); (GspSeqCmd::RegStore(payload), size) } + fw::SeqBufOpcode::CoreReset => (GspSeqCmd::CoreReset, opcode_size), + fw::SeqBufOpcode::CoreStart => (GspSeqCmd::CoreStart, opcode_size), + fw::SeqBufOpcode::CoreWaitForHalt => (GspSeqCmd::CoreWaitForHalt, opcode_size), _ => return Err(EINVAL), }; @@ -214,6 +220,19 @@ impl GspSeqCmdRunner for GspSeqCmd { GspSeqCmd::RegPoll(cmd) => cmd.run(seq), GspSeqCmd::DelayUs(cmd) => cmd.run(seq), GspSeqCmd::RegStore(cmd) => cmd.run(seq), + GspSeqCmd::CoreReset => { + seq.gsp_falcon.reset(seq.bar)?; + seq.gsp_falcon.dma_reset(seq.bar); + Ok(()) + } + GspSeqCmd::CoreStart => { + seq.gsp_falcon.start(seq.bar)?; + Ok(()) + } + GspSeqCmd::CoreWaitForHalt => { + seq.gsp_falcon.wait_till_halted(seq.bar)?; + Ok(()) + } } } } -- cgit From 774109857b70e4af0908ce20d99e913ba5ba611a Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 14 Nov 2025 14:55:50 -0500 
Subject: gpu: nova-core: sequencer: Implement core resume operation Implement core resume operation. This is the last step of the sequencer resulting in resume of the GSP and proceeding to INIT_DONE stage of GSP boot. Signed-off-by: Joel Fernandes Reviewed-by: Lyude Paul Signed-off-by: Alexandre Courbot Message-ID: <20251114195552.739371-12-joelagnelf@nvidia.com> --- drivers/gpu/nova-core/falcon/gsp.rs | 1 - drivers/gpu/nova-core/gsp/sequencer.rs | 44 ++++++++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/nova-core/falcon/gsp.rs b/drivers/gpu/nova-core/falcon/gsp.rs index 9ef1fbae141f..67edef3636c1 100644 --- a/drivers/gpu/nova-core/falcon/gsp.rs +++ b/drivers/gpu/nova-core/falcon/gsp.rs @@ -45,7 +45,6 @@ impl Falcon { } /// Checks if GSP reload/resume has completed during the boot process. - #[expect(dead_code)] pub(crate) fn check_reload_completed(&self, bar: &Bar0, timeout: Delta) -> Result { read_poll_timeout( || Ok(regs::NV_PGC6_BSI_SECURE_SCRATCH_14::read(bar)), diff --git a/drivers/gpu/nova-core/gsp/sequencer.rs b/drivers/gpu/nova-core/gsp/sequencer.rs index 081d1903e8f8..2d0369c49092 100644 --- a/drivers/gpu/nova-core/gsp/sequencer.rs +++ b/drivers/gpu/nova-core/gsp/sequencer.rs @@ -79,6 +79,7 @@ pub(crate) enum GspSeqCmd { CoreReset, CoreStart, CoreWaitForHalt, + CoreResume, } impl GspSeqCmd { @@ -116,7 +117,7 @@ impl GspSeqCmd { fw::SeqBufOpcode::CoreReset => (GspSeqCmd::CoreReset, opcode_size), fw::SeqBufOpcode::CoreStart => (GspSeqCmd::CoreStart, opcode_size), fw::SeqBufOpcode::CoreWaitForHalt => (GspSeqCmd::CoreWaitForHalt, opcode_size), - _ => return Err(EINVAL), + fw::SeqBufOpcode::CoreResume => (GspSeqCmd::CoreResume, opcode_size), }; if data.len() < size { @@ -129,7 +130,6 @@ impl GspSeqCmd { } /// GSP Sequencer for executing firmware commands during boot. -#[expect(dead_code)] pub(crate) struct GspSequencer<'a> { /// Sequencer information with command data. 
seq_info: GspSequence, @@ -233,6 +233,46 @@ impl GspSeqCmdRunner for GspSeqCmd { seq.gsp_falcon.wait_till_halted(seq.bar)?; Ok(()) } + GspSeqCmd::CoreResume => { + // At this point, 'SEC2-RTOS' has been loaded into SEC2 by the sequencer + // but neither SEC2-RTOS nor GSP-RM is running yet. This part of the + // sequencer will start both. + + // Reset the GSP to prepare it for resuming. + seq.gsp_falcon.reset(seq.bar)?; + + // Write the libOS DMA handle to GSP mailboxes. + seq.gsp_falcon.write_mailboxes( + seq.bar, + Some(seq.libos_dma_handle as u32), + Some((seq.libos_dma_handle >> 32) as u32), + ); + + // Start the SEC2 falcon which will trigger GSP-RM to resume on the GSP. + seq.sec2_falcon.start(seq.bar)?; + + // Poll until GSP-RM reload/resume has completed (up to 2 seconds). + seq.gsp_falcon + .check_reload_completed(seq.bar, Delta::from_secs(2))?; + + // Verify SEC2 completed successfully by checking its mailbox for errors. + let mbox0 = seq.sec2_falcon.read_mailbox0(seq.bar); + if mbox0 != 0 { + dev_err!(seq.dev, "Sequencer: sec2 errors: {:?}\n", mbox0); + return Err(EIO); + } + + // Configure GSP with the bootloader version. + seq.gsp_falcon + .write_os_version(seq.bar, seq.bootloader_app_version); + + // Verify the GSP's RISC-V core is active indicating successful GSP boot. + if !seq.gsp_falcon.is_riscv_active(seq.bar) { + dev_err!(seq.dev, "Sequencer: RISC-V core is not active\n"); + return Err(EIO); + } + Ok(()) + } } } } -- cgit From 0e7d572b4baa64c582dafc4af36cfc8a4c3c1252 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 14 Nov 2025 14:55:51 -0500 Subject: gpu: nova-core: gsp: Wait for gsp initialization to complete This adds the GSP init done command to wait for GSP initialization to complete. Once this command has been received the GSP is fully operational and will respond properly to normal RPC commands. 
Signed-off-by: Alistair Popple Co-developed-by: Joel Fernandes Signed-off-by: Joel Fernandes Reviewed-by: Lyude Paul [acourbot@nvidia.com: move new definitions to end of commands.rs, rename to `wait_gsp_init_done` and remove timeout argument.] Signed-off-by: Alexandre Courbot Message-ID: <20251114195552.739371-13-joelagnelf@nvidia.com> --- drivers/gpu/nova-core/gsp/boot.rs | 3 +++ drivers/gpu/nova-core/gsp/commands.rs | 48 ++++++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index d62bab07e861..0845d0906ca1 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -236,6 +236,9 @@ impl super::Gsp { }; GspSequencer::run(&mut self.cmdq, seq_params)?; + // Wait until GSP is fully initialized. + commands::wait_gsp_init_done(&mut self.cmdq)?; + Ok(()) } } diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/gsp/commands.rs index d5be3bf10684..b544603703d3 100644 --- a/drivers/gpu/nova-core/gsp/commands.rs +++ b/drivers/gpu/nova-core/gsp/commands.rs @@ -1,17 +1,28 @@ // SPDX-License-Identifier: GPL-2.0 -use core::convert::Infallible; +use core::{ + array, + convert::Infallible, // +}; use kernel::{ device, pci, prelude::*, - transmute::AsBytes, // + time::Delta, + transmute::{ + AsBytes, + FromBytes, // + }, // }; use crate::{ gsp::{ - cmdq::CommandToGsp, + cmdq::{ + Cmdq, + CommandToGsp, + MessageFromGsp, // + }, fw::{ commands::*, MsgFunction, // @@ -127,3 +138,34 @@ impl CommandToGsp for SetRegistry { dst.write_all(string_data.as_slice()) } } + +/// Message type for GSP initialization done notification. +struct GspInitDone {} + +// SAFETY: `GspInitDone` is a zero-sized type with no bytes, therefore it +// trivially has no uninitialized bytes. 
+unsafe impl FromBytes for GspInitDone {} + +impl MessageFromGsp for GspInitDone { + const FUNCTION: MsgFunction = MsgFunction::GspInitDone; + type InitError = Infallible; + type Message = GspInitDone; + + fn read( + _msg: &Self::Message, + _sbuffer: &mut SBufferIter>, + ) -> Result { + Ok(GspInitDone {}) + } +} + +/// Waits for GSP initialization to complete. +pub(crate) fn wait_gsp_init_done(cmdq: &mut Cmdq) -> Result { + loop { + match cmdq.receive_msg::(Delta::from_secs(10)) { + Ok(_) => break Ok(()), + Err(ERANGE) => continue, + Err(e) => break Err(e), + } + } +} -- cgit From 13f85988d4fa31bda73a9504d71b10f7a14f1856 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 14 Nov 2025 14:55:52 -0500 Subject: gpu: nova-core: gsp: Retrieve GSP static info to gather GPU information After GSP initialization is complete, retrieve the static configuration information from GSP-RM. This information includes GPU name, capabilities, memory configuration, and other properties. On some GPU variants, it is also required to do this for initialization to complete. Signed-off-by: Alistair Popple Co-developed-by: Joel Fernandes Signed-off-by: Joel Fernandes Reviewed-by: Lyude Paul [acourbot@nvidia.com: properly abstract the command's bindings, add relevant methods, make str_from_null_terminated return an Option, fix size of GPU name array.] 
Co-developed-by: Alexandre Courbot Signed-off-by: Alexandre Courbot Message-ID: <20251114195552.739371-14-joelagnelf@nvidia.com> --- drivers/gpu/nova-core/gsp/boot.rs | 8 ++ drivers/gpu/nova-core/gsp/commands.rs | 56 ++++++++ drivers/gpu/nova-core/gsp/fw/commands.rs | 22 +++ drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs | 163 ++++++++++++++++++++++ drivers/gpu/nova-core/nova_core.rs | 1 + drivers/gpu/nova-core/util.rs | 16 +++ 6 files changed, 266 insertions(+) create mode 100644 drivers/gpu/nova-core/util.rs diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index 0845d0906ca1..54937606b5b0 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -239,6 +239,14 @@ impl super::Gsp { // Wait until GSP is fully initialized. commands::wait_gsp_init_done(&mut self.cmdq)?; + // Obtain and display basic GPU information. + let info = commands::get_gsp_info(&mut self.cmdq, bar)?; + dev_info!( + pdev.as_ref(), + "GPU name: {}\n", + info.gpu_name().unwrap_or("invalid GPU name") + ); + Ok(()) } } diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/gsp/commands.rs index b544603703d3..0425c65b5d6f 100644 --- a/drivers/gpu/nova-core/gsp/commands.rs +++ b/drivers/gpu/nova-core/gsp/commands.rs @@ -17,6 +17,7 @@ use kernel::{ }; use crate::{ + driver::Bar0, gsp::{ cmdq::{ Cmdq, @@ -29,6 +30,7 @@ use crate::{ }, }, sbuffer::SBufferIter, + util, }; /// The `GspSetSystemInfo` command. @@ -169,3 +171,57 @@ pub(crate) fn wait_gsp_init_done(cmdq: &mut Cmdq) -> Result { } } } + +/// The `GetGspStaticInfo` command. +struct GetGspStaticInfo; + +impl CommandToGsp for GetGspStaticInfo { + const FUNCTION: MsgFunction = MsgFunction::GetGspStaticInfo; + type Command = GspStaticConfigInfo; + type InitError = Infallible; + + fn init(&self) -> impl Init { + GspStaticConfigInfo::init_zeroed() + } +} + +/// The reply from the GSP to the [`GetGspStaticInfo`] command.
+pub(crate) struct GetGspStaticInfoReply { + gpu_name: [u8; 64], +} + +impl MessageFromGsp for GetGspStaticInfoReply { + const FUNCTION: MsgFunction = MsgFunction::GetGspStaticInfo; + type Message = GspStaticConfigInfo; + type InitError = Infallible; + + fn read( + msg: &Self::Message, + _sbuffer: &mut SBufferIter>, + ) -> Result { + Ok(GetGspStaticInfoReply { + gpu_name: msg.gpu_name_str(), + }) + } +} + +impl GetGspStaticInfoReply { + /// Returns the name of the GPU as a string, or `None` if the string given by the GSP was + /// invalid. + pub(crate) fn gpu_name(&self) -> Option<&str> { + util::str_from_null_terminated(&self.gpu_name) + } +} + +/// Sends the [`GetGspStaticInfo`] command and waits for its reply. +pub(crate) fn get_gsp_info(cmdq: &mut Cmdq, bar: &Bar0) -> Result { + cmdq.send_command(bar, GetGspStaticInfo)?; + + loop { + match cmdq.receive_msg::(Delta::from_secs(5)) { + Ok(info) => return Ok(info), + Err(ERANGE) => continue, + Err(e) => return Err(e), + } + } +} diff --git a/drivers/gpu/nova-core/gsp/fw/commands.rs b/drivers/gpu/nova-core/gsp/fw/commands.rs index e5aab4032175..21be44199693 100644 --- a/drivers/gpu/nova-core/gsp/fw/commands.rs +++ b/drivers/gpu/nova-core/gsp/fw/commands.rs @@ -104,3 +104,25 @@ unsafe impl AsBytes for PackedRegistryTable {} // SAFETY: This struct only contains integer types for which all bit patterns // are valid. unsafe impl FromBytes for PackedRegistryTable {} + +/// Payload of the `GetGspStaticInfo` command and message. +#[repr(transparent)] +pub(crate) struct GspStaticConfigInfo(bindings::GspStaticConfigInfo_t); + +impl GspStaticConfigInfo { + /// Returns a byte array containing the (hopefully) zero-terminated name of this GPU. + pub(crate) fn gpu_name_str(&self) -> [u8; 64] { + self.0.gpuNameString + } +} + +// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for GspStaticConfigInfo {} + +// SAFETY: This struct only contains integer types for which all bit patterns +// are valid. +unsafe impl FromBytes for GspStaticConfigInfo {} + +// SAFETY: This struct only contains integer types and fixed-size arrays for which +// all bit patterns are valid. +unsafe impl Zeroable for GspStaticConfigInfo {} diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs index 61553fad5f7b..5bcfbcd1ad22 100644 --- a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -320,6 +320,77 @@ pub const NV_VGPU_MSG_EVENT_RECOVERY_ACTION: _bindgen_ty_3 = 4130; pub const NV_VGPU_MSG_EVENT_NUM_EVENTS: _bindgen_ty_3 = 4131; pub type _bindgen_ty_3 = ffi::c_uint; #[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct NV0080_CTRL_GPU_GET_SRIOV_CAPS_PARAMS { + pub totalVFs: u32_, + pub firstVfOffset: u32_, + pub vfFeatureMask: u32_, + pub FirstVFBar0Address: u64_, + pub FirstVFBar1Address: u64_, + pub FirstVFBar2Address: u64_, + pub bar0Size: u64_, + pub bar1Size: u64_, + pub bar2Size: u64_, + pub b64bitBar0: u8_, + pub b64bitBar1: u8_, + pub b64bitBar2: u8_, + pub bSriovEnabled: u8_, + pub bSriovHeavyEnabled: u8_, + pub bEmulateVFBar0TlbInvalidationRegister: u8_, + pub bClientRmAllocatedCtxBuffer: u8_, + pub bNonPowerOf2ChannelCountSupported: u8_, + pub bVfResizableBAR1Supported: u8_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct NV2080_CTRL_BIOS_GET_SKU_INFO_PARAMS { + pub BoardID: u32_, + pub chipSKU: [ffi::c_char; 9usize], + pub chipSKUMod: [ffi::c_char; 5usize], + pub skuConfigVersion: u32_, + pub project: [ffi::c_char; 5usize], + pub projectSKU: [ffi::c_char; 5usize], + pub CDP: [ffi::c_char; 6usize], + pub projectSKUMod: [ffi::c_char; 2usize], + pub businessCycle: u32_, +} +pub type NV2080_CTRL_CMD_FB_GET_FB_REGION_SURFACE_MEM_TYPE_FLAG = [u8_; 17usize]; +#[repr(C)] +#[derive(Debug, 
Default, Copy, Clone)] +pub struct NV2080_CTRL_CMD_FB_GET_FB_REGION_FB_REGION_INFO { + pub base: u64_, + pub limit: u64_, + pub reserved: u64_, + pub performance: u32_, + pub supportCompressed: u8_, + pub supportISO: u8_, + pub bProtected: u8_, + pub blackList: NV2080_CTRL_CMD_FB_GET_FB_REGION_SURFACE_MEM_TYPE_FLAG, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct NV2080_CTRL_CMD_FB_GET_FB_REGION_INFO_PARAMS { + pub numFBRegions: u32_, + pub fbRegion: [NV2080_CTRL_CMD_FB_GET_FB_REGION_FB_REGION_INFO; 16usize], +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct NV2080_CTRL_GPU_GET_GID_INFO_PARAMS { + pub index: u32_, + pub flags: u32_, + pub length: u32_, + pub data: [u8_; 256usize], +} +impl Default for NV2080_CTRL_GPU_GET_GID_INFO_PARAMS { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +#[repr(C)] #[derive(Debug, Default, Copy, Clone, Zeroable)] pub struct DOD_METHOD_DATA { pub status: u32_, @@ -367,6 +438,19 @@ pub struct ACPI_METHOD_DATA { pub capsMethodData: CAPS_METHOD_DATA, } #[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct VIRTUAL_DISPLAY_GET_MAX_RESOLUTION_PARAMS { + pub headIndex: u32_, + pub maxHResolution: u32_, + pub maxVResolution: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct VIRTUAL_DISPLAY_GET_NUM_HEADS_PARAMS { + pub numHeads: u32_, + pub maxNumHeads: u32_, +} +#[repr(C)] #[derive(Debug, Default, Copy, Clone, Zeroable)] pub struct BUSINFO { pub deviceID: u16_, @@ -395,6 +479,85 @@ pub struct GSP_PCIE_CONFIG_REG { pub linkCap: u32_, } #[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct EcidManufacturingInfo { + pub ecidLow: u32_, + pub ecidHigh: u32_, + pub ecidExtended: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone)] +pub struct FW_WPR_LAYOUT_OFFSET { + pub nonWprHeapOffset: u64_, + pub frtsOffset: u64_, +} +#[repr(C)] +#[derive(Debug, Copy, 
Clone)] +pub struct GspStaticConfigInfo_t { + pub grCapsBits: [u8_; 23usize], + pub gidInfo: NV2080_CTRL_GPU_GET_GID_INFO_PARAMS, + pub SKUInfo: NV2080_CTRL_BIOS_GET_SKU_INFO_PARAMS, + pub fbRegionInfoParams: NV2080_CTRL_CMD_FB_GET_FB_REGION_INFO_PARAMS, + pub sriovCaps: NV0080_CTRL_GPU_GET_SRIOV_CAPS_PARAMS, + pub sriovMaxGfid: u32_, + pub engineCaps: [u32_; 3usize], + pub poisonFuseEnabled: u8_, + pub fb_length: u64_, + pub fbio_mask: u64_, + pub fb_bus_width: u32_, + pub fb_ram_type: u32_, + pub fbp_mask: u64_, + pub l2_cache_size: u32_, + pub gpuNameString: [u8_; 64usize], + pub gpuShortNameString: [u8_; 64usize], + pub gpuNameString_Unicode: [u16_; 64usize], + pub bGpuInternalSku: u8_, + pub bIsQuadroGeneric: u8_, + pub bIsQuadroAd: u8_, + pub bIsNvidiaNvs: u8_, + pub bIsVgx: u8_, + pub bGeforceSmb: u8_, + pub bIsTitan: u8_, + pub bIsTesla: u8_, + pub bIsMobile: u8_, + pub bIsGc6Rtd3Allowed: u8_, + pub bIsGc8Rtd3Allowed: u8_, + pub bIsGcOffRtd3Allowed: u8_, + pub bIsGcoffLegacyAllowed: u8_, + pub bIsMigSupported: u8_, + pub RTD3GC6TotalBoardPower: u16_, + pub RTD3GC6PerstDelay: u16_, + pub bar1PdeBase: u64_, + pub bar2PdeBase: u64_, + pub bVbiosValid: u8_, + pub vbiosSubVendor: u32_, + pub vbiosSubDevice: u32_, + pub bPageRetirementSupported: u8_, + pub bSplitVasBetweenServerClientRm: u8_, + pub bClRootportNeedsNosnoopWAR: u8_, + pub displaylessMaxHeads: VIRTUAL_DISPLAY_GET_NUM_HEADS_PARAMS, + pub displaylessMaxResolution: VIRTUAL_DISPLAY_GET_MAX_RESOLUTION_PARAMS, + pub displaylessMaxPixels: u64_, + pub hInternalClient: u32_, + pub hInternalDevice: u32_, + pub hInternalSubdevice: u32_, + pub bSelfHostedMode: u8_, + pub bAtsSupported: u8_, + pub bIsGpuUefi: u8_, + pub bIsEfiInit: u8_, + pub ecidInfo: [EcidManufacturingInfo; 2usize], + pub fwWprLayoutOffset: FW_WPR_LAYOUT_OFFSET, +} +impl Default for GspStaticConfigInfo_t { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + 
 s.assume_init() + } + } +} +#[repr(C)] #[derive(Debug, Default, Copy, Clone, Zeroable)] pub struct GspSystemInfo { pub gpuPhysAddr: u64_, diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index c1121e7c64c5..b98a1c03f13d 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -16,6 +16,7 @@ mod gsp; mod num; mod regs; mod sbuffer; +mod util; mod vbios; pub(crate) const MODULE_NAME: &kernel::str::CStr = ::NAME; diff --git a/drivers/gpu/nova-core/util.rs b/drivers/gpu/nova-core/util.rs new file mode 100644 index 000000000000..4b503249a3ef --- /dev/null +++ b/drivers/gpu/nova-core/util.rs @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 + +/// Converts a null-terminated byte slice to a string, or `None` if the array does not +/// contain any null byte or contains invalid characters. +/// +/// Contrary to [`kernel::str::CStr::from_bytes_with_nul`], the null byte can be anywhere in the +/// slice, and not only in the last position. +pub(crate) fn str_from_null_terminated(bytes: &[u8]) -> Option<&str> { + use kernel::str::CStr; + + bytes + .iter() + .position(|&b| b == 0) + .and_then(|null_pos| CStr::from_bytes_with_nul(&bytes[..=null_pos]).ok()) + .and_then(|cstr| cstr.to_str().ok()) +} -- cgit From df6137e263ee6ac3921f87321e784421eb64fb35 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Fri, 14 Nov 2025 17:09:19 -0800 Subject: gpu: nova-core: prepare Spec and Revision types for boot0/boot42 Allow both Revision and Spec to be constructed directly from a NV_PMC_BOOT_0 register. Also, slightly enhance the comment about Spec, to be more precise.
Cc: Alexandre Courbot Cc: Danilo Krummrich Cc: Timur Tabi Reviewed-by: Joel Fernandes Signed-off-by: John Hubbard Signed-off-by: Alexandre Courbot Message-ID: <20251115010923.1192144-2-jhubbard@nvidia.com> --- drivers/gpu/nova-core/gpu.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index dfeba9d5d8f6..57c20d1e7274 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -147,8 +147,8 @@ pub(crate) struct Revision { minor: u8, } -impl Revision { - fn from_boot0(boot0: regs::NV_PMC_BOOT_0) -> Self { +impl From for Revision { + fn from(boot0: regs::NV_PMC_BOOT_0) -> Self { Self { major: boot0.major_revision(), minor: boot0.minor_revision(), @@ -162,10 +162,9 @@ impl fmt::Display for Revision { } } -/// Structure holding the metadata of the GPU. +/// Structure holding a basic description of the GPU: `Chipset` and `Revision`. pub(crate) struct Spec { chipset: Chipset, - /// The revision of the chipset. revision: Revision, } @@ -173,9 +172,17 @@ impl Spec { fn new(bar: &Bar0) -> Result { let boot0 = regs::NV_PMC_BOOT_0::read(bar); + Spec::try_from(boot0) + } +} + +impl TryFrom for Spec { + type Error = Error; + + fn try_from(boot0: regs::NV_PMC_BOOT_0) -> Result { Ok(Self { chipset: boot0.chipset()?, - revision: Revision::from_boot0(boot0), + revision: boot0.into(), }) } } -- cgit From 4d980333a66341a764a64a29df668aac1cd6ec41 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Fri, 14 Nov 2025 17:09:20 -0800 Subject: gpu: nova-core: make Architecture behave as a u8 type This allows Architecture to be passed into register!() and bitfield!() macro calls. That in turn requires a default implementation for Architecture. This simplifies transforming BOOT0 (and later, BOOT42) register values into GPU architectures. 
Cc: Danilo Krummrich Cc: Timur Tabi Suggested-by: Alexandre Courbot Reviewed-by: Joel Fernandes Signed-off-by: John Hubbard Signed-off-by: Alexandre Courbot Message-ID: <20251115010923.1192144-3-jhubbard@nvidia.com> --- drivers/gpu/nova-core/gpu.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 57c20d1e7274..88a6d7af9f37 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -122,8 +122,14 @@ impl fmt::Display for Chipset { } /// Enum representation of the GPU generation. -#[derive(fmt::Debug)] +/// +/// TODO: remove the `Default` trait implementation, and the `#[default]` +/// attribute, once the register!() macro (which creates Architecture items) no +/// longer requires it for read-only fields. +#[derive(fmt::Debug, Default, Copy, Clone)] +#[repr(u8)] pub(crate) enum Architecture { + #[default] Turing = 0x16, Ampere = 0x17, Ada = 0x19, @@ -142,6 +148,13 @@ impl TryFrom for Architecture { } } +impl From for u8 { + fn from(value: Architecture) -> Self { + // CAST: `Architecture` is `repr(u8)`, so this cast is always lossless. + value as u8 + } +} + pub(crate) struct Revision { major: u8, minor: u8, -- cgit From 0ecc08e2c450d9d3aebfc0c093db444d77557469 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Fri, 14 Nov 2025 17:09:21 -0800 Subject: gpu: nova-core: add boot42 support for next-gen GPUs NVIDIA GPUs are moving away from using NV_PMC_BOOT_0 to contain architecture and revision details, and will instead use NV_PMC_BOOT_42 in the future. NV_PMC_BOOT_0 will contain a specific set of values that will mean "go read NV_PMC_BOOT_42 instead". Change the selection logic in Nova so that it will claim Turing and later GPUs. This will work for the foreseeable future, without any further code changes here, because all NVIDIA GPUs are considered, from the oldest supported on Linux (NV04), through the future GPUs. 
Add some comment documentation to explain, chronologically, how boot0 and boot42 change with the GPU eras, and how that affects the selection logic. Cc: Alexandre Courbot Cc: Danilo Krummrich Cc: Timur Tabi Reviewed-by: Joel Fernandes Signed-off-by: John Hubbard [acourbot@nvidia.com: remove unneeded `From for Revision` implementation.] Signed-off-by: Alexandre Courbot Message-ID: <20251115010923.1192144-4-jhubbard@nvidia.com> --- drivers/gpu/nova-core/gpu.rs | 36 +++++++++++++++++++++++++++++------- drivers/gpu/nova-core/regs.rs | 21 ++++++++++++++++----- 2 files changed, 45 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 88a6d7af9f37..3e3375f8fe99 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -160,8 +160,8 @@ pub(crate) struct Revision { minor: u8, } -impl From for Revision { - fn from(boot0: regs::NV_PMC_BOOT_0) -> Self { +impl From for Revision { + fn from(boot0: regs::NV_PMC_BOOT_42) -> Self { Self { major: boot0.major_revision(), minor: boot0.minor_revision(), @@ -183,19 +183,41 @@ pub(crate) struct Spec { impl Spec { fn new(bar: &Bar0) -> Result { + // Some brief notes about boot0 and boot42, in chronological order: + // + // NV04 through NV50: + // + // Not supported by Nova. boot0 is necessary and sufficient to identify these GPUs. + // boot42 may not even exist on some of these GPUs. + // + // Fermi through Volta: + // + // Not supported by Nova. boot0 is still sufficient to identify these GPUs, but boot42 + // is also guaranteed to be both present and accurate. + // + // Turing and later: + // + // Supported by Nova. Identified by first checking boot0 to ensure that the GPU is not + // from an earlier (pre-Fermi) era, and then using boot42 to precisely identify the GPU. + // Somewhere in the Rubin timeframe, boot0 will no longer have space to add new GPU IDs. 
+ let boot0 = regs::NV_PMC_BOOT_0::read(bar); - Spec::try_from(boot0) + if boot0.is_older_than_fermi() { + return Err(ENODEV); + } + + Spec::try_from(regs::NV_PMC_BOOT_42::read(bar)) } } -impl TryFrom for Spec { +impl TryFrom for Spec { type Error = Error; - fn try_from(boot0: regs::NV_PMC_BOOT_0) -> Result { + fn try_from(boot42: regs::NV_PMC_BOOT_42) -> Result { Ok(Self { - chipset: boot0.chipset()?, - revision: boot0.into(), + chipset: boot42.chipset()?, + revision: boot42.into(), }) } } diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index b32c07092f93..60b543ed254a 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -40,13 +40,24 @@ register!(NV_PMC_BOOT_0 @ 0x00000000, "Basic revision information about the GPU" }); impl NV_PMC_BOOT_0 { - /// Combines `architecture_0` and `architecture_1` to obtain the architecture of the chip. - pub(crate) fn architecture(self) -> Result { - Architecture::try_from( - self.architecture_0() | (self.architecture_1() << Self::ARCHITECTURE_0_RANGE.len()), - ) + pub(crate) fn is_older_than_fermi(self) -> bool { + // From https://github.com/NVIDIA/open-gpu-doc/tree/master/manuals : + const NV_PMC_BOOT_0_ARCHITECTURE_GF100: u8 = 0xc; + + // Older chips left arch1 zeroed out. That, combined with an arch0 value that is less than + // GF100, means "older than Fermi". + self.architecture_1() == 0 && self.architecture_0() < NV_PMC_BOOT_0_ARCHITECTURE_GF100 } +} + +register!(NV_PMC_BOOT_42 @ 0x00000a00, "Extended architecture information" { + 15:12 minor_revision as u8, "Minor revision of the chip"; + 19:16 major_revision as u8, "Major revision of the chip"; + 23:20 implementation as u8, "Implementation version of the architecture"; + 29:24 architecture as u8 ?=> Architecture, "Architecture value"; +}); +impl NV_PMC_BOOT_42 { /// Combines `architecture` and `implementation` to obtain a code unique to the chipset. 
pub(crate) fn chipset(self) -> Result { self.architecture() -- cgit From ce89e3e019f1ec4b11356f35feb8bd8c0f2c6bf7 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Fri, 14 Nov 2025 17:09:22 -0800 Subject: gpu: nova-core: provide a clear error report for unsupported GPUs Pass in a PCI device to Spec::new(), and provide a Display implementation for boot42, in order to provide a clear, concise report of what happened: the driver read NV_PMC_BOOT42, and found that the GPU is not supported. For very old GPUs (older than Fermi), the driver still returns ENODEV, but it does so without a driver-specific dmesg report. That is exactly appropriate, because if such a GPU is installed, it can only be supported by Nouveau. And if so, the user is not helped by additional error messages from Nova. Here's the full dmesg output for a Blackwell (not yet supported) GPU: NovaCore 0000:01:00.0: Probe Nova Core GPU driver. NovaCore 0000:01:00.0: Unsupported chipset: boot42 = 0x1b2a1000 (architecture 0x1b, implementation 0x2) NovaCore 0000:01:00.0: probe with driver NovaCore failed with error -524 Cc: Alexandre Courbot Cc: Danilo Krummrich Cc: Timur Tabi Cc: Joel Fernandes Signed-off-by: John Hubbard [acourbot@nvidia.com: fix commit log with ENODEV (not ENOTSUPP) error code for unsupported GPUs.] 
Signed-off-by: Alexandre Courbot Message-ID: <20251115010923.1192144-5-jhubbard@nvidia.com> --- drivers/gpu/nova-core/gpu.rs | 9 ++++++--- drivers/gpu/nova-core/regs.rs | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 3e3375f8fe99..19755af6ad04 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -182,7 +182,7 @@ pub(crate) struct Spec { } impl Spec { - fn new(bar: &Bar0) -> Result { + fn new(dev: &device::Device, bar: &Bar0) -> Result { // Some brief notes about boot0 and boot42, in chronological order: // // NV04 through NV50: @@ -207,7 +207,10 @@ impl Spec { return Err(ENODEV); } - Spec::try_from(regs::NV_PMC_BOOT_42::read(bar)) + let boot42 = regs::NV_PMC_BOOT_42::read(bar); + Spec::try_from(boot42).inspect_err(|_| { + dev_err!(dev, "Unsupported chipset: {}\n", boot42); + }) } } @@ -259,7 +262,7 @@ impl Gpu { bar: &'a Bar0, ) -> impl PinInit + 'a { try_pin_init!(Self { - spec: Spec::new(bar).inspect(|spec| { + spec: Spec::new(pdev.as_ref(), bar).inspect(|spec| { dev_info!(pdev.as_ref(),"NVIDIA ({})\n", spec); })?, diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 60b543ed254a..82cc6c0790e5 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -67,6 +67,24 @@ impl NV_PMC_BOOT_42 { }) .and_then(Chipset::try_from) } + + /// Returns the raw architecture value from the register. 
+ fn architecture_raw(self) -> u8 { + ((self.0 >> Self::ARCHITECTURE_RANGE.start()) & ((1 << Self::ARCHITECTURE_RANGE.len()) - 1)) + as u8 + } +} + +impl kernel::fmt::Display for NV_PMC_BOOT_42 { + fn fmt(&self, f: &mut kernel::fmt::Formatter<'_>) -> kernel::fmt::Result { + write!( + f, + "boot42 = 0x{:08x} (architecture 0x{:x}, implementation 0x{:x})", + self.0, + self.architecture_raw(), + self.implementation() + ) + } } // PBUS -- cgit From 77b686f688126a5f758b51441a03186e9eb1b0f1 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 17 Nov 2025 10:39:17 +0000 Subject: gpu: nova-core: make formatting compatible with rust tree Commit 38b7cc448a5b ("gpu: nova-core: implement Display for Spec") in drm-rust-next introduced some usage of the Display trait, but the Display trait is being modified in the rust tree this cycle. Thus, to avoid conflicts with the Rust tree, tweak how the formatting machinery is used in a way where it works both with and without the changes in the Rust tree. Reviewed-by: Alexandre Courbot Tested-by: Alexandre Courbot Link: https://patch.msgid.link/20251117-nova-fmt-rust-v1-1-651ca28cd98f@google.com Signed-off-by: Alice Ryhl --- drivers/gpu/nova-core/gpu.rs | 5 ++--- drivers/gpu/nova-core/gsp/fw.rs | 6 ++---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 19755af6ad04..629c9d2dc994 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -227,13 +227,12 @@ impl TryFrom for Spec { impl fmt::Display for Spec { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, + f.write_fmt(fmt!( "Chipset: {}, Architecture: {:?}, Revision: {}", self.chipset, self.chipset.arch(), self.revision - ) + )) } } diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs index 8deec5e0a1d4..abffd6beec65 100644 --- a/drivers/gpu/nova-core/gsp/fw.rs +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -6,13 +6,11 @@ mod r570_144; // Alias 
to avoid repeating the version number with every use. use r570_144 as bindings; -use core::{ - fmt, - ops::Range, // -}; +use core::ops::Range; use kernel::{ dma::CoherentAllocation, + fmt, prelude::*, ptr::{ Alignable, -- cgit